From 7d489d15ce4be5310ca60e5896df833f9b3b4088 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:04:01 +0000 Subject: timekeeping: Convert timekeeping core to use timespec64s Convert the core timekeeping logic to use timespec64s. This moves the 2038 issues out of the core logic and into all of the accessor functions. Future changes will need to push the timespec64s out to all timekeeping users, but that can be done interface by interface. Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index c1825eb436ed..1b05491e10f9 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -55,15 +55,15 @@ struct timekeeper { * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. */ - struct timespec wall_to_monotonic; + struct timespec64 wall_to_monotonic; /* Offset clock monotonic -> clock realtime */ ktime_t offs_real; /* time spent in suspend */ - struct timespec total_sleep_time; + struct timespec64 total_sleep_time; /* Offset clock monotonic -> clock boottime */ ktime_t offs_boot; /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec raw_time; + struct timespec64 raw_time; /* The current UTC to TAI offset in seconds */ s32 tai_offset; /* Offset clock monotonic -> clock tai */ @@ -71,9 +71,9 @@ struct timekeeper { }; -static inline struct timespec tk_xtime(struct timekeeper *tk) +static inline struct timespec64 tk_xtime(struct timekeeper *tk) { - struct timespec ts; + struct timespec64 ts; ts.tv_sec = tk->xtime_sec; ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); -- cgit v1.2.3 From c905fae43f61c2b4508fc01722e8db61b6b8ac0b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:05 +0000 Subject: timekeeper: Move tk_xtime to core code No users outside of the core. 
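tk_xtime() is a good illustration of the core's internal representation: xtime_nsec stores nanoseconds in clocksource-shifted fixed point (nanoseconds << shift, plus a sub-nanosecond remainder), so readout shifts back down and truncates. A self-contained user-space sketch of that conversion follows; the shift value and numbers are made up for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t shift = 8;	/* hypothetical clocksource shift */
	/* shifted fixed point: integer nanoseconds plus a sub-ns remainder */
	uint64_t xtime_nsec = (123456789ULL << shift) + 37;

	/* tk_xtime()-style readout: drop the fractional bits */
	long tv_nsec = (long)(xtime_nsec >> shift);

	printf("tv_nsec = %ld\n", tv_nsec);	/* prints 123456789 */
	return 0;
}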
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 18 ---------- kernel/time/timekeeping.c | 70 +++++++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 45 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 1b05491e10f9..16de6d7c240a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -71,16 +71,6 @@ struct timekeeper { }; -static inline struct timespec64 tk_xtime(struct timekeeper *tk) -{ - struct timespec64 ts; - - ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); - return ts; -} - - #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timekeeper *tk); @@ -92,14 +82,6 @@ extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, struct clocksource *c, u32 mult); extern void update_vsyscall_tz(void); -static inline void update_vsyscall(struct timekeeper *tk) -{ - struct timespec xt; - - xt = tk_xtime(tk); - update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); -} - #else static inline void update_vsyscall(struct timekeeper *tk) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3210c9e690c5..983d67b388d7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -51,6 +51,15 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) } } +static inline struct timespec64 tk_xtime(struct timekeeper *tk) +{ + struct timespec64 ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + return ts; +} + static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; @@ -199,6 +208,40 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) return nsec + arch_gettimeoffset(); } +#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD + +static inline void update_vsyscall(struct timekeeper *tk) +{ + struct timespec xt; + + xt = tk_xtime(tk); + update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); +} + +static inline void old_vsyscall_fixup(struct timekeeper *tk) +{ + s64 remainder; + + /* + * Store only full nanoseconds into xtime_nsec after rounding + * it up and add the remainder to the error difference. + * XXX - This is necessary to avoid small 1ns inconsistnecies caused + * by truncating the remainder in vsyscalls. However, it causes + * additional work to be done in timekeeping_adjust(). Once + * the vsyscall implementations are converted to use xtime_nsec + * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD + * users are removed, this can be killed. + */ + remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); + tk->xtime_nsec -= remainder; + tk->xtime_nsec += 1ULL << tk->shift; + tk->ntp_error += remainder << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; +} +#else +#define old_vsyscall_fixup(tk) +#endif + static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) @@ -1330,33 +1373,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, return offset; } -#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD -static inline void old_vsyscall_fixup(struct timekeeper *tk) -{ - s64 remainder; - - /* - * Store only full nanoseconds into xtime_nsec after rounding - * it up and add the remainder to the error difference. 
- * XXX - This is necessary to avoid small 1ns inconsistnecies caused - * by truncating the remainder in vsyscalls. However, it causes - * additional work to be done in timekeeping_adjust(). Once - * the vsyscall implementations are converted to use xtime_nsec - * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD - * users are removed, this can be killed. - */ - remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); - tk->xtime_nsec -= remainder; - tk->xtime_nsec += 1ULL << tk->shift; - tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; -} -#else -#define old_vsyscall_fixup(tk) -#endif - - - /** * update_wall_time - Uses the current clocksource to increment the wall time * -- cgit v1.2.3 From 3fdb14fd1df70325e1e91e1203a699a4803ed741 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:07 +0000 Subject: timekeeping: Cache optimize struct timekeeper struct timekeeper is quite badly sorted for the hot readout path. Most time access functions need to load two cache lines. Rearrange it so ktime_get() and getnstimeofday() are happy with a single cache line. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 84 ++++++++-------- kernel/time/timekeeping.c | 185 +++++++++++++++++++----------------- 2 files changed, 143 insertions(+), 126 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 16de6d7c240a..2cb96235c249 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -10,7 +10,22 @@ #include #include -/* Structure holding internal timekeeping values. */ +/* + * Structure holding internal timekeeping values. + * + * Note: wall_to_monotonic is what we need to add to xtime (or xtime + * corrected for sub jiffie times) to get to monotonic time. + * Monotonic is pegged at zero at system boot time, so + * wall_to_monotonic will be negative, however, we will ALWAYS keep + * the tv_nsec part positive so we can use the usual normalization. + * + * wall_to_monotonic is moved after resume from suspend for the + * monotonic time not to jump. We need to add total_sleep_time to + * wall_to_monotonic to get the real boot based time offset. + * + * - wall_to_monotonic is no longer the boot time, getboottime must be + * used instead. + */ struct timekeeper { /* Current clocksource used for timekeeping. */ struct clocksource *clock; @@ -18,6 +33,29 @@ struct timekeeper { u32 mult; /* The shift value of the current clocksource. */ u32 shift; + /* Clock shifted nano seconds */ + u64 xtime_nsec; + + /* Current CLOCK_REALTIME time in seconds */ + u64 xtime_sec; + /* CLOCK_REALTIME to CLOCK_MONOTONIC offset */ + struct timespec64 wall_to_monotonic; + + /* Offset clock monotonic -> clock realtime */ + ktime_t offs_real; + /* Offset clock monotonic -> clock boottime */ + ktime_t offs_boot; + /* Offset clock monotonic -> clock tai */ + ktime_t offs_tai; + + /* time spent in suspend */ + struct timespec64 total_sleep_time; + /* The current UTC to TAI offset in seconds */ + s32 tai_offset; + + /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ + struct timespec64 raw_time; + /* Number of clock cycles in one NTP interval. */ cycle_t cycle_interval; /* Last cycle value (also stored in clock->cycle_last) */ @@ -29,46 +67,16 @@ struct timekeeper { /* Raw nano seconds accumulated per NTP interval. 
*/ u32 raw_interval; - /* Current CLOCK_REALTIME time in seconds */ - u64 xtime_sec; - /* Clock shifted nano seconds */ - u64 xtime_nsec; - - /* Difference between accumulated time and NTP time in ntp - * shifted nano seconds. */ + /* + * Difference between accumulated time and NTP time in ntp + * shifted nano seconds. + */ s64 ntp_error; - /* Shift conversion between clock shifted nano seconds and - * ntp shifted nano seconds. */ - u32 ntp_error_shift; - /* - * wall_to_monotonic is what we need to add to xtime (or xtime corrected - * for sub jiffie times) to get to monotonic time. Monotonic is pegged - * at zero at system boot time, so wall_to_monotonic will be negative, - * however, we will ALWAYS keep the tv_nsec part positive so we can use - * the usual normalization. - * - * wall_to_monotonic is moved after resume from suspend for the - * monotonic time not to jump. We need to add total_sleep_time to - * wall_to_monotonic to get the real boot based time offset. - * - * - wall_to_monotonic is no longer the boot time, getboottime must be - * used instead. + * Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. */ - struct timespec64 wall_to_monotonic; - /* Offset clock monotonic -> clock realtime */ - ktime_t offs_real; - /* time spent in suspend */ - struct timespec64 total_sleep_time; - /* Offset clock monotonic -> clock boottime */ - ktime_t offs_boot; - /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec64 raw_time; - /* The current UTC to TAI offset in seconds */ - s32 tai_offset; - /* Offset clock monotonic -> clock tai */ - ktime_t offs_tai; - + u32 ntp_error_shift; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 983d67b388d7..7ca150ad387d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -32,9 +32,16 @@ #define TK_MIRROR (1 << 1) #define TK_CLOCK_WAS_SET (1 << 2) -static struct timekeeper timekeeper; +/* + * The most important data for readout fits into a single 64 byte + * cache line. 
+ */ +static struct { + seqcount_t seq; + struct timekeeper timekeeper; +} tk_core ____cacheline_aligned; + static DEFINE_RAW_SPINLOCK(timekeeper_lock); -static seqcount_t timekeeper_seq; static struct timekeeper shadow_timekeeper; /* flag for if timekeeping is suspended */ @@ -254,7 +261,7 @@ static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) */ int pvclock_gtod_register_notifier(struct notifier_block *nb) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; int ret; @@ -295,7 +302,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); if (action & TK_MIRROR) - memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); + memcpy(&shadow_timekeeper, &tk_core.timekeeper, + sizeof(tk_core.timekeeper)); } /** @@ -336,17 +344,17 @@ static void timekeeping_forward_now(struct timekeeper *tk) */ int __getnstimeofday64(struct timespec64 *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; s64 nsecs = 0; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsecs = timekeeping_get_ns(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); @@ -375,18 +383,18 @@ EXPORT_SYMBOL(getnstimeofday64); ktime_t ktime_get(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; s64 secs, nsecs; WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ktime_set(secs, nsecs); } @@ -402,7 +410,7 @@ EXPORT_SYMBOL_GPL(ktime_get); */ void ktime_get_ts64(struct timespec64 *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono; s64 nsec; unsigned int seq; @@ -410,12 +418,12 @@ void ktime_get_ts64(struct timespec64 *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ts->tv_sec += tomono.tv_sec; ts->tv_nsec = 0; @@ -432,7 +440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts64); */ void timekeeping_clocktai(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; unsigned long seq; u64 nsecs; @@ -440,12 +448,12 @@ void timekeeping_clocktai(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts64.tv_sec = tk->xtime_sec + tk->tai_offset; nsecs = timekeeping_get_ns(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ts64.tv_nsec = 0; timespec64_add_ns(&ts64, nsecs); @@ -482,14 +490,14 @@ EXPORT_SYMBOL(ktime_get_clocktai); */ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) { - struct timekeeper *tk = &timekeeper; + struct 
timekeeper *tk = &tk_core.timekeeper; unsigned long seq; s64 nsecs_raw, nsecs_real; WARN_ON_ONCE(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); *ts_raw = timespec64_to_timespec(tk->raw_time); ts_real->tv_sec = tk->xtime_sec; @@ -498,7 +506,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) nsecs_raw = timekeeping_get_ns_raw(tk); nsecs_real = timekeeping_get_ns(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); timespec_add_ns(ts_raw, nsecs_raw); timespec_add_ns(ts_real, nsecs_real); @@ -531,7 +539,7 @@ EXPORT_SYMBOL(do_gettimeofday); */ int do_settimeofday(const struct timespec *tv) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts_delta, xt, tmp; unsigned long flags; @@ -539,7 +547,7 @@ int do_settimeofday(const struct timespec *tv) return -EINVAL; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); @@ -554,7 +562,7 @@ int do_settimeofday(const struct timespec *tv) timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ @@ -572,7 +580,7 @@ EXPORT_SYMBOL(do_settimeofday); */ int timekeeping_inject_offset(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 ts64, tmp; int ret = 0; @@ -583,7 +591,7 @@ int timekeeping_inject_offset(struct timespec *ts) ts64 = timespec_to_timespec64(*ts); raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); @@ -600,7 +608,7 @@ int timekeeping_inject_offset(struct timespec *ts) error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ @@ -617,14 +625,14 @@ EXPORT_SYMBOL(timekeeping_inject_offset); */ s32 timekeeping_get_tai_offset(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned int seq; s32 ret; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret = tk->tai_offset; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } @@ -645,14 +653,14 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) */ void timekeeping_set_tai_offset(s32 tai_offset) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); __timekeeping_set_tai_offset(tk, tai_offset); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); clock_was_set(); } @@ -664,14 +672,14 @@ void timekeeping_set_tai_offset(s32 tai_offset) */ static int change_clocksource(void *data) { 
- struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *new, *old; unsigned long flags; new = (struct clocksource *) data; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); /* @@ -691,7 +699,7 @@ static int change_clocksource(void *data) } timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); return 0; @@ -706,7 +714,7 @@ static int change_clocksource(void *data) */ int timekeeping_notify(struct clocksource *clock) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; if (tk->clock == clock) return 0; @@ -738,17 +746,17 @@ EXPORT_SYMBOL_GPL(ktime_get_real); */ void getrawmonotonic(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; unsigned long seq; s64 nsecs; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); nsecs = timekeeping_get_ns_raw(tk); ts64 = tk->raw_time; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); timespec64_add_ns(&ts64, nsecs); *ts = timespec64_to_timespec(ts64); @@ -760,16 +768,16 @@ EXPORT_SYMBOL(getrawmonotonic); */ int timekeeping_valid_for_hres(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; int ret; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } @@ -779,16 +787,16 @@ int timekeeping_valid_for_hres(void) */ u64 timekeeping_max_deferment(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; u64 ret; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret = tk->clock->max_idle_ns; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return ret; } @@ -828,7 +836,7 @@ void __weak read_boot_clock(struct timespec *ts) */ void __init timekeeping_init(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock; unsigned long flags; struct timespec64 now, boot, tmp; @@ -854,7 +862,7 @@ void __init timekeeping_init(void) } raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); ntp_init(); clock = clocksource_default_clock(); @@ -875,9 +883,10 @@ void __init timekeeping_init(void) tmp.tv_nsec = 0; tk_set_sleep_time(tk, tmp); - memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper)); + memcpy(&shadow_timekeeper, &tk_core.timekeeper, + sizeof(tk_core.timekeeper)); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } @@ -918,7 +927,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, */ void timekeeping_inject_sleeptime(struct timespec *delta) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tmp; unsigned long flags; @@ -930,7 +939,7 @@ void 
timekeeping_inject_sleeptime(struct timespec *delta) return; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); @@ -939,7 +948,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); /* signal hrtimers about time change */ @@ -955,7 +964,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) */ static void timekeeping_resume(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock = tk->clock; unsigned long flags; struct timespec64 ts_new, ts_delta; @@ -970,7 +979,7 @@ static void timekeeping_resume(void) clocksource_resume(); raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); /* * After system resumes, we need to calculate the suspended time and @@ -1022,7 +1031,7 @@ static void timekeeping_resume(void) tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); touch_softlockup_watchdog(); @@ -1035,7 +1044,7 @@ static void timekeeping_resume(void) static int timekeeping_suspend(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; @@ -1053,7 +1062,7 @@ static int timekeeping_suspend(void) persistent_clock_exist = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; @@ -1078,7 +1087,7 @@ static int timekeeping_suspend(void) } timekeeping_update(tk, TK_MIRROR); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); @@ -1380,7 +1389,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, void update_wall_time(void) { struct clocksource *clock; - struct timekeeper *real_tk = &timekeeper; + struct timekeeper *real_tk = &tk_core.timekeeper; struct timekeeper *tk = &shadow_timekeeper; cycle_t offset; int shift = 0, maxshift; @@ -1440,7 +1449,7 @@ void update_wall_time(void) */ clock_set |= accumulate_nsecs_to_secs(tk); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); /* Update clock->cycle_last with the new value */ clock->cycle_last = tk->cycle_last; /* @@ -1450,12 +1459,12 @@ void update_wall_time(void) * requires changes to all other timekeeper usage sites as * well, i.e. move the timekeeper pointer getter into the * spinlocked/seqcount protected sections. And we trade this - * memcpy under the timekeeper_seq against one before we start + * memcpy under the tk_core.seq against one before we start * updating. 
*/ memcpy(real_tk, tk, sizeof(*tk)); timekeeping_update(real_tk, clock_set); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (clock_set) @@ -1476,7 +1485,7 @@ out: */ void getboottime(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec boottime = { .tv_sec = tk->wall_to_monotonic.tv_sec + tk->total_sleep_time.tv_sec, @@ -1499,7 +1508,7 @@ EXPORT_SYMBOL_GPL(getboottime); */ void get_monotonic_boottime(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 tomono, sleep, ret; s64 nsec; unsigned int seq; @@ -1507,13 +1516,13 @@ void get_monotonic_boottime(struct timespec *ts) WARN_ON(timekeeping_suspended); do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ret.tv_sec = tk->xtime_sec; nsec = timekeeping_get_ns(tk); tomono = tk->wall_to_monotonic; sleep = tk->total_sleep_time; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); ret.tv_sec += tomono.tv_sec + sleep.tv_sec; ret.tv_nsec = 0; @@ -1545,7 +1554,7 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime); */ void monotonic_to_bootbased(struct timespec *ts) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts64; ts64 = timespec_to_timespec64(*ts); @@ -1556,7 +1565,7 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; return tk->xtime_sec; } @@ -1564,22 +1573,22 @@ EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; return timespec64_to_timespec(tk_xtime(tk)); } struct timespec current_kernel_time(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now; unsigned long seq; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); now = tk_xtime(tk); - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return timespec64_to_timespec(now); } @@ -1587,16 +1596,16 @@ EXPORT_SYMBOL(current_kernel_time); struct timespec get_monotonic_coarse(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 now, mono; unsigned long seq; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); now = tk_xtime(tk); mono = tk->wall_to_monotonic; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); @@ -1624,19 +1633,19 @@ void do_timer(unsigned long ticks) ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; struct timespec64 ts; ktime_t now; unsigned int seq; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); ts = tk_xtime(tk); *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); now = 
ktime_set(ts.tv_sec, ts.tv_nsec); now = ktime_sub(now, *offs_real); return now; } /** * ktime_get_update_offsets_now - hrtimer helper @@ -1656,13 +1665,13 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, ktime_t *offs_tai) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; ktime_t now; unsigned int seq; u64 secs, nsecs; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); secs = tk->xtime_sec; nsecs = timekeeping_get_ns(tk); @@ -1670,7 +1679,7 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; *offs_tai = tk->offs_tai; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); now = ktime_add_ns(ktime_set(secs, 0), nsecs); now = ktime_sub(now, *offs_real); @@ -1683,14 +1692,14 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, */ ktime_t ktime_get_monotonic_offset(void) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long seq; struct timespec64 wtom; do { - seq = read_seqcount_begin(&timekeeper_seq); + seq = read_seqcount_begin(&tk_core.seq); wtom = tk->wall_to_monotonic; - } while (read_seqcount_retry(&timekeeper_seq, seq)); + } while (read_seqcount_retry(&tk_core.seq, seq)); return timespec64_to_ktime(wtom); } @@ -1701,7 +1710,7 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); */ int do_adjtimex(struct timex *txc) { - struct timekeeper *tk = &timekeeper; + struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; struct timespec64 ts; s32 orig_tai, tai; @@ -1726,7 +1735,7 @@ int do_adjtimex(struct timex *txc) getnstimeofday64(&ts); raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); orig_tai = tai = tk->tai_offset; ret = __do_adjtimex(txc, &ts, &tai); @@ -1735,7 +1744,7 @@ int do_adjtimex(struct timex *txc) __timekeeping_set_tai_offset(tk, tai); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); } - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (tai != orig_tai) @@ -1755,11 +1764,11 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) unsigned long flags; raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&timekeeper_seq); + write_seqcount_begin(&tk_core.seq); __hardpps(phase_ts, raw_ts); - write_seqcount_end(&timekeeper_seq); + write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } EXPORT_SYMBOL(hardpps); -- cgit v1.2.3 From 7c032df5570388044b4efda3d9f4d2ffb96a3116 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:10 +0000 Subject: timekeeping: Provide internal ktime_t based data The ktime_t based interfaces are used a lot in performance critical code paths. Add ktime_t based data so the interfaces don't have to convert from the xtime/timespec based data.
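The win is visible in the derivation carried in tk_update_ktime_data() below: once base_mono caches (xtime_sec + wtm_sec) * 1e9 + wtm_nsec, a monotonic readout reduces to base_mono plus the clocksource delta. A self-contained sketch of that arithmetic, with made-up state values:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000LL

int main(void)
{
	/* hypothetical timekeeper state */
	int64_t xtime_sec = 1405546800;		/* CLOCK_REALTIME seconds */
	int64_t wtm_sec   = -1405500000;	/* wall_to_monotonic is negative */
	int64_t wtm_nsec  = 250000000;
	int64_t delta_ns  = 1234567;		/* nanoseconds read from the clocksource */

	/* per-read conversion, as the xtime based readout does it */
	int64_t slow = (xtime_sec + wtm_sec) * NSEC_PER_SEC + wtm_nsec + delta_ns;

	/* base_mono precomputed once per timekeeping update */
	int64_t base_mono = (xtime_sec + wtm_sec) * NSEC_PER_SEC + wtm_nsec;
	int64_t fast = base_mono + delta_ns;

	printf("%lld == %lld\n", (long long)slow, (long long)fast);
	return 0;
}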
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 3 +++ kernel/time/timekeeping.c | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 2cb96235c249..87e0992564f2 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -36,6 +36,9 @@ struct timekeeper { /* Clock shifted nano seconds */ u64 xtime_nsec; + /* Monotonic base time */ + ktime_t base_mono; + /* Current CLOCK_REALTIME time in seconds */ u64 xtime_sec; /* CLOCK_REALTIME to CLOCK_MONOTONIC offset */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index bfe3ea09afc9..86a92476c027 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -291,6 +291,26 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); +/* + * Update the ktime_t based scalar nsec members of the timekeeper + */ +static inline void tk_update_ktime_data(struct timekeeper *tk) +{ + s64 nsec; + + /* + * The xtime based monotonic readout is: + * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now(); + * The ktime based monotonic readout is: + * nsec = base_mono + now(); + * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + */ + nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); + nsec *= NSEC_PER_SEC; + nsec += tk->wall_to_monotonic.tv_nsec; + tk->base_mono = ns_to_ktime(nsec); +} + /* must hold timekeeper_lock */ static void timekeeping_update(struct timekeeper *tk, unsigned int action) { @@ -301,6 +321,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) update_vsyscall(tk); update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); + tk_update_ktime_data(tk); + if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); -- cgit v1.2.3 From 47da70d32535000ec29cc206cfc1d318fbd8761f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:00 +0000 Subject: timekeeping: Remove timekeeper.total_sleep_time No more users. Remove it Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 6 ++---- kernel/time/timekeeping.c | 14 +++----------- 2 files changed, 5 insertions(+), 15 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 87e0992564f2..8e5d77a01787 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -20,8 +20,8 @@ * the tv_nsec part positive so we can use the usual normalization. * * wall_to_monotonic is moved after resume from suspend for the - * monotonic time not to jump. We need to add total_sleep_time to - * wall_to_monotonic to get the real boot based time offset. + * monotonic time not to jump. To calculate the real boot time offset + * we need to do offs_real - offs_boot. * * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. 
@@ -51,8 +51,6 @@ struct timekeeper { /* Offset clock monotonic -> clock tai */ ktime_t offs_tai; - /* time spent in suspend */ - struct timespec64 total_sleep_time; /* The current UTC to TAI offset in seconds */ s32 tai_offset; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3edc0c1d6fe8..50d5de05b837 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -97,13 +97,9 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0)); } -static void tk_set_sleep_time(struct timekeeper *tk, struct timespec64 t) +static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { - /* Verify consistency before modifying */ - WARN_ON_ONCE(tk->offs_boot.tv64 != timespec64_to_ktime(tk->total_sleep_time).tv64); - - tk->total_sleep_time = t; - tk->offs_boot = timespec64_to_ktime(t); + tk->offs_boot = ktime_add(tk->offs_boot, delta); } /** @@ -919,10 +915,6 @@ void __init timekeeping_init(void) set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); tk_set_wall_to_mono(tk, tmp); - tmp.tv_sec = 0; - tmp.tv_nsec = 0; - tk_set_sleep_time(tk, tmp); - timekeeping_update(tk, TK_MIRROR); write_seqcount_end(&tk_core.seq); @@ -950,7 +942,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } tk_xtime_add(tk, delta); tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta)); - tk_set_sleep_time(tk, timespec64_add(tk->total_sleep_time, *delta)); + tk_update_sleep_time(tk, timespec64_to_ktime(*delta)); tk_debug_account_sleep_time(delta); } -- cgit v1.2.3 From f519b1a2e08c913375324a927992bb328387f169 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:04 +0000 Subject: timekeeping: Provide ktime_get_raw() Provide a ktime_t based interface for raw monotonic time. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 3 +++ include/linux/timekeeping.h | 6 ++++++ kernel/time/timekeeping.c | 25 +++++++++++++++++++++++++ 3 files changed, 34 insertions(+) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 8e5d77a01787..2e20275a7083 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -54,6 +54,9 @@ struct timekeeper { /* The current UTC to TAI offset in seconds */ s32 tai_offset; + /* Monotonic raw base time */ + ktime_t base_raw; + /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. 
*/ struct timespec64 raw_time; diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index f0f12a84a31b..58ad7eff83ff 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -107,6 +107,7 @@ enum tk_offsets { extern ktime_t ktime_get(void); extern ktime_t ktime_get_with_offset(enum tk_offsets offs); extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs); +extern ktime_t ktime_get_raw(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format @@ -158,6 +159,11 @@ static inline u64 ktime_get_boot_ns(void) return ktime_to_ns(ktime_get_boottime()); } +static inline u64 ktime_get_raw_ns(void) +{ + return ktime_to_ns(ktime_get_raw()); +} + /* * Timespec interfaces utilizing the ktime based ones */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 118e91e3071c..af8051f4420d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -305,6 +305,9 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) nsec *= NSEC_PER_SEC; nsec += tk->wall_to_monotonic.tv_nsec; tk->base_mono = ns_to_ktime(nsec); + + /* Update the monotonic raw base */ + tk->base_raw = timespec64_to_ktime(tk->raw_time); } /* must hold timekeeper_lock */ @@ -466,6 +469,27 @@ ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs) } EXPORT_SYMBOL_GPL(ktime_mono_to_any); +/** + * ktime_get_raw - Returns the raw monotonic time in ktime_t format + */ +ktime_t ktime_get_raw(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + unsigned int seq; + ktime_t base; + s64 nsecs; + + do { + seq = read_seqcount_begin(&tk_core.seq); + base = tk->base_raw; + nsecs = timekeeping_get_ns_raw(tk); + + } while (read_seqcount_retry(&tk_core.seq, seq)); + + return ktime_add_ns(base, nsecs); +} +EXPORT_SYMBOL_GPL(ktime_get_raw); + /** * ktime_get_ts64 - get the monotonic clock in timespec64 format * @ts: pointer to timespec variable @@ -878,6 +902,7 @@ void __init timekeeping_init(void) tk_set_xtime(tk, &now); tk->raw_time.tv_sec = 0; tk->raw_time.tv_nsec = 0; + tk->base_raw.tv64 = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); -- cgit v1.2.3 From 4a0e637738f06673725792d74eed67f8779b62c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:13 +0000 Subject: clocksource: Get rid of cycle_last cycle_last was added to the clocksource to support the TSC validation. We moved that to the core code, so we can get rid of the extra copy. 
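The per-clocksource copy is redundant because the delta math needs only the timekeeper's cycle_last together with the clocksource mask; the masked two's complement subtraction makes counters narrower than 64 bit wrap correctly. A user-space model of the clocksource_delta() helper the hunks below call (the constants are made up):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cycle_t;

/* masked two's complement subtraction: the mask truncates the result
 * to the counter width, so a wrapped counter still yields the right delta */
static cycle_t clocksource_delta(cycle_t now, cycle_t last, cycle_t mask)
{
	return (now - last) & mask;
}

int main(void)
{
	cycle_t mask = 0xffffffff;	/* a 32-bit wide counter */
	cycle_t last = 0xfffffff0;	/* sampled just before the wrap */
	cycle_t now  = 0x00000010;	/* the counter has wrapped since */

	/* prints delta = 0x20 despite the wraparound */
	printf("delta = 0x%llx\n",
	       (unsigned long long)clocksource_delta(now, last, mask));
	return 0;
}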
Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/arm64/kernel/vdso.c | 2 +- arch/ia64/kernel/time.c | 4 ++-- arch/powerpc/kernel/time.c | 4 ++-- arch/s390/kernel/time.c | 2 +- arch/tile/kernel/time.c | 2 +- arch/x86/kernel/vsyscall_gtod.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/clocksource.h | 2 -- include/linux/timekeeper_internal.h | 7 ++++--- kernel/time/timekeeping.c | 23 +++++++++++------------ 10 files changed, 24 insertions(+), 26 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 50384fec56c4..574672f001f7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -224,7 +224,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->clock->cycle_last; + vdso_data->cs_cycle_last = tk->cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 71c52bc7c28d..11dc42da7daf 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -441,7 +441,7 @@ void update_vsyscall_tz(void) } void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, - struct clocksource *c, u32 mult) + struct clocksource *c, u32 mult, cycles_t cycle_last) { write_seqcount_begin(&fsyscall_gtod_data.seq); @@ -450,7 +450,7 @@ void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, fsyscall_gtod_data.clk_mult = mult; fsyscall_gtod_data.clk_shift = c->shift; fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio; - fsyscall_gtod_data.clk_cycle_last = c->cycle_last; + fsyscall_gtod_data.clk_cycle_last = cycle_last; /* copy kernel time structures */ fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9fff9cdcc519..368ab374d33c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -741,7 +741,7 @@ static cycle_t timebase_read(struct clocksource *cs) } void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) + struct clocksource *clock, u32 mult, cycle_t cycle_last) { u64 new_tb_to_xs, new_stamp_xsec; u32 frac_sec; @@ -774,7 +774,7 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, * We expect the caller to have done the first increment of * vdso_data->tb_update_count already. */ - vdso_data->tb_orig_stamp = clock->cycle_last; + vdso_data->tb_orig_stamp = cycle_last; vdso_data->stamp_xsec = new_stamp_xsec; vdso_data->tb_to_xs = new_tb_to_xs; vdso_data->wtom_clock_sec = wtm->tv_sec; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 0931b110c826..97950f392613 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -220,7 +220,7 @@ void update_vsyscall(struct timekeeper *tk) /* Make userspace gettimeofday spin until we're done. 
*/ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->clock->cycle_last; + vdso_data->xtime_tod_stamp = tk->cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->wtom_clock_sec = diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index ae70155c2f16..d22d5bfc1e4e 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -269,7 +269,7 @@ void update_vsyscall(struct timekeeper *tk) /* Userspace gettimeofday will spin while this value is odd. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = clock->cycle_last; + vdso_data->xtime_tod_stamp = tk->cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->wtom_clock_sec = wtm->tv_sec; diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index 9531fbb123ba..c3cb3c144591 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -32,7 +32,7 @@ void update_vsyscall(struct timekeeper *tk) /* copy vsyscall data */ vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->clock->cycle_last; + vdata->cycle_last = tk->cycle_last; vdata->mask = tk->clock->mask; vdata->mult = tk->mult; vdata->shift = tk->shift; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63832f5110b6..7b25125f3f42 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1001,7 +1001,7 @@ static void update_pvclock_gtod(struct timekeeper *tk) /* copy pvclock gtod data */ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.cycle_last = tk->cycle_last; vdata->clock.mask = tk->clock->mask; vdata->clock.mult = tk->mult; vdata->clock.shift = tk->shift; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a16b497d5159..653f0e2b6ca9 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -162,7 +162,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary - * @cycle_last: most recent cycle counter value seen by ::read() * @owner: module reference, must be set by clocksource in modules */ struct clocksource { @@ -171,7 +170,6 @@ struct clocksource { * clocksource itself is cacheline aligned. */ cycle_t (*read)(struct clocksource *cs); - cycle_t cycle_last; cycle_t mask; u32 mult; u32 shift; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 2e20275a7083..cb88096222c0 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -29,6 +29,8 @@ struct timekeeper { /* Current clocksource used for timekeeping. */ struct clocksource *clock; + /* Last cycle value */ + cycle_t cycle_last; /* NTP adjusted clock multiplier */ u32 mult; /* The shift value of the current clocksource. */ @@ -62,8 +64,6 @@ struct timekeeper { /* Number of clock cycles in one NTP interval. */ cycle_t cycle_interval; - /* Last cycle value (also stored in clock->cycle_last) */ - cycle_t cycle_last; /* Number of clock shifted nano seconds in one NTP interval. 
*/ u64 xtime_interval; /* shifted nano seconds left over when rounding cycle_interval */ @@ -91,7 +91,8 @@ extern void update_vsyscall_tz(void); #elif defined(CONFIG_GENERIC_TIME_VSYSCALL_OLD) extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, - struct clocksource *c, u32 mult); + struct clocksource *c, u32 mult, + cycles_t cycle_last); extern void update_vsyscall_tz(void); #else diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 531805013786..4e748c404749 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -121,7 +121,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) old_clock = tk->clock; tk->clock = clock; - tk->cycle_last = clock->cycle_last = clock->read(clock); + tk->cycle_last = clock->read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -182,7 +182,7 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, clock->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); nsec = delta * tk->mult + tk->xtime_nsec; nsec >>= tk->shift; @@ -202,7 +202,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, clock->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -218,7 +218,8 @@ static inline void update_vsyscall(struct timekeeper *tk) struct timespec xt; xt = tk_xtime(tk); - update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); + update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult, + tk->cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -342,8 +343,8 @@ static void timekeeping_forward_now(struct timekeeper *tk) clock = tk->clock; cycle_now = clock->read(clock); - delta = clocksource_delta(cycle_now, clock->cycle_last, clock->mask); - tk->cycle_last = clock->cycle_last = cycle_now; + delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + tk->cycle_last = cycle_now; tk->xtime_nsec += delta * tk->mult; @@ -1020,13 +1021,13 @@ static void timekeeping_resume(void) */ cycle_now = clock->read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > clock->cycle_last) { + cycle_now > tk->cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, clock->cycle_last, + cycle_delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); /* @@ -1053,7 +1054,7 @@ static void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->cycle_last = clock->cycle_last = cycle_now; + tk->cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1433,7 +1434,7 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(clock->read(clock), clock->cycle_last, + offset = clocksource_delta(clock->read(clock), tk->cycle_last, clock->mask); #endif @@ -1477,8 +1478,6 @@ void update_wall_time(void) clock_set |= 
accumulate_nsecs_to_secs(tk); write_seqcount_begin(&tk_core.seq); - /* Update clock->cycle_last with the new value */ - clock->cycle_last = tk->cycle_last; /* * Update the real timekeeper. * -- cgit v1.2.3 From 6d3aadf3e180e09dbefab16478c6876b584ce16e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:15 +0000 Subject: timekeeping: Restructure the timekeeper some more Access to time requires touching at least two cache lines: 1) The timekeeper data structure 2) The clocksource data structure The access to the clocksource data structure can be avoided as almost all clocksource implementations ignore the argument to the read callback, which is a pointer to the clocksource. But the core needs to touch it to access the members @read and @mask. So we are better off copying the @read function pointer and the @mask from the clocksource to the core data structure itself. For the most used ktime_get() access, all required data including the @read and @mask copies fits together with the sequence counter into a single 64 byte cache line. For the other time access functions, the current code touches three cache lines in the worst case. But with the clocksource data copies we can reduce that to two adjacent cache lines, which is more efficient than disjoint cache lines. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 4 ++++ kernel/time/timekeeping.c | 35 +++++++++++++++-------------------- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index cb88096222c0..75bb8add78f5 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -29,6 +29,10 @@ struct timekeeper { /* Current clocksource used for timekeeping.
*/ struct clocksource *clock; + /* Read function of @clock */ + cycle_t (*read)(struct clocksource *cs); + /* Bitmask for two's complement subtraction of non 64bit counters */ + cycle_t mask; /* Last cycle value */ cycle_t cycle_last; /* NTP adjusted clock multiplier */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4e748c404749..14b7367e6b94 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -121,7 +121,9 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) old_clock = tk->clock; tk->clock = clock; - tk->cycle_last = clock->read(clock); + tk->read = clock->read; + tk->mask = clock->mask; + tk->cycle_last = tk->read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -174,15 +176,13 @@ static inline u32 arch_gettimeoffset(void) { return 0; } static inline s64 timekeeping_get_ns(struct timekeeper *tk) { cycle_t cycle_now, delta; - struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = tk->clock; - cycle_now = clock->read(clock); + cycle_now = tk->read(tk->clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); nsec = delta * tk->mult + tk->xtime_nsec; nsec >>= tk->shift; @@ -193,16 +193,15 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { + struct clocksource *clock = tk->clock; cycle_t cycle_now, delta; - struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = tk->clock; - cycle_now = clock->read(clock); + cycle_now = tk->read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -337,13 +336,12 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { + struct clocksource *clock = tk->clock; cycle_t cycle_now, delta; - struct clocksource *clock; s64 nsec; - clock = tk->clock; - cycle_now = clock->read(clock); - delta = clocksource_delta(cycle_now, tk->cycle_last, clock->mask); + cycle_now = tk->read(clock); + delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); tk->cycle_last = cycle_now; tk->xtime_nsec += delta * tk->mult; @@ -1019,7 +1017,7 @@ static void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. 
*/ - cycle_now = clock->read(clock); + cycle_now = tk->read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && cycle_now > tk->cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; cycle_delta = clocksource_delta(cycle_now, tk->cycle_last, - clock->mask); + tk->mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1415,7 +1413,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, */ void update_wall_time(void) { - struct clocksource *clock; struct timekeeper *real_tk = &tk_core.timekeeper; struct timekeeper *tk = &shadow_timekeeper; cycle_t offset; @@ -1429,13 +1426,11 @@ void update_wall_time(void) if (unlikely(timekeeping_suspended)) goto out; - clock = real_tk->clock; - #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(clock->read(clock), tk->cycle_last, - clock->mask); + offset = clocksource_delta(tk->read(tk->clock), tk->cycle_last, + tk->mask); #endif /* Check if there's really nothing to do */ -- cgit v1.2.3 From d28ede83791defee9a81e558540699dc46dbbe13 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:16 +0000 Subject: timekeeping: Create struct tk_read_base and use it in struct timekeeper The members of the new struct are the required ones for the new NMI safe accessor to clock monotonic. In order to reuse the existing timekeeping code and to make the update of the fast NMI safe timekeepers a simple memcpy, use the struct for the timekeeper as well and convert all users. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- arch/arm64/kernel/vdso.c | 10 +-- arch/s390/kernel/time.c | 16 ++--- arch/tile/kernel/time.c | 10 +-- arch/x86/kernel/vsyscall_gtod.c | 23 ++++--- arch/x86/kvm/x86.c | 14 ++-- include/linux/timekeeper_internal.h | 103 +++++++++++++++------------- kernel/time/timekeeping.c | 132 ++++++++++++++++++------------------ 7 files changed, 158 insertions(+), 150 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 574672f001f7..8296f7f5f0ba 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -211,7 +211,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) void update_vsyscall(struct timekeeper *tk) { struct timespec xtime_coarse; - u32 use_syscall = strcmp(tk->clock->name, "arch_sys_counter"); + u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); @@ -224,11 +224,11 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->cycle_last; + vdso_data->cs_cycle_last = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; - vdso_data->cs_mult = tk->mult; - vdso_data->cs_shift = tk->shift; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; + vdso_data->cs_mult = tk->tkr.mult; + vdso_data->cs_shift = tk->tkr.shift; } smp_wmb(); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 97950f392613..4cef607f3711 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -214,26 +214,26 @@ void update_vsyscall(struct timekeeper *tk) { u64 nsecps; - if (tk->clock != &clocksource_tod) + if (tk->tkr.clock != &clocksource_tod) return; /* Make userspace gettimeofday spin until we're
done. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; vdso_data->wtom_clock_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift); - nsecps = (u64) NSEC_PER_SEC << tk->shift; + vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); + nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; while (vdso_data->wtom_clock_nsec >= nsecps) { vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } - vdso_data->tk_mult = tk->mult; - vdso_data->tk_shift = tk->shift; + vdso_data->tk_mult = tk->tkr.mult; + vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index d22d5bfc1e4e..d8fbc289e680 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -261,7 +261,7 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timekeeper *tk) { struct timespec *wtm = &tk->wall_to_monotonic; - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; if (clock != &cycle_counter_cs) return; @@ -269,13 +269,13 @@ void update_vsyscall(struct timekeeper *tk) /* Userspace gettimeofday will spin while this value is odd. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; vdso_data->wtom_clock_sec = wtm->tv_sec; vdso_data->wtom_clock_nsec = wtm->tv_nsec; - vdso_data->mult = tk->mult; - vdso_data->shift = tk->shift; + vdso_data->mult = tk->tkr.mult; + vdso_data->shift = tk->tkr.shift; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index c3cb3c144591..c7d791f32b98 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->cycle_last; - vdata->mask = tk->clock->mask; - vdata->mult = tk->mult; - vdata->shift = tk->shift; + vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr.cycle_last; + vdata->mask = tk->tkr.mask; + vdata->mult = tk->tkr.mult; + vdata->shift = tk->tkr.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; + vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec + vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->shift); + << tk->tkr.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; + ((u64)NSEC_PER_SEC) << tk->tkr.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); + vdata->wall_time_coarse_nsec = 
(long)(tk->tkr.xtime_nsec >> + tk->tkr.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7b25125f3f42..b7e57946d1c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -995,19 +995,19 @@ static void update_pvclock_gtod(struct timekeeper *tk) struct pvclock_gtod_data *vdata = &pvclock_gtod_data; u64 boot_ns; - boot_ns = ktime_to_ns(ktime_add(tk->base_mono, tk->offs_boot)); + boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); write_seqcount_begin(&vdata->seq); /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->cycle_last; - vdata->clock.mask = tk->clock->mask; - vdata->clock.mult = tk->mult; - vdata->clock.shift = tk->shift; + vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr.cycle_last; + vdata->clock.mask = tk->tkr.mask; + vdata->clock.mult = tk->tkr.mult; + vdata->clock.shift = tk->tkr.shift; vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->xtime_nsec; + vdata->nsec_base = tk->tkr.xtime_nsec; write_seqcount_end(&vdata->seq); } diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 75bb8add78f5..97381997625b 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -10,80 +10,87 @@ #include #include -/* - * Structure holding internal timekeeping values. - * - * Note: wall_to_monotonic is what we need to add to xtime (or xtime - * corrected for sub jiffie times) to get to monotonic time. - * Monotonic is pegged at zero at system boot time, so - * wall_to_monotonic will be negative, however, we will ALWAYS keep - * the tv_nsec part positive so we can use the usual normalization. +/** + * struct tk_read_base - base structure for timekeeping readout + * @clock: Current clocksource used for timekeeping. + * @read: Read function of @clock + * @mask: Bitmask for two's complement subtraction of non 64bit clocks + * @cycle_last: @clock cycle value at last update + * @mult: NTP adjusted multiplier for scaled math conversion + * @shift: Shift value for scaled math conversion + * @xtime_nsec: Shifted (fractional) nano seconds offset for readout + * @base_mono: ktime_t (nanoseconds) base time for readout * - * wall_to_monotonic is moved after resume from suspend for the - * monotonic time not to jump. To calculate the real boot time offset - * we need to do offs_real - offs_boot. + * This struct has size 56 byte on 64 bit. Together with a seqcount it + * occupies a single 64byte cache line. * - * - wall_to_monotonic is no longer the boot time, getboottime must be - * used instead. + * The struct is separate from struct timekeeper as it is also used + * for a fast NMI safe accessor to clock monotonic. */ -struct timekeeper { - /* Current clocksource used for timekeeping. */ +struct tk_read_base { struct clocksource *clock; - /* Read function of @clock */ cycle_t (*read)(struct clocksource *cs); - /* Bitmask for two's complement subtraction of non 64bit counters */ cycle_t mask; - /* Last cycle value */ cycle_t cycle_last; - /* NTP adjusted clock multiplier */ u32 mult; - /* The shift value of the current clocksource. */ u32 shift; - /* Clock shifted nano seconds */ u64 xtime_nsec; - - /* Monotonic base time */ ktime_t base_mono; +}; - /* Current CLOCK_REALTIME time in seconds */ +/** + * struct timekeeper - Structure holding internal timekeeping values. 
+ * @tkr: The readout base structure + * @xtime_sec: Current CLOCK_REALTIME time in seconds + * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset + * @offs_real: Offset clock monotonic -> clock realtime + * @offs_boot: Offset clock monotonic -> clock boottime + * @offs_tai: Offset clock monotonic -> clock tai + * @tai_offset: The current UTC to TAI offset in seconds + * @base_raw: Monotonic raw base time in ktime_t format + * @raw_time: Monotonic raw base time in timespec64 format + * @cycle_interval: Number of clock cycles in one NTP interval + * @xtime_interval: Number of clock shifted nano seconds in one NTP + * interval. + * @xtime_remainder: Shifted nano seconds left over when rounding + * @cycle_interval + * @raw_interval: Raw nano seconds accumulated per NTP interval. + * @ntp_error: Difference between accumulated time and NTP time in ntp + * shifted nano seconds. + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. + * + * Note: For timespec(64) based interfaces wall_to_monotonic is what + * we need to add to xtime (or xtime corrected for sub jiffie times) + * to get to monotonic time. Monotonic is pegged at zero at system + * boot time, so wall_to_monotonic will be negative, however, we will + * ALWAYS keep the tv_nsec part positive so we can use the usual + * normalization. + * + * wall_to_monotonic is moved after resume from suspend for the + * monotonic time not to jump. We need to add total_sleep_time to + * wall_to_monotonic to get the real boot based time offset. + * + * wall_to_monotonic is no longer the boot time, getboottime must be + * used instead. + */ +struct timekeeper { + struct tk_read_base tkr; u64 xtime_sec; - /* CLOCK_REALTIME to CLOCK_MONOTONIC offset */ struct timespec64 wall_to_monotonic; - - /* Offset clock monotonic -> clock realtime */ ktime_t offs_real; - /* Offset clock monotonic -> clock boottime */ ktime_t offs_boot; - /* Offset clock monotonic -> clock tai */ ktime_t offs_tai; - - /* The current UTC to TAI offset in seconds */ s32 tai_offset; - - /* Monotonic raw base time */ ktime_t base_raw; - - /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ struct timespec64 raw_time; - /* Number of clock cycles in one NTP interval. */ + /* The following members are for timekeeping internal use */ cycle_t cycle_interval; - /* Number of clock shifted nano seconds in one NTP interval. */ u64 xtime_interval; - /* shifted nano seconds left over when rounding cycle_interval */ s64 xtime_remainder; - /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; - - /* - * Difference between accumulated time and NTP time in ntp - * shifted nano seconds. - */ s64 ntp_error; - /* - * Shift conversion between clock shifted nano seconds and - * ntp shifted nano seconds. 
- */ u32 ntp_error_shift; }; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 14b7367e6b94..ccb69980ef7e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -52,8 +52,8 @@ bool __read_mostly persistent_clock_exist = false; static inline void tk_normalize_xtime(struct timekeeper *tk) { - while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { - tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; + while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { + tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; tk->xtime_sec++; } } @@ -63,20 +63,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) struct timespec64 ts; ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; - tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift; + tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; - tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; + tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; tk_normalize_xtime(tk); } @@ -119,11 +119,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = tk->clock; - tk->clock = clock; - tk->read = clock->read; - tk->mask = clock->mask; - tk->cycle_last = tk->read(clock); + old_clock = tk->tkr.clock; + tk->tkr.clock = clock; + tk->tkr.read = clock->read; + tk->tkr.mask = clock->mask; + tk->tkr.cycle_last = tk->tkr.read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -147,11 +147,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - tk->xtime_nsec >>= -shift_change; + tk->tkr.xtime_nsec >>= -shift_change; else - tk->xtime_nsec <<= shift_change; + tk->tkr.xtime_nsec <<= shift_change; } - tk->shift = clock->shift; + tk->tkr.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -161,7 +161,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - tk->mult = clock->mult; + tk->tkr.mult = clock->mult; } /* Timekeeper helper functions. 
*/ @@ -179,13 +179,13 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) s64 nsec; /* read clocksource: */ - cycle_now = tk->read(tk->clock); + cycle_now = tk->tkr.read(tk->tkr.clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); + delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - nsec = delta * tk->mult + tk->xtime_nsec; - nsec >>= tk->shift; + nsec = delta * tk->tkr.mult + tk->tkr.xtime_nsec; + nsec >>= tk->tkr.shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); @@ -193,15 +193,15 @@ static inline s64 timekeeping_get_ns(struct timekeeper *tk) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; cycle_t cycle_now, delta; s64 nsec; /* read clocksource: */ - cycle_now = tk->read(clock); + cycle_now = tk->tkr.read(clock); /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); + delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -217,8 +217,8 @@ static inline void update_vsyscall(struct timekeeper *tk) struct timespec xt; xt = tk_xtime(tk); - update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult, - tk->cycle_last); + update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult, + tk->tkr.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -235,11 +235,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * users are removed, this can be killed. 
*/ - remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); - tk->xtime_nsec -= remainder; - tk->xtime_nsec += 1ULL << tk->shift; + remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); + tk->tkr.xtime_nsec -= remainder; + tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) @@ -304,7 +304,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec *= NSEC_PER_SEC; nsec += tk->wall_to_monotonic.tv_nsec; - tk->base_mono = ns_to_ktime(nsec); + tk->tkr.base_mono = ns_to_ktime(nsec); /* Update the monotonic raw base */ tk->base_raw = timespec64_to_ktime(tk->raw_time); @@ -336,18 +336,18 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; cycle_t cycle_now, delta; s64 nsec; - cycle_now = tk->read(clock); - delta = clocksource_delta(cycle_now, tk->cycle_last, tk->mask); - tk->cycle_last = cycle_now; + cycle_now = tk->tkr.read(clock); + delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); + tk->tkr.cycle_last = cycle_now; - tk->xtime_nsec += delta * tk->mult; + tk->tkr.xtime_nsec += delta * tk->tkr.mult; /* If arch requires, add in get_arch_timeoffset() */ - tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; + tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; tk_normalize_xtime(tk); @@ -412,7 +412,7 @@ ktime_t ktime_get(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_mono; + base = tk->tkr.base_mono; nsecs = timekeeping_get_ns(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -438,7 +438,7 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); - base = ktime_add(tk->base_mono, *offset); + base = ktime_add(tk->tkr.base_mono, *offset); nsecs = timekeeping_get_ns(tk); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -731,7 +731,7 @@ static int change_clocksource(void *data) */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) { - old = tk->clock; + old = tk->tkr.clock; tk_setup_internals(tk, new); if (old->disable) old->disable(old); @@ -759,11 +759,11 @@ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; - if (tk->clock == clock) + if (tk->tkr.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); - return tk->clock == clock ? 0 : -1; + return tk->tkr.clock == clock ? 
0 : -1; } /** @@ -803,7 +803,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -822,7 +822,7 @@ u64 timekeeping_max_deferment(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->clock->max_idle_ns; + ret = tk->tkr.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -989,7 +989,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) static void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; - struct clocksource *clock = tk->clock; + struct clocksource *clock = tk->tkr.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; struct timespec tmp; @@ -1017,16 +1017,16 @@ static void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->read(clock); + cycle_now = tk->tkr.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > tk->cycle_last) { + cycle_now > tk->tkr.cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, tk->cycle_last, - tk->mask); + cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, + tk->tkr.mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1052,7 +1052,7 @@ static void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->cycle_last = cycle_now; + tk->tkr.cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1239,12 +1239,12 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) } } - if (unlikely(tk->clock->maxadj && - (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { + if (unlikely(tk->tkr.clock->maxadj && + (tk->tkr.mult + adj > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { printk_deferred_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->clock->name, (long)tk->mult + adj, - (long)tk->clock->mult + tk->clock->maxadj); + tk->tkr.clock->name, (long)tk->tkr.mult + adj, + (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); } /* * So the following can be confusing. @@ -1295,9 +1295,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * * XXX - TODO: Doc ntp_error calculation. */ - tk->mult += adj; + tk->tkr.mult += adj; tk->xtime_interval += interval; - tk->xtime_nsec -= offset; + tk->tkr.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; out_adjust: @@ -1315,9 +1315,9 @@ out_adjust: * We'll correct this error next time through this function, when * xtime_nsec is not as small. 
*/ - if (unlikely((s64)tk->xtime_nsec < 0)) { - s64 neg = -(s64)tk->xtime_nsec; - tk->xtime_nsec = 0; + if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { + s64 neg = -(s64)tk->tkr.xtime_nsec; + tk->tkr.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } @@ -1333,13 +1333,13 @@ out_adjust: */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { - u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; unsigned int clock_set = 0; - while (tk->xtime_nsec >= nsecps) { + while (tk->tkr.xtime_nsec >= nsecps) { int leap; - tk->xtime_nsec -= nsecps; + tk->tkr.xtime_nsec -= nsecps; tk->xtime_sec++; /* Figure out if its a leap sec and apply if needed */ @@ -1384,9 +1384,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; - tk->cycle_last += interval; + tk->tkr.cycle_last += interval; - tk->xtime_nsec += tk->xtime_interval << shift; + tk->tkr.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ @@ -1429,8 +1429,8 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->read(tk->clock), tk->cycle_last, - tk->mask); + offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), + tk->tkr.cycle_last, tk->tkr.mask); #endif /* Check if there's really nothing to do */ @@ -1591,8 +1591,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_mono; - nsecs = tk->xtime_nsec >> tk->shift; + base = tk->tkr.base_mono; + nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; @@ -1623,7 +1623,7 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_mono; + base = tk->tkr.base_mono; nsecs = timekeeping_get_ns(tk); *offs_real = tk->offs_real; -- cgit v1.2.3 From dc491596f6394382fbc74ad331156207d619fa0a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 Dec 2013 17:25:21 -0800 Subject: timekeeping: Rework frequency adjustments to work better w/ nohz The existing timekeeping_adjust logic has always been complicated to understand. Further, since it was developed prior to NOHZ becoming common, it's not surprising that it performs poorly when NOHZ is enabled. Since Miroslav pointed out the problematic nature of the existing code in the NOHZ case, I've tried to refactor the code to perform better. The problem with the previous approach was that it tried to adjust for the total cumulative error using a scaled dampening factor. This resulted in large errors being corrected slowly, while small errors were corrected quickly. With NOHZ the timekeeping code doesn't know how far out the next tick will be, so this results in bad over-correction of small errors and insufficient correction of large errors. Inspired by Miroslav's patch, I've refactored the code to try to address the correction in two steps. 1) Check the future freq error for the next tick, and if the frequency error is large, try to make sure we correct it so it doesn't cause much accumulated error. 2) Then make a small, single-unit adjustment to correct any cumulative error that has collected over time. This method performs fairly well in the simulator Miroslav created.
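To make the two-step scheme concrete before the diff, here is a minimal user-space model of it. This is a sketch only: struct tk_model and both helper names are hypothetical, and the real code below operates on struct timekeeper in shifted nanoseconds via timekeeping_apply_adjustment().

#include <stdint.h>

/* Hypothetical stand-in for the few timekeeper fields involved. */
struct tk_model {
	int64_t cycle_interval;	/* clock cycles per NTP interval */
	int64_t mult;		/* multiplier being steered */
	int64_t ntp_error;	/* accumulated error, signed */
	int	err_mult;	/* nonzero while a single-unit nudge is applied */
};

/* Step 1: if the per-tick frequency error exceeds one interval, apply a
 * power-of-two scaled multiplier correction so it cannot accumulate much. */
static void freq_adjust(struct tk_model *tk, int64_t tick_error)
{
	int negative = tick_error < 0;
	int64_t step;
	uint32_t adj;

	if (!negative && tick_error <= tk->cycle_interval)
		return;			/* small error: leave it to step 2 */
	if (negative)
		tick_error = -tick_error;
	for (adj = 0; tick_error > tk->cycle_interval; adj++)
		tick_error >>= 1;
	step = (int64_t)1 << adj;
	tk->mult += negative ? -step : step;
}

/* Step 2: drain the remaining cumulative error with a single-unit
 * multiplier nudge, undone once the error changes sign. */
static void cumulative_adjust(struct tk_model *tk)
{
	if (!tk->err_mult && tk->ntp_error > 0) {
		tk->err_mult = 1;
		tk->mult += 1;
	} else if (tk->err_mult && tk->ntp_error <= 0) {
		tk->err_mult = 0;
		tk->mult -= 1;
	}
}

In the patch itself these two steps correspond to timekeeping_freqadjust() and the ntp_err_mult branch of the new timekeeping_adjust().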
Major credit to Miroslav for pointing out the issue, providing the original patch to resolve this, a simulator for testing, as well as helping debug and resolve issues in my implementation so that it performed closer to his original implementation. Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Reported-by: Miroslav Lichvar Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 1 + kernel/time/timekeeping.c | 193 ++++++++++++++++-------------------- 2 files changed, 84 insertions(+), 110 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 97381997625b..f7ac48d2edf5 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -92,6 +92,7 @@ struct timekeeper { u32 raw_interval; s64 ntp_error; u32 ntp_error_shift; + u32 ntp_err_mult; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2b56b959615b..43c706a7a728 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -178,6 +178,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * to counteract clock drifting. */ tk->tkr.mult = clock->mult; + tk->ntp_err_mult = 0; } /* Timekeeper helper functions. */ @@ -1257,125 +1258,34 @@ static int __init timekeeping_init_ops(void) register_syscore_ops(&timekeeping_syscore_ops); return 0; } - device_initcall(timekeeping_init_ops); /* - * If the error is already larger, we look ahead even further - * to compensate for late or lost adjustments. - */ -static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, - s64 error, s64 *interval, - s64 *offset) -{ - s64 tick_error, i; - u32 look_ahead, adj; - s32 error2, mult; - - /* - * Use the current error value to determine how much to look ahead. - * The larger the error the slower we adjust for it to avoid problems - * with losing too many ticks, otherwise we would overadjust and - * produce an even larger error. The smaller the adjustment the - * faster we try to adjust for it, as lost ticks can do less harm - * here. This is tuned so that an error of about 1 msec is adjusted - * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). - */ - error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); - error2 = abs(error2); - for (look_ahead = 0; error2 > 0; look_ahead++) - error2 >>= 2; - - /* - * Now calculate the error in (1 << look_ahead) ticks, but first - * remove the single look ahead already included in the error. - */ - tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); - tick_error -= tk->xtime_interval >> 1; - error = ((error - tick_error) >> look_ahead) + tick_error; - - /* Finally calculate the adjustment shift value. */ - i = *interval; - mult = 1; - if (error < 0) { - error = -error; - *interval = -*interval; - *offset = -*offset; - mult = -1; - } - for (adj = 0; error > i; adj++) - error >>= 1; - - *interval <<= adj; - *offset <<= adj; - return mult << adj; -} - -/* - * Adjust the multiplier to reduce the error value, - * this is optimized for the most common adjustments of -1,0,1, - * for other values we can do a bit more work. 
+ * Apply a multiplier adjustment to the timekeeper */ -static void timekeeping_adjust(struct timekeeper *tk, s64 offset) +static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, + s64 offset, + bool negative, + int adj_scale) { - s64 error, interval = tk->cycle_interval; - int adj; + s64 interval = tk->cycle_interval; + s32 mult_adj = 1; - /* - * The point of this is to check if the error is greater than half - * an interval. - * - * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. - * - * Note we subtract one in the shift, so that error is really error*2. - * This "saves" dividing(shifting) interval twice, but keeps the - * (error > interval) comparison as still measuring if error is - * larger than half an interval. - * - * Note: It does not "save" on aggravation when reading the code. - */ - error = tk->ntp_error >> (tk->ntp_error_shift - 1); - if (error > interval) { - /* - * We now divide error by 4(via shift), which checks if - * the error is greater than twice the interval. - * If it is greater, we need a bigadjust, if its smaller, - * we can adjust by 1. - */ - error >>= 2; - if (likely(error <= interval)) - adj = 1; - else - adj = timekeeping_bigadjust(tk, error, &interval, &offset); - } else { - if (error < -interval) { - /* See comment above, this is just switched for the negative */ - error >>= 2; - if (likely(error >= -interval)) { - adj = -1; - interval = -interval; - offset = -offset; - } else { - adj = timekeeping_bigadjust(tk, error, &interval, &offset); - } - } else { - goto out_adjust; - } + if (negative) { + mult_adj = -mult_adj; + interval = -interval; + offset = -offset; } + mult_adj <<= adj_scale; + interval <<= adj_scale; + offset <<= adj_scale; - if (unlikely(tk->tkr.clock->maxadj && - (tk->tkr.mult + adj > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { - printk_deferred_once(KERN_WARNING - "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->tkr.clock->name, (long)tk->tkr.mult + adj, - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); - } /* * So the following can be confusing. * - * To keep things simple, lets assume adj == 1 for now. + * To keep things simple, lets assume mult_adj == 1 for now. * - * When adj != 1, remember that the interval and offset values + * When mult_adj != 1, remember that the interval and offset values * have been appropriately scaled so the math is the same. * * The basic idea here is that we're increasing the multiplier @@ -1419,12 +1329,76 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * * XXX - TODO: Doc ntp_error calculation. 
*/ - tk->tkr.mult += adj; + tk->tkr.mult += mult_adj; tk->xtime_interval += interval; tk->tkr.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; +} + +/* + * Calculate the multiplier adjustment needed to match the frequency + * specified by NTP + */ +static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, + s64 offset) +{ + s64 interval = tk->cycle_interval; + s64 xinterval = tk->xtime_interval; + s64 tick_error; + bool negative; + u32 adj; + + /* Remove any current error adj from freq calculation */ + if (tk->ntp_err_mult) + xinterval -= tk->cycle_interval; + + /* Calculate current error per tick */ + tick_error = ntp_tick_length() >> tk->ntp_error_shift; + tick_error -= (xinterval + tk->xtime_remainder); + + /* Don't worry about correcting it if its small */ + if (likely((tick_error >= 0) && (tick_error <= interval))) + return; + + /* preserve the direction of correction */ + negative = (tick_error < 0); + + /* Sort out the magnitude of the correction */ + tick_error = abs(tick_error); + for (adj = 0; tick_error > interval; adj++) + tick_error >>= 1; + + /* scale the corrections */ + timekeeping_apply_adjustment(tk, offset, negative, adj); +} + +/* + * Adjust the timekeeper's multiplier to the correct frequency + * and also to reduce the accumulated error value. + */ +static void timekeeping_adjust(struct timekeeper *tk, s64 offset) +{ + /* Correct for the current frequency error */ + timekeeping_freqadjust(tk, offset); + + /* Next make a small adjustment to fix any cumulative error */ + if (!tk->ntp_err_mult && (tk->ntp_error > 0)) { + tk->ntp_err_mult = 1; + timekeeping_apply_adjustment(tk, offset, 0, 0); + } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) { + /* Undo any existing error adjustment */ + timekeeping_apply_adjustment(tk, offset, 1, 0); + tk->ntp_err_mult = 0; + } + + if (unlikely(tk->tkr.clock->maxadj && + (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { + printk_once(KERN_WARNING + "Adjusting %s more than 11%% (%ld vs %ld)\n", + tk->tkr.clock->name, (long)tk->tkr.mult, + (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); + } -out_adjust: /* * It may be possible that when we entered this function, xtime_nsec * was very small. Further, if we're slightly speeding the clocksource @@ -1444,7 +1418,6 @@ out_adjust: tk->tkr.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } - } /** -- cgit v1.2.3 From 375f45b5b53a91dfa8f0c11328e0e044f82acbed Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 23 Apr 2014 20:53:29 -0700 Subject: timekeeping: Use cached ntp_tick_length when accumulating error By caching ntp_tick_length() when we correct the frequency error, and then using that cached value to accumulate error, we avoid large initial errors when the tick length is changed. This makes convergence happen much faster in the simulator, since the initial error doesn't have to be slowly whittled away. This initially seems like an accounting error, but Miroslav pointed out that ntp_tick_length() can change mid-tick, so when we apply it in the error accumulation, we are applying any recent change to the entire tick. This approach applies changes in ntp_tick_length() only to the next tick, which lets us calculate the frequency correction before using the new tick length and avoids accumulating error. Credit to Miroslav for pointing this out and providing the original patch from which this functionality was pulled, along with the rationale.
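The essence of the change, as a small user-space model (struct tk_model and the helper names are hypothetical; ntp_tick_length() is stubbed here, since in the kernel it can change mid-tick, which is exactly what the cache guards against):

#include <stdint.h>

/* Stub standing in for the kernel's ntp_tick_length(). */
extern uint64_t ntp_tick_length(void);

struct tk_model {
	uint64_t ntp_tick;	/* cached tick length for the tick in progress */
	uint64_t xtime_interval;
	int64_t  xtime_remainder;
	int64_t  ntp_error;
	uint32_t ntp_error_shift;
};

/* Snapshot once, when the frequency correction is computed... */
static void freq_adjust_step(struct tk_model *tk)
{
	tk->ntp_tick = ntp_tick_length();
	/* ...and compute the correction from the same snapshot. */
}

/* ...then accumulate error against the snapshot, never a fresh read,
 * so a mid-tick change of the tick length only affects the next tick. */
static void accumulate_step(struct tk_model *tk, unsigned int shift)
{
	tk->ntp_error += (int64_t)(tk->ntp_tick << shift);
	tk->ntp_error -= (int64_t)((tk->xtime_interval + tk->xtime_remainder)
				   << (tk->ntp_error_shift + shift));
}

The actual patch is the three-hunk diff below: one new field, one initialization, and one substitution in logarithmic_accumulation().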
Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Reported-by: Miroslav Lichvar Signed-off-by: John Stultz --- include/linux/timekeeper_internal.h | 9 +++++++++ kernel/time/timekeeping.c | 5 ++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index f7ac48d2edf5..e9660e52dc09 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -90,6 +90,15 @@ struct timekeeper { u64 xtime_interval; s64 xtime_remainder; u32 raw_interval; + /* The ntp_tick_length() value currently being used. + * This cached copy ensures we consistently apply the tick + * length for an entire tick, as ntp_tick_length may change + * mid-tick, and we don't want to apply that new value to + * the tick in progress. + */ + u64 ntp_tick; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 43c706a7a728..f36b02838a47 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -171,6 +171,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + tk->ntp_tick = ntpinterval << tk->ntp_error_shift; /* * The timekeeper keeps its own mult values for the currently @@ -1352,6 +1353,8 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, if (tk->ntp_err_mult) xinterval -= tk->cycle_interval; + tk->ntp_tick = ntp_tick_length(); + /* Calculate current error per tick */ tick_error = ntp_tick_length() >> tk->ntp_error_shift; tick_error -= (xinterval + tk->xtime_remainder); @@ -1497,7 +1500,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ - tk->ntp_error += ntp_tick_length() << shift; + tk->ntp_error += tk->ntp_tick << shift; tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << (tk->ntp_error_shift + shift); -- cgit v1.2.3 From 953dec21aed4038464fec02f96a2f1b8701a5bce Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 25 Jul 2014 21:37:19 -0700 Subject: timekeeping: Fixup typo in update_vsyscall_old definition In commit 4a0e637738f0 ("clocksource: Get rid of cycle_last"), currently in the -tip tree, there was a small typo where cycles_t was used instead of cycle_t. This broke ppc64 builds. Fix this by using the proper cycle_t type for this usage, in both the definition and the ia64 implementation. Now, having both cycle_t and cycles_t types seems like a very bad idea, just asking for these sorts of issues. But that will be a cleanup for another day. Reported-by: Stephen Rothwell Signed-off-by: John Stultz Cc: Ingo Molnar Cc: "H.
Peter Anvin" Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1406349439-11785-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- arch/ia64/kernel/time.c | 2 +- include/linux/timekeeper_internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/timekeeper_internal.h') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 11dc42da7daf..3e71ef85e439 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -441,7 +441,7 @@ void update_vsyscall_tz(void) } void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, - struct clocksource *c, u32 mult, cycles_t cycle_last) + struct clocksource *c, u32 mult, cycle_t cycle_last) { write_seqcount_begin(&fsyscall_gtod_data.seq); diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e9660e52dc09..95640dcd1899 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -113,7 +113,7 @@ extern void update_vsyscall_tz(void); extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, struct clocksource *c, u32 mult, - cycles_t cycle_last); + cycle_t cycle_last); extern void update_vsyscall_tz(void); #else -- cgit v1.2.3