From e81ce1f7ecdaed2844c75313b09af791d44e6373 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 5 Mar 2007 00:30:51 -0800 Subject: [PATCH] timer/hrtimer: take per cpu locks in sane order Doing something like this on a two cpu system # echo 0 > /sys/devices/system/cpu/cpu0/online # echo 1 > /sys/devices/system/cpu/cpu0/online # echo 0 > /sys/devices/system/cpu/cpu1/online will give me this: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.21-rc2-g562aa1d4-dirty #7 ------------------------------------------------------- bash/1282 is trying to acquire lock: (&cpu_base->lock_key){.+..}, at: [<000000000005f17e>] hrtimer_cpu_notify+0xc6/0x240 but task is already holding lock: (&cpu_base->lock_key#2){.+..}, at: [<000000000005f174>] hrtimer_cpu_notify+0xbc/0x240 which lock already depends on the new lock. This happens because we have the following code in kernel/hrtimer.c: migrate_hrtimers(int cpu) [...] old_base = &per_cpu(hrtimer_bases, cpu); new_base = &get_cpu_var(hrtimer_bases); [...] spin_lock(&new_base->lock); spin_lock(&old_base->lock); Which means the spinlocks are taken in an order which depends on which cpu gets shut down from which other cpu. Therefore lockdep complains that there might be an ABBA deadlock. Since migrate_hrtimers() gets only called on cpu hotplug it's safe to assume that it isn't executed concurrently on a The same problem exists in kernel/timer.c: migrate_timers(). As pointed out by Christian Borntraeger one possible solution to avoid the locking order complaints would be to make sure that the locks are always taken in the same order. E.g. by taking the lock of the cpu with the lower number first. To achieve this we introduce two new spinlock functions double_spin_lock and double_spin_unlock which lock or unlock two locks in a given order. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Roman Zippel Cc: John Stultz Cc: Christian Borntraeger Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 476cb0c0b4a4..de93a8176ca6 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1355,17 +1355,16 @@ static void migrate_hrtimers(int cpu) tick_cancel_sched_timer(cpu); local_irq_disable(); - - spin_lock(&new_base->lock); - spin_lock(&old_base->lock); + double_spin_lock(&new_base->lock, &old_base->lock, + smp_processor_id() < cpu); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + double_spin_unlock(&new_base->lock, &old_base->lock, + smp_processor_id() < cpu); local_irq_enable(); put_cpu_var(hrtimer_bases); } -- cgit v1.2.3 From f8953856eb8dd62232aee6cacb46993dc2ac4869 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 6 Mar 2007 01:42:08 -0800 Subject: [PATCH] highres: do not run the TIMER_SOFTIRQ after switching to highres mode The TIMER_SOFTIRQ runs the hrtimers during bootup until a usable clocksource and clock event sources are registered. The switch to high resolution mode happens inside of the TIMER_SOFTIRQ, but runs the softirq afterwards. That way the tick emulation timer, which was set up in the switch to highres might be executed in the softirq context, which is a BUG. The rbtree has not to be touched by the softirq after the highres switch. This BUG was observed by Andres Salomon, who provided the information to debug it. Return early from the softirq, when the switch was sucessful. [dilinger@debian.org: add debug warning] [akpm@linux-foundation.org: make debug warning compile] Signed-off-by: Thomas Gleixner Cc: Andres Salomon Acked-by: Ingo Molnar Signed-off-by: Andres Salomon Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index de93a8176ca6..ec4cb9f3e3b7 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -540,19 +540,19 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* * Switch to high resolution mode */ -static void hrtimer_switch_to_hres(void) +static int hrtimer_switch_to_hres(void) { struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); unsigned long flags; if (base->hres_active) - return; + return 1; local_irq_save(flags); if (tick_init_highres()) { local_irq_restore(flags); - return; + return 0; } base->hres_active = 1; base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES; @@ -565,13 +565,14 @@ static void hrtimer_switch_to_hres(void) local_irq_restore(flags); printk(KERN_INFO "Switched to high resolution mode on CPU %d\n", smp_processor_id()); + return 1; } #else static inline int hrtimer_hres_active(void) { return 0; } static inline int hrtimer_is_hres_enabled(void) { return 0; } -static inline void hrtimer_switch_to_hres(void) { } +static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) @@ -1130,6 +1131,9 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, if (base->softirq_time.tv64 <= timer->expires.tv64) break; +#ifdef CONFIG_HIGH_RES_TIMERS + WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ); +#endif timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1173,7 +1177,8 @@ void hrtimer_run_queues(void) * deadlock vs. xtime_lock. */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) - hrtimer_switch_to_hres(); + if (hrtimer_switch_to_hres()) + return; hrtimer_get_softirq_time(cpu_base); -- cgit v1.2.3 From 13788ccc41ceea5893f9c747c59bc0b28f2416c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 16 Mar 2007 13:38:20 -0800 Subject: [PATCH] hrtimer: prevent overrun DoS in hrtimer_forward() hrtimer_forward() does not check for the possible overflow of timer->expires. This can happen on 64 bit machines with large interval values and results currently in an endless loop in the softirq because the expiry value becomes negative and therefor the timer is expired all the time. Check for this condition and set the expiry value to the max. expiry time in the future. The fix should be applied to stable kernel series as well. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ec4cb9f3e3b7..5e7122d3f46f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -644,6 +644,12 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) orun++; } timer->expires = ktime_add(timer->expires, interval); + /* + * Make sure, that the result did not wrap with a very large + * interval. + */ + if (timer->expires.tv64 < 0) + timer->expires = ktime_set(KTIME_SEC_MAX, 0); return orun; } -- cgit v1.2.3 From ad28d94abb1313bdf27e196676292c493f92f824 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 16 Mar 2007 13:38:21 -0800 Subject: [PATCH] hrtimer: fix up unlocked access to wall_to_monotonic commit f4304ab21513b834c8fe3403927c60c2b81a72d7 (HZ free NTP) moved the access to wall_to_monotonic in hrtimer_get_softirq_time() out of the xtime_lock protection. Move it back into the seq_lock section. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5e7122d3f46f..6a7938a0d513 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) { ktime_t xtim, tomono; - struct timespec xts; + struct timespec xts, tom; unsigned long seq; do { @@ -145,10 +145,11 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) #else xts = xtime; #endif + tom = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); xtim = timespec_to_ktime(xts); - tomono = timespec_to_ktime(wall_to_monotonic); + tomono = timespec_to_ktime(tom); base->clock_base[CLOCK_REALTIME].softirq_time = xtim; base->clock_base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono); -- cgit v1.2.3 From 935c631db827cc3a96df4dcc6fec374b994fdbd1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Mar 2007 13:17:18 +0200 Subject: [PATCH] hrtimers: fix reprogramming SMP race hrtimer_start() incorrectly set the 'reprogram' flag to enqueue_hrtimer(), which should only be 1 if the hrtimer is queued to the current CPU. Doing otherwise could result in a reprogramming of the current CPU's clockevents device, with a timer that is not queued to it - resulting in a bogus next expiry value. Signed-off-by: Ingo Molnar Cc: Michal Piotrowski Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6a7938a0d513..067ba2c05328 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -814,7 +814,12 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) timer_stats_hrtimer_set_start_info(timer); - enqueue_hrtimer(timer, new_base, base == new_base); + /* + * Only allow reprogramming if the new base is on this CPU. + * (it might still be on another CPU if the timer was pending) + */ + enqueue_hrtimer(timer, new_base, + new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); unlock_hrtimer_base(timer, &flags); -- cgit v1.2.3 From 995f054f2a342f8505fed4f8395d12c0f5966414 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 7 Apr 2007 12:05:00 +0200 Subject: [PATCH] high-res timers: resume fix Soeren Sonnenburg reported that upon resume he is getting this backtrace: [] smp_apic_timer_interrupt+0x57/0x90 [] retrigger_next_event+0x0/0xb0 [] apic_timer_interrupt+0x28/0x30 [] retrigger_next_event+0x0/0xb0 [] __kfifo_put+0x8/0x90 [] on_each_cpu+0x35/0x60 [] clock_was_set+0x18/0x20 [] timekeeping_resume+0x7c/0xa0 [] __sysdev_resume+0x11/0x80 [] sysdev_resume+0x47/0x80 [] device_power_up+0x5/0x10 it turns out that on resume we mistakenly re-enable interrupts too early. Do the timer retrigger only on the current CPU. Signed-off-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: Soeren Sonnenburg Signed-off-by: Linus Torvalds --- include/linux/hrtimer.h | 3 +++ kernel/hrtimer.c | 12 ++++++++++++ kernel/timer.c | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 5bdbc744e773..17c29dca8354 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -206,6 +206,7 @@ struct hrtimer_cpu_base { struct clock_event_device; extern void clock_was_set(void); +extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -236,6 +237,8 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) */ static inline void clock_was_set(void) { } +static inline void hres_timers_resume(void) { } + /* * In non high resolution mode the time reference is taken from * the base softirq time variable. diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 067ba2c05328..b74860aaf5f1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -458,6 +458,18 @@ void clock_was_set(void) on_each_cpu(retrigger_next_event, NULL, 0, 1); } +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hres_timers_resume(void) +{ + WARN_ON_ONCE(num_online_cpus() > 1); + + /* Retrigger the CPU local events: */ + retrigger_next_event(NULL); +} + /* * Check, whether the timer is on the callback pending list */ diff --git a/kernel/timer.c b/kernel/timer.c index 440048acaea1..dd6c2c1c561b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1016,7 +1016,7 @@ static int timekeeping_resume(struct sys_device *dev) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - clock_was_set(); + hres_timers_resume(); return 0; } -- cgit v1.2.3 From 641b9e0e8b7f96425da6ce98f3361e3af0baee29 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 16 Mar 2007 01:18:42 -0700 Subject: [NET_SCHED]: Use ktime as clocksource Get rid of the manual clock source selection mess and use ktime. Also use a scalar representation, which allows to clean up pkt_sched.h a bit more and results in less ktime_to_ns() calls in most cases. The PSCHED_US2JIFFIE/PSCHED_JIFFIE2US macros are implemented quite inefficient by this patch, following patches will convert all qdiscs to hrtimers and get rid of them entirely. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 169 ++++-------------------------------------------- kernel/hrtimer.c | 1 + net/sched/Kconfig | 56 ---------------- net/sched/sch_api.c | 77 +--------------------- net/sched/sch_hfsc.c | 31 +-------- 5 files changed, 19 insertions(+), 315 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f6afee73235d..1c12afd113d6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -2,6 +2,7 @@ #define __NET_PKT_SCHED_H #include +#include #include struct qdisc_walker @@ -37,176 +38,32 @@ static inline void *qdisc_priv(struct Qdisc *q) The things are not so bad, because we may use artifical clock evaluated by integration of network data flow in the most critical places. - - Note: we do not use fastgettimeofday. - The reason is that, when it is not the same thing as - gettimeofday, it returns invalid timestamp, which is - not updated, when net_bh is active. */ -/* General note about internal clock. - - Any clock source returns time intervals, measured in units - close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely - microseconds, otherwise something close but different chosen to minimize - arithmetic cost. Ratio usec/internal untis in form nominator/denominator - may be read from /proc/net/psched. - */ - - -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY - -typedef struct timeval psched_time_t; -typedef long psched_tdiff_t; - -#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) -#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs) -#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay) - -#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ - typedef u64 psched_time_t; typedef long psched_tdiff_t; -#ifdef CONFIG_NET_SCH_CLK_JIFFIES - -#if HZ < 96 -#define PSCHED_JSCALE 14 -#elif HZ >= 96 && HZ < 192 -#define PSCHED_JSCALE 13 -#elif HZ >= 192 && HZ < 384 -#define PSCHED_JSCALE 12 -#elif HZ >= 384 && HZ < 768 -#define PSCHED_JSCALE 11 -#elif HZ >= 768 -#define PSCHED_JSCALE 10 -#endif - -#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<>PSCHED_JSCALE) -#define PSCHED_JIFFIE2US(delay) ((delay)< - -extern psched_tdiff_t psched_clock_per_hz; -extern int psched_clock_scale; -extern psched_time_t psched_time_base; -extern cycles_t psched_time_mark; - -#define PSCHED_GET_TIME(stamp) \ -do { \ - cycles_t cur = get_cycles(); \ - if (sizeof(cycles_t) == sizeof(u32)) { \ - if (cur <= psched_time_mark) \ - psched_time_base += 0x100000000ULL; \ - psched_time_mark = cur; \ - (stamp) = (psched_time_base + cur)>>psched_clock_scale; \ - } else { \ - (stamp) = cur>>psched_clock_scale; \ - } \ -} while (0) -#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz) -#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz) - -#endif /* CONFIG_NET_SCH_CLK_CPU */ - -#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ - -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY -#define PSCHED_TDIFF(tv1, tv2) \ -({ \ - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ - int __delta = (tv1).tv_usec - (tv2).tv_usec; \ - if (__delta_sec) { \ - switch (__delta_sec) { \ - default: \ - __delta = 0; \ - case 2: \ - __delta += USEC_PER_SEC; \ - case 1: \ - __delta += USEC_PER_SEC; \ - } \ - } \ - __delta; \ -}) - -static inline int -psched_tod_diff(int delta_sec, int bound) -{ - int delta; - - if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1) - return bound; - delta = delta_sec * USEC_PER_SEC; - if (delta > bound || delta < 0) - delta = bound; - return delta; -} - -#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ -({ \ - int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \ - int __delta = (tv1).tv_usec - (tv2).tv_usec; \ - switch (__delta_sec) { \ - default: \ - __delta = psched_tod_diff(__delta_sec, bound); break; \ - case 2: \ - __delta += USEC_PER_SEC; \ - case 1: \ - __delta += USEC_PER_SEC; \ - case 0: \ - if (__delta > bound || __delta < 0) \ - __delta = bound; \ - } \ - __delta; \ -}) - -#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \ - (tv1).tv_sec <= (tv2).tv_sec) || \ - (tv1).tv_sec < (tv2).tv_sec) - -#define PSCHED_TADD2(tv, delta, tv_res) \ -({ \ - int __delta = (tv).tv_usec + (delta); \ - (tv_res).tv_sec = (tv).tv_sec; \ - while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \ - (tv_res).tv_usec = __delta; \ -}) - -#define PSCHED_TADD(tv, delta) \ -({ \ - (tv).tv_usec += (delta); \ - while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \ - (tv).tv_usec -= USEC_PER_SEC; } \ -}) - -/* Set/check that time is in the "past perfect"; - it depends on concrete representation of system time - */ - -#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0) -#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0) +/* Avoid doing 64 bit divide by 1000 */ +#define PSCHED_US2NS(x) ((s64)(x) << 10) +#define PSCHED_NS2US(x) ((x) >> 10) -#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; }) +#define PSCHED_TICKS_PER_SEC PSCHED_NS2US(NSEC_PER_SEC) +#define PSCHED_GET_TIME(stamp) \ + ((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get()))) -#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ +#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC) +#define PSCHED_JIFFIE2US(delay) PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC) -#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) +#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2)) #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ - min_t(long long, (tv1) - (tv2), bound) - - -#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) + min_t(long long, (tv1) - (tv2), bound) +#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2)) #define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta)) -#define PSCHED_TADD(tv, delta) ((tv) += (delta)) +#define PSCHED_TADD(tv, delta) ((tv) += (delta)) #define PSCHED_SET_PASTPERFECT(t) ((t) = 0) #define PSCHED_IS_PASTPERFECT(t) ((t) == 0) #define PSCHED_AUDIT_TDIFF(t) -#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ - extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b74860aaf5f1..f5cfde8c9025 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -59,6 +59,7 @@ ktime_t ktime_get(void) return timespec_to_ktime(now); } +EXPORT_SYMBOL_GPL(ktime_get); /** * ktime_get_real - get the real (wall-) time in ktime_t format diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f4544dd86476..475df8449be9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -46,62 +46,6 @@ config NET_SCH_FIFO if NET_SCHED -choice - prompt "Packet scheduler clock source" - default NET_SCH_CLK_GETTIMEOFDAY - ---help--- - Packet schedulers need a monotonic clock that increments at a static - rate. The kernel provides several suitable interfaces, each with - different properties: - - - high resolution (us or better) - - fast to read (minimal locking, no i/o access) - - synchronized on all processors - - handles cpu clock frequency changes - - but nothing provides all of the above. - -config NET_SCH_CLK_JIFFIES - bool "Timer interrupt" - ---help--- - Say Y here if you want to use the timer interrupt (jiffies) as clock - source. This clock source is fast, synchronized on all processors and - handles cpu clock frequency changes, but its resolution is too low - for accurate shaping except at very low speed. - -config NET_SCH_CLK_GETTIMEOFDAY - bool "gettimeofday" - ---help--- - Say Y here if you want to use gettimeofday as clock source. This clock - source has high resolution, is synchronized on all processors and - handles cpu clock frequency changes, but it is slow. - - Choose this if you need a high resolution clock source but can't use - the CPU's cycle counter. - -# don't allow on SMP x86 because they can have unsynchronized TSCs. -# gettimeofday is a good alternative -config NET_SCH_CLK_CPU - bool "CPU cycle counter" - depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 - ---help--- - Say Y here if you want to use the CPU's cycle counter as clock source. - This is a cheap and high resolution clock source, but on some - architectures it is not synchronized on all processors and doesn't - handle cpu clock frequency changes. - - The useable cycle counters are: - - x86/x86_64 - Timestamp Counter - alpha - Cycle Counter - sparc64 - %ticks register - ppc64 - Time base - ia64 - Interval Time Counter - - Choose this if your CPU's cycle counter is working properly. - -endchoice - comment "Queueing/Scheduling" config NET_SCH_CBQ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4a927a5e1fa6..d71bf79eb80b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1175,15 +1175,12 @@ reclassify: return -1; } -static int psched_us_per_tick = 1; -static int psched_tick_per_us = 1; - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { seq_printf(seq, "%08x %08x %08x %08x\n", - psched_tick_per_us, psched_us_per_tick, - 1000000, HZ); + (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), + 1000000, HZ); return 0; } @@ -1202,80 +1199,10 @@ static const struct file_operations psched_fops = { }; #endif -#ifdef CONFIG_NET_SCH_CLK_CPU -psched_tdiff_t psched_clock_per_hz; -int psched_clock_scale; -EXPORT_SYMBOL(psched_clock_per_hz); -EXPORT_SYMBOL(psched_clock_scale); - -psched_time_t psched_time_base; -cycles_t psched_time_mark; -EXPORT_SYMBOL(psched_time_mark); -EXPORT_SYMBOL(psched_time_base); - -/* - * Periodically adjust psched_time_base to avoid overflow - * with 32-bit get_cycles(). Safe up to 4GHz CPU. - */ -static void psched_tick(unsigned long); -static DEFINE_TIMER(psched_timer, psched_tick, 0, 0); - -static void psched_tick(unsigned long dummy) -{ - if (sizeof(cycles_t) == sizeof(u32)) { - psched_time_t dummy_stamp; - PSCHED_GET_TIME(dummy_stamp); - psched_timer.expires = jiffies + 1*HZ; - add_timer(&psched_timer); - } -} - -int __init psched_calibrate_clock(void) -{ - psched_time_t stamp, stamp1; - struct timeval tv, tv1; - psched_tdiff_t delay; - long rdelay; - unsigned long stop; - - psched_tick(0); - stop = jiffies + HZ/10; - PSCHED_GET_TIME(stamp); - do_gettimeofday(&tv); - while (time_before(jiffies, stop)) { - barrier(); - cpu_relax(); - } - PSCHED_GET_TIME(stamp1); - do_gettimeofday(&tv1); - - delay = PSCHED_TDIFF(stamp1, stamp); - rdelay = tv1.tv_usec - tv.tv_usec; - rdelay += (tv1.tv_sec - tv.tv_sec)*1000000; - if (rdelay > delay) - return -1; - delay /= rdelay; - psched_tick_per_us = delay; - while ((delay>>=1) != 0) - psched_clock_scale++; - psched_us_per_tick = 1<>psched_clock_scale; - return 0; -} -#endif - static int __init pktsched_init(void) { struct rtnetlink_link *link_p; -#ifdef CONFIG_NET_SCH_CLK_CPU - if (psched_calibrate_clock() < 0) - return -1; -#elif defined(CONFIG_NET_SCH_CLK_JIFFIES) - psched_tick_per_us = HZ< -#undef PSCHED_GET_TIME -#define PSCHED_GET_TIME(stamp) \ -do { \ - struct timeval tv; \ - do_gettimeofday(&tv); \ - (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \ -} while (0) -#endif - #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ @@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl) * ism: (psched_us/byte) << ISM_SHIFT * dx: psched_us * - * Clock source resolution (CONFIG_NET_SCH_CLK_*) - * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us. - * CPU: resolution is between 0.5us and 1us. - * GETTIMEOFDAY: resolution is exactly 1us. + * The clock source resolution with ktime is 1.024us. * * sm and ism are scaled in order to keep effective digits. * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective * digits in decimal using the following table. * - * Note: We can afford the additional accuracy (altq hfsc keeps at most - * 3 effective digits) thanks to the fact that linux clock is bounded - * much more tightly. - * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ------------+------------------------------------------------------- - * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3 - * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3 - * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3 + * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 * - * 0.5us/byte 160 16 1.6 0.16 0.016 - * us/byte 80 8 0.8 0.08 0.008 - * 1.27us/byte 63 6.3 0.63 0.063 0.0063 + * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 */ #define SM_SHIFT 20 #define ISM_SHIFT 18 -- cgit v1.2.3 From b8b8fd2dc23725fba77f66b3fef11b11f983fc08 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Apr 2007 15:31:24 -0700 Subject: [NET]: Fix networking compilation errors Fix miscellaneous networking compilation errors. (*) Export ktime_add_ns() for modules. (*) wext_proc_init() should have an ANSI declaration. Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/net/wext.h | 2 +- kernel/hrtimer.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/include/net/wext.h b/include/net/wext.h index 55741836a675..c02b8decf3af 100644 --- a/include/net/wext.h +++ b/include/net/wext.h @@ -10,7 +10,7 @@ extern int wext_proc_init(void); extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, void __user *arg); #else -static inline int wext_proc_init() +static inline int wext_proc_init(void) { return 0; } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index f5cfde8c9025..1b3033105b40 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -279,6 +279,8 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) return ktime_add(kt, tmp); } + +EXPORT_SYMBOL_GPL(ktime_add_ns); # endif /* !CONFIG_KTIME_SCALAR */ /* -- cgit v1.2.3