From 771dae81896855d25f7f8746aaf56c0238deafb6 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:31 +0000 Subject: powerpc/cpuidle: Add cpu_idle_wait() to allow switching of idle routines This patch provides a cpu_idle_wait() routine for the powerpc platform, which is required by the cpuidle subsystem. This routine is required to change the idle handler on SMP systems. The equivalent routine for x86 is in arch/x86/kernel/process.c, but the powerpc implementation is different. The cpuidle_disable variable is used to enable/disable the cpuidle framework if the power_save option is set at boot time. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/idle.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..8574b0e81ff1 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -39,9 +39,13 @@ #define cpu_should_die() 0 #endif +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; return 0; } __setup("powersave=off", powersave_off); @@ -102,6 +106,29 @@ void cpu_idle(void) } } + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. + * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + int cpu; + smp_mb(); + + /* kick all the CPUs so that they exit out of old idle routine */ + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + smp_send_reschedule(cpu); + } + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + int powersave_nap; #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:06 +0200 Subject: nohz: Separate out irq exit and idle loop dyntick logic The tick_nohz_stop_sched_tick() function, which tries to delay the next timer tick as long as possible, can be called from two places: - From the idle loop, to start the dyntick idle mode - From interrupt exit, if we have interrupted the dyntick idle mode, so that we reprogram the next tick event in case the irq changed some internal state that requires this action. There are only a few minor differences between the two cases; these are handled by that function, driven by the ts->inidle cpu variable and the inidle parameter. Together these guarantee that we only update the dyntick mode on irq exit if we actually interrupted the dyntick idle mode, and that we enter the RCU extended quiescent state from idle loop entry only. Split this function into: - tick_nohz_idle_enter(), which sets ts->inidle to 1, enters dynticks idle mode unconditionally if it can, and enters into RCU extended quiescent state. - tick_nohz_irq_exit(), which only updates the dynticks idle mode when ts->inidle is set (i.e. if tick_nohz_idle_enter() has been called). To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed to tick_nohz_idle_exit(). This simplifies the code and micro-optimizes the irq exit path (no need for local_irq_save there). 
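As a usage sketch of the cpu_idle_wait() routine added in the first patch above: a caller such as the cpuidle subsystem installs a new handler and then kicks every other CPU out of the old one. Only ppc_md.power_save and cpu_idle_wait() come from the kernel code shown here; my_new_power_save() and switch_idle_handler() are hypothetical stand-ins.

	/* Hypothetical caller: swap in a new powerpc idle handler.
	 * my_new_power_save() stands in for whatever handler the cpuidle
	 * subsystem would install; only ppc_md.power_save and
	 * cpu_idle_wait() are real symbols from the patch above.
	 */
	static void my_new_power_save(void)
	{
		/* enter some low-power state, return on wakeup */
	}

	static void switch_idle_handler(void)
	{
		ppc_md.power_save = my_new_power_save;
		/*
		 * cpu_idle_wait() sends a reschedule IPI to every other
		 * online CPU, kicking each one out of the old idle routine
		 * so that its next idle entry uses the new handler.
		 */
		cpu_idle_wait();
	}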
This also prepares for the split between dynticks and rcu extended quiescent state logics. We'll need this split to further fix illegal uses of RCU in extended quiescent states in the idle loop. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/arm/kernel/process.c | 4 +- arch/avr32/kernel/process.c | 4 +- arch/blackfin/kernel/process.c | 4 +- arch/microblaze/kernel/process.c | 4 +- arch/mips/kernel/process.c | 4 +- arch/openrisc/kernel/idle.c | 4 +- arch/powerpc/kernel/idle.c | 4 +- arch/powerpc/platforms/iseries/setup.c | 8 +-- arch/s390/kernel/process.c | 4 +- arch/sh/kernel/idle.c | 4 +- arch/sparc/kernel/process_64.c | 4 +- arch/tile/kernel/process.c | 4 +- arch/um/kernel/process.c | 4 +- arch/unicore32/kernel/process.c | 4 +- arch/x86/kernel/process_32.c | 4 +- arch/x86/kernel/process_64.c | 4 +- include/linux/tick.h | 13 ++--- kernel/softirq.c | 2 +- kernel/time/tick-sched.c | 93 +++++++++++++++++++++------------- 19 files changed, 99 insertions(+), 77 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3d0c6fb74ae4..3f1f8daf703c 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -183,7 +183,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); leds_event(led_idle_start); while (!need_resched()) { #ifdef CONFIG_HOTPLUG_CPU @@ -213,7 +213,7 @@ void cpu_idle(void) } } leds_event(led_idle_end); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index ef5a2a08fcca..6ee7952248db 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -34,10 +34,10 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) cpu_idle_sleep(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 6a80a9e9fc4a..7b141b5c9e8d 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -88,10 +88,10 @@ void cpu_idle(void) #endif if (!idle) idle = default_idle; - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) idle(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 95cc295976a7..5407f09b4be4 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -103,10 +103,10 @@ void cpu_idle(void) if (!idle) idle = default_idle; - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) idle(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c47f96e453c0..c11e5ca2a434 100644 --- a/arch/mips/kernel/process.c +++ 
b/arch/mips/kernel/process.c @@ -56,7 +56,7 @@ void __noreturn cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && cpu_online(cpu)) { #ifdef CONFIG_MIPS_MT_SMTC extern void smtc_idle_loop_hook(void); @@ -77,7 +77,7 @@ void __noreturn cpu_idle(void) system_state == SYSTEM_BOOTING)) play_dead(); #endif - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c index d5bc5f813e89..fb6a9bf40006 100644 --- a/arch/openrisc/kernel/idle.c +++ b/arch/openrisc/kernel/idle.c @@ -51,7 +51,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -69,7 +69,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 39a2baa6ad58..878572f70ac5 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index ea0acbd8966d..e83dfaf89f69 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -563,7 +563,7 @@ static void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && !hvlpevent_is_pending()) { local_irq_disable(); ppc64_runlatch_off(); @@ -577,7 +577,7 @@ static void iseries_shared_idle(void) } ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); if (hvlpevent_is_pending()) process_iSeries_events(); @@ -593,7 +593,7 @@ static void iseries_dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); if (!need_resched()) { while (!need_resched()) { ppc64_runlatch_off(); @@ -610,7 +610,7 @@ static void iseries_dedicated_idle(void) } ppc64_runlatch_on(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 9451b210a1b4..6224f9dbbc1f 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,10 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) default_idle(); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index db4ecd731a00..6015743020a0 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -89,7 +89,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - 
tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -111,7 +111,7 @@ void cpu_idle(void) start_critical_timings(); } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 3739a06a76cb..9c2795ba2cfe 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -95,12 +95,12 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while(1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched() && !cpu_is_offline(cpu)) sparc64_yield(cpu); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 9c45d8bbdf57..920e674aedb9 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -85,7 +85,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { if (cpu_is_offline(cpu)) BUG(); /* no HOTPLUG_CPU */ @@ -105,7 +105,7 @@ void cpu_idle(void) local_irq_enable(); current_thread_info()->status |= TS_POLLING; } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c5338351aecd..cfb657e92849 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -246,10 +246,10 @@ void default_idle(void) if (need_resched()) schedule(); - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); nsecs = disable_timer(); idle_sleep(nsecs); - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); } } diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index ba401df971ed..9999b9a84d46 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -55,7 +55,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { local_irq_disable(); stop_critical_timings(); @@ -63,7 +63,7 @@ void cpu_idle(void) local_irq_enable(); start_critical_timings(); } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 795b79f984c2..6d9d4d52cac5 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -99,7 +99,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -116,7 +116,7 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3bd7e6eebf31..b069e9d7875f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -122,7 +122,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_stop_sched_tick(1); + tick_nohz_idle_enter(); while (!need_resched()) { rmb(); @@ -149,7 +149,7 @@ void cpu_idle(void) __exit_idle(); } - tick_nohz_restart_sched_tick(); + tick_nohz_idle_exit(); 
preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/include/linux/tick.h b/include/linux/tick.h index ca40838fdfb7..0df1d50a408a 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -121,21 +121,22 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ -extern void tick_nohz_stop_sched_tick(int inidle); -extern void tick_nohz_restart_sched_tick(void); +extern void tick_nohz_idle_enter(void); +extern void tick_nohz_idle_exit(void); +extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else -static inline void tick_nohz_stop_sched_tick(int inidle) +static inline void tick_nohz_idle_enter(void) { - if (inidle) - rcu_idle_enter(); + rcu_idle_enter(); } -static inline void tick_nohz_restart_sched_tick(void) +static inline void tick_nohz_idle_exit(void) { rcu_idle_exit(); } + static inline ktime_t tick_nohz_get_sleep_length(void) { ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; diff --git a/kernel/softirq.c b/kernel/softirq.c index 2c71d91efff0..f9f2aa81ce53 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -351,7 +351,7 @@ void irq_exit(void) #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) - tick_nohz_stop_sched_tick(0); + tick_nohz_irq_exit(); #endif preempt_enable_no_resched(); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5d9d23665f12..266c242dc354 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -275,42 +275,17 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -/** - * tick_nohz_stop_sched_tick - stop the idle tick from the idle task - * - * When the next event is more than a tick into the future, stop the idle tick - * Called either from the idle loop or from irq_exit() when an idle period was - * just interrupted by an interrupt which did not cause a reschedule. - */ -void tick_nohz_stop_sched_tick(int inidle) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts) { - unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; - struct tick_sched *ts; + unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; - local_irq_save(flags); - cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); - /* - * Call to tick_nohz_start_idle stops the last_update_time from being - * updated. Thus, it must not be called in the event we are called from - * irq_exit() with the prior state different than idle. - */ - if (!inidle && !ts->inidle) - goto end; - - /* - * Set ts->inidle unconditionally. Even if the system did not - * switch to NOHZ mode the cpu frequency governers rely on the - * update of the idle time accounting in tick_nohz_start_idle(). 
- */ - ts->inidle = 1; - now = tick_nohz_start_idle(cpu, ts); /* @@ -326,10 +301,10 @@ void tick_nohz_stop_sched_tick(int inidle) } if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - goto end; + return; if (need_resched()) - goto end; + return; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; @@ -339,7 +314,7 @@ (unsigned int) local_softirq_pending()); ratelimit++; } - goto end; + return; } ts->idle_calls++; @@ -471,10 +446,54 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); -end: - if (inidle) - rcu_idle_enter(); - local_irq_restore(flags); +} + +/** + * tick_nohz_idle_enter - stop the idle tick from the idle task + * + * When the next event is more than a tick into the future, stop the idle tick + * Called when we start the idle loop. + * This also enters into RCU extended quiescent state so that this CPU doesn't + * need anymore to be part of any global grace period completion. This way + * the tick can be stopped safely as we don't need to report quiescent states. + */ +void tick_nohz_idle_enter(void) +{ + struct tick_sched *ts; + + WARN_ON_ONCE(irqs_disabled()); + + local_irq_disable(); + + ts = &__get_cpu_var(tick_cpu_sched); + /* + * set ts->inidle unconditionally. even if the system did not + * switch to nohz mode the cpu frequency governers rely on the + * update of the idle time accounting in tick_nohz_start_idle(). + */ + ts->inidle = 1; + tick_nohz_stop_sched_tick(ts); + rcu_idle_enter(); + + local_irq_enable(); +} + +/** + * tick_nohz_irq_exit - update next tick event from interrupt exit + * + * When an interrupt fires while we are idle and it doesn't cause + * a reschedule, it may still add, modify or delete a timer, enqueue + * an RCU callback, etc... + * So we need to re-calculate and reprogram the next tick event. + */ +void tick_nohz_irq_exit(void) +{ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (!ts->inidle) + return; + + tick_nohz_stop_sched_tick(ts); } /** @@ -516,11 +535,13 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } /** - * tick_nohz_restart_sched_tick - restart the idle tick from the idle task + * tick_nohz_idle_exit - restart the idle tick from the idle task * * Restart the idle tick when the CPU is woken up from idle + * This also exit the RCU extended quiescent state. The CPU + * can use RCU again after this function is called. */ -void tick_nohz_restart_sched_tick(void) +void tick_nohz_idle_exit(void) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); -- cgit v1.2.3 From 2bbb6817c0ac1b5f2a68d720f364f98eeb1ac4fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Oct 2011 16:01:00 +0200 Subject: nohz: Allow rcu extended quiescent state handling separately from tick stop It is assumed that rcu won't be used once we switch to tickless mode and until we restart the tick. However, this is not always true, as on x86-64, where we dereference the idle notifiers after the tick is stopped. To prepare for fixing this, add two new APIs: tick_nohz_idle_enter_norcu() and tick_nohz_idle_exit_norcu(). If no use of RCU is made in the idle loop between the tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, the arch must instead call the new *_norcu() versions, so that it doesn't need to call rcu_idle_enter() and rcu_idle_exit(). 
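To make the new calling convention concrete, here is a minimal sketch of an idle loop using the *_norcu() pair, modeled on the arch conversions in this patch; arch_idle_sleep() is a hypothetical stand-in for the platform's low-power wait.

	/* Sketch of the *_norcu() convention: the arch makes no use of RCU
	 * between the two calls, so rcu_idle_enter()/rcu_idle_exit() are
	 * folded into the tick helpers and never called explicitly.
	 * arch_idle_sleep() is hypothetical.
	 */
	void cpu_idle(void)
	{
		while (1) {
			tick_nohz_idle_enter_norcu();	/* stop tick + enter RCU idle */
			while (!need_resched())
				arch_idle_sleep();	/* must not use RCU here */
			tick_nohz_idle_exit_norcu();	/* exit RCU idle + restart tick */
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}
	}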
Otherwise the arch must call tick_nohz_idle_enter() and tick_nohz_idle_exit() and also explicitly call: - rcu_idle_enter() after its last use of RCU before the CPU is put to sleep. - rcu_idle_exit() before the first use of RCU after the CPU is woken up. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. McKenney --- arch/arm/kernel/process.c | 4 +-- arch/avr32/kernel/process.c | 4 +-- arch/blackfin/kernel/process.c | 4 +-- arch/microblaze/kernel/process.c | 4 +-- arch/mips/kernel/process.c | 4 +-- arch/openrisc/kernel/idle.c | 4 +-- arch/powerpc/kernel/idle.c | 4 +-- arch/powerpc/platforms/iseries/setup.c | 8 +++--- arch/s390/kernel/process.c | 4 +-- arch/sh/kernel/idle.c | 4 +-- arch/sparc/kernel/process_64.c | 4 +-- arch/tile/kernel/process.c | 4 +-- arch/um/kernel/process.c | 4 +-- arch/unicore32/kernel/process.c | 4 +-- arch/x86/kernel/process_32.c | 4 +-- arch/x86/kernel/process_64.c | 4 +-- include/linux/tick.h | 46 +++++++++++++++++++++++++++++++--- kernel/time/tick-sched.c | 25 +++++++++--------- 18 files changed, 90 insertions(+), 49 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3f1f8daf703c..47e34c091276 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -183,7 +183,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); leds_event(led_idle_start); while (!need_resched()) { #ifdef CONFIG_HOTPLUG_CPU @@ -213,7 +213,7 @@ void cpu_idle(void) } } leds_event(led_idle_end); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 6ee7952248db..34c8c703bb16 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -34,10 +34,10 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) cpu_idle_sleep(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 7b141b5c9e8d..57e07498a0e7 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -88,10 +88,10 @@ void cpu_idle(void) #endif if (!idle) idle = default_idle; - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) idle(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 5407f09b4be4..13d59f34b94e 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -103,10 +103,10 @@ void cpu_idle(void) if (!idle) idle = default_idle; - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) idle(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index c11e5ca2a434..17fb3a270160 100644 --- 
a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -56,7 +56,7 @@ void __noreturn cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && cpu_online(cpu)) { #ifdef CONFIG_MIPS_MT_SMTC extern void smtc_idle_loop_hook(void); @@ -77,7 +77,7 @@ void __noreturn cpu_idle(void) system_state == SYSTEM_BOOTING)) play_dead(); #endif - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c index fb6a9bf40006..2e82cd0fa5e1 100644 --- a/arch/openrisc/kernel/idle.c +++ b/arch/openrisc/kernel/idle.c @@ -51,7 +51,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) { check_pgt_cache(); @@ -69,7 +69,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); } - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 878572f70ac5..2e782a36d8f2 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -56,7 +56,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +93,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index e83dfaf89f69..d69d3d185e89 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -563,7 +563,7 @@ static void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && !hvlpevent_is_pending()) { local_irq_disable(); ppc64_runlatch_off(); @@ -577,7 +577,7 @@ static void iseries_shared_idle(void) } ppc64_runlatch_on(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); if (hvlpevent_is_pending()) process_iSeries_events(); @@ -593,7 +593,7 @@ static void iseries_dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); if (!need_resched()) { while (!need_resched()) { ppc64_runlatch_off(); @@ -610,7 +610,7 @@ static void iseries_dedicated_idle(void) } ppc64_runlatch_on(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 6224f9dbbc1f..6fa987367ae6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,10 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) default_idle(); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 6015743020a0..ad58e7535a7c 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -89,7 +89,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) 
{ - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) { check_pgt_cache(); @@ -111,7 +111,7 @@ void cpu_idle(void) start_critical_timings(); } - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 9c2795ba2cfe..4a0e7d79cb92 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -95,12 +95,12 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while(1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_is_offline(cpu)) sparc64_yield(cpu); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 920e674aedb9..53ac89595ab1 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -85,7 +85,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) { if (cpu_is_offline(cpu)) BUG(); /* no HOTPLUG_CPU */ @@ -105,7 +105,7 @@ void cpu_idle(void) local_irq_enable(); current_thread_info()->status |= TS_POLLING; } - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index cfb657e92849..55d2cf455f63 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -246,10 +246,10 @@ void default_idle(void) if (need_resched()) schedule(); - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); nsecs = disable_timer(); idle_sleep(nsecs); - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); } } diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 9999b9a84d46..095ff5a57928 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -55,7 +55,7 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) { local_irq_disable(); stop_critical_timings(); @@ -63,7 +63,7 @@ void cpu_idle(void) local_irq_enable(); start_critical_timings(); } - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6d9d4d52cac5..f94da3920c36 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -99,7 +99,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) { check_pgt_cache(); @@ -116,7 +116,7 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); } - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b069e9d7875f..18e8cf3581f6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -122,7 +122,7 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter(); + tick_nohz_idle_enter_norcu(); while (!need_resched()) { rmb(); @@ -149,7 +149,7 @@ void cpu_idle(void) __exit_idle(); } - tick_nohz_idle_exit(); + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); schedule(); 
preempt_disable(); diff --git a/include/linux/tick.h b/include/linux/tick.h index 0df1d50a408a..327434a05757 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -7,6 +7,7 @@ #define _LINUX_TICK_H #include +#include #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -121,18 +122,57 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ -extern void tick_nohz_idle_enter(void); +extern void __tick_nohz_idle_enter(void); +static inline void tick_nohz_idle_enter(void) +{ + local_irq_disable(); + __tick_nohz_idle_enter(); + local_irq_enable(); +} extern void tick_nohz_idle_exit(void); + +/* + * Call this pair of function if the arch doesn't make any use + * of RCU in-between. You won't need to call rcu_idle_enter() and + * rcu_idle_exit(). + * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit() + * and explicitly tell RCU about the window around the place the CPU enters low + * power mode where no RCU use is made. This is done by calling rcu_idle_enter() + * after the last use of RCU before the CPU is put to sleep and by calling + * rcu_idle_exit() before the first use of RCU after the CPU woke up. + */ +static inline void tick_nohz_idle_enter_norcu(void) +{ + /* + * Also call rcu_idle_enter() in the irq disabled section even + * if it disables irq itself. + * Just an optimization that prevents from an interrupt happening + * between it and __tick_nohz_idle_enter() to lose time to help + * completing a grace period while we could be in extended grace + * period already. + */ + local_irq_disable(); + __tick_nohz_idle_enter(); + rcu_idle_enter(); + local_irq_enable(); +} +static inline void tick_nohz_idle_exit_norcu(void) +{ + rcu_idle_exit(); + tick_nohz_idle_exit(); +} extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else -static inline void tick_nohz_idle_enter(void) +static inline void tick_nohz_idle_enter(void) { } +static inline void tick_nohz_idle_exit(void) { } +static inline void tick_nohz_idle_enter_norcu(void) { rcu_idle_enter(); } -static inline void tick_nohz_idle_exit(void) +static inline void tick_nohz_idle_exit_norcu(void) { rcu_idle_exit(); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 266c242dc354..c76aefe764b0 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -453,18 +453,22 @@ out: * * When the next event is more than a tick into the future, stop the idle tick * Called when we start the idle loop. - * This also enters into RCU extended quiescent state so that this CPU doesn't - * need anymore to be part of any global grace period completion. This way - * the tick can be stopped safely as we don't need to report quiescent states. + * + * If no use of RCU is made in the idle loop between + * tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, then + * tick_nohz_idle_enter_norcu() should be called instead and the arch + * doesn't need to call rcu_idle_enter() and rcu_idle_exit() explicitly. + * + * Otherwise the arch is responsible of calling: + * + * - rcu_idle_enter() after its last use of RCU before the CPU is put + * to sleep. + * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. 
*/ -void tick_nohz_idle_enter(void) +void __tick_nohz_idle_enter(void) { struct tick_sched *ts; - WARN_ON_ONCE(irqs_disabled()); - - local_irq_disable(); - ts = &__get_cpu_var(tick_cpu_sched); /* * set ts->inidle unconditionally. even if the system did not @@ -473,9 +477,6 @@ void tick_nohz_idle_enter(void) */ ts->inidle = 1; tick_nohz_stop_sched_tick(ts); - rcu_idle_enter(); - - local_irq_enable(); } /** @@ -551,7 +552,7 @@ void tick_nohz_idle_exit(void) ktime_t now; local_irq_disable(); - rcu_idle_exit(); + if (ts->idle_active || (ts->inidle && ts->tick_stopped)) now = ktime_get(); -- cgit v1.2.3 From a7b152d5342c06e81ab0cf7d18345f69fa7e56b5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 13 Oct 2011 19:05:12 -0700 Subject: powerpc: Tell RCU about idle after hcall tracing The PowerPC pSeries platform (CONFIG_PPC_PSERIES=y) enables hypervisor-call tracing for CONFIG_TRACEPOINTS=y kernels. One of the hypervisor calls that is traced is the H_CEDE call in the idle loop that tells the hypervisor that this OS instance no longer needs the current CPU. However, tracing uses RCU, so this combination of kernel configuration variables needs, on the one hand, to avoid telling RCU about the current CPU's idleness until after the H_CEDE-entry tracing completes, and, on the other, must tell RCU that the current CPU is no longer idle before the H_CEDE-exit tracing starts. In all other cases, it suffices to inform RCU of CPU idleness upon idle-loop entry and exit. This commit makes the required adjustments. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- arch/powerpc/kernel/idle.c | 16 ++++++++++++++-- arch/powerpc/platforms/pseries/lpar.c | 4 ++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 2e782a36d8f2..3cd73d1fc427 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -46,6 +46,12 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); +#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) +static const bool idle_uses_rcu = 1; +#else +static const bool idle_uses_rcu; +#endif + /* * The body of the idle task. 
*/ @@ -56,7 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_enter(); + else + tick_nohz_idle_enter_norcu(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -93,7 +102,10 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - tick_nohz_idle_exit_norcu(); + if (idle_uses_rcu) + tick_nohz_idle_exit(); + else + tick_nohz_idle_exit_norcu(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 27a49508b410..52d429be6c76 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -555,6 +555,8 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) (*depth)++; trace_hcall_entry(opcode, args); + if (opcode == H_CEDE) + rcu_idle_enter(); (*depth)--; out: @@ -575,6 +577,8 @@ void __trace_hcall_exit(long opcode, unsigned long retval, goto out; (*depth)++; + if (opcode == H_CEDE) + rcu_idle_exit(); trace_hcall_exit(opcode, retval, retbuf); (*depth)--; -- cgit v1.2.3 From 1268fbc746ea1cd279886a740dcbad4ba5232225 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 17 Nov 2011 18:48:14 +0100 Subject: nohz: Remove tick_nohz_idle_enter_norcu() / tick_nohz_idle_exit_norcu() Those two APIs were provided to optimize the calls of tick_nohz_idle_enter() and rcu_idle_enter() into a single irq disabled section. This way no interrupt happening in-between would needlessly process any RCU job. Now we are talking about an optimization for which benefits have yet to be measured. Let's start simple and completely decouple idle rcu and dyntick idle logics to simplify. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Reviewed-by: Josh Triplett Signed-off-by: Paul E. 
McKenney --- arch/arm/kernel/process.c | 6 +++-- arch/avr32/kernel/process.c | 6 +++-- arch/blackfin/kernel/process.c | 6 +++-- arch/microblaze/kernel/process.c | 6 +++-- arch/mips/kernel/process.c | 6 +++-- arch/openrisc/kernel/idle.c | 6 +++-- arch/powerpc/kernel/idle.c | 15 +++++------ arch/powerpc/platforms/iseries/setup.c | 12 ++++++--- arch/s390/kernel/process.c | 6 +++-- arch/sh/kernel/idle.c | 6 +++-- arch/sparc/kernel/process_64.c | 6 +++-- arch/tile/kernel/process.c | 6 +++-- arch/um/kernel/process.c | 6 +++-- arch/unicore32/kernel/process.c | 6 +++-- arch/x86/kernel/process_32.c | 6 +++-- include/linux/tick.h | 47 +--------------------------------- kernel/time/tick-sched.c | 15 ++++++----- 17 files changed, 76 insertions(+), 91 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 47e34c091276..e8e8fe505df1 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -183,7 +183,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); leds_event(led_idle_start); while (!need_resched()) { #ifdef CONFIG_HOTPLUG_CPU @@ -213,7 +214,8 @@ void cpu_idle(void) } } leds_event(led_idle_end); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 34c8c703bb16..ea3395750324 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -34,10 +34,12 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) cpu_idle_sleep(); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 57e07498a0e7..8dd0416673cb 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -88,10 +88,12 @@ void cpu_idle(void) #endif if (!idle) idle = default_idle; - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) idle(); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 13d59f34b94e..7dcb5bfffb75 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -103,10 +103,12 @@ void cpu_idle(void) if (!idle) idle = default_idle; - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) idle(); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 17fb3a270160..7955409051c4 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -56,7 +56,8 @@ void __noreturn cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched() && cpu_online(cpu)) { #ifdef CONFIG_MIPS_MT_SMTC extern void smtc_idle_loop_hook(void); @@ -77,7 +78,8 @@ void __noreturn cpu_idle(void) system_state == SYSTEM_BOOTING)) 
play_dead(); #endif - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c index 2e82cd0fa5e1..e5fc78877830 100644 --- a/arch/openrisc/kernel/idle.c +++ b/arch/openrisc/kernel/idle.c @@ -51,7 +51,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -69,7 +70,8 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); } - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 3cd73d1fc427..9c3cd490b1bd 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -62,10 +62,10 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (idle_uses_rcu) - tick_nohz_idle_enter(); - else - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + if (!idle_uses_rcu) + rcu_idle_enter(); + while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -102,10 +102,9 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (idle_uses_rcu) - tick_nohz_idle_exit(); - else - tick_nohz_idle_exit_norcu(); + if (!idle_uses_rcu) + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index d69d3d185e89..8fc62586a973 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -563,7 +563,8 @@ static void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !hvlpevent_is_pending()) { local_irq_disable(); ppc64_runlatch_off(); @@ -577,7 +578,8 @@ static void iseries_shared_idle(void) } ppc64_runlatch_on(); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); if (hvlpevent_is_pending()) process_iSeries_events(); @@ -593,7 +595,8 @@ static void iseries_dedicated_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); if (!need_resched()) { while (!need_resched()) { ppc64_runlatch_off(); @@ -610,7 +613,8 @@ static void iseries_dedicated_idle(void) } ppc64_runlatch_on(); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 6fa987367ae6..3201ae447990 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,12 @@ static void default_idle(void) void cpu_idle(void) { for (;;) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) default_idle(); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index ad58e7535a7c..406508d4ce74 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -89,7 +89,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + 
tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -111,7 +112,8 @@ void cpu_idle(void) start_critical_timings(); } - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4a0e7d79cb92..39d8b05201a2 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -95,12 +95,14 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while(1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !cpu_is_offline(cpu)) sparc64_yield(cpu); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 53ac89595ab1..4c1ac6e5347a 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -85,7 +85,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { if (cpu_is_offline(cpu)) BUG(); /* no HOTPLUG_CPU */ @@ -105,7 +106,8 @@ void cpu_idle(void) local_irq_enable(); current_thread_info()->status |= TS_POLLING; } - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 55d2cf455f63..69f24905abdc 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -246,10 +246,12 @@ void default_idle(void) if (need_resched()) schedule(); - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); nsecs = disable_timer(); idle_sleep(nsecs); - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); } } diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index 095ff5a57928..52edc2b62873 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -55,7 +55,8 @@ void cpu_idle(void) { /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { local_irq_disable(); stop_critical_timings(); @@ -63,7 +64,8 @@ void cpu_idle(void) local_irq_enable(); start_critical_timings(); } - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f94da3920c36..485204f58cda 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -99,7 +99,8 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - tick_nohz_idle_enter_norcu(); + tick_nohz_idle_enter(); + rcu_idle_enter(); while (!need_resched()) { check_pgt_cache(); @@ -116,7 +117,8 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); } - tick_nohz_idle_exit_norcu(); + rcu_idle_exit(); + tick_nohz_idle_exit(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/include/linux/tick.h b/include/linux/tick.h index 327434a05757..ab8be90b5cc9 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,45 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ -extern void 
__tick_nohz_idle_enter(void); -static inline void tick_nohz_idle_enter(void) -{ - local_irq_disable(); - __tick_nohz_idle_enter(); - local_irq_enable(); -} +extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); - -/* - * Call this pair of function if the arch doesn't make any use - * of RCU in-between. You won't need to call rcu_idle_enter() and - * rcu_idle_exit(). - * Otherwise you need to call tick_nohz_idle_enter() and tick_nohz_idle_exit() - * and explicitly tell RCU about the window around the place the CPU enters low - * power mode where no RCU use is made. This is done by calling rcu_idle_enter() - * after the last use of RCU before the CPU is put to sleep and by calling - * rcu_idle_exit() before the first use of RCU after the CPU woke up. - */ -static inline void tick_nohz_idle_enter_norcu(void) -{ - /* - * Also call rcu_idle_enter() in the irq disabled section even - * if it disables irq itself. - * Just an optimization that prevents from an interrupt happening - * between it and __tick_nohz_idle_enter() to lose time to help - * completing a grace period while we could be in extended grace - * period already. - */ - local_irq_disable(); - __tick_nohz_idle_enter(); - rcu_idle_enter(); - local_irq_enable(); -} -static inline void tick_nohz_idle_exit_norcu(void) -{ - rcu_idle_exit(); - tick_nohz_idle_exit(); -} extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); @@ -168,14 +131,6 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); # else static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -static inline void tick_nohz_idle_enter_norcu(void) -{ - rcu_idle_enter(); -} -static inline void tick_nohz_idle_exit_norcu(void) -{ - rcu_idle_exit(); -} static inline ktime_t tick_nohz_get_sleep_length(void) { diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c76aefe764b0..0ec8b832ab6b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -454,21 +454,20 @@ out: * When the next event is more than a tick into the future, stop the idle tick * Called when we start the idle loop. * - * If no use of RCU is made in the idle loop between - * tick_nohz_idle_enter() and tick_nohz_idle_exit() calls, then - * tick_nohz_idle_enter_norcu() should be called instead and the arch - * doesn't need to call rcu_idle_enter() and rcu_idle_exit() explicitly. - * - * Otherwise the arch is responsible of calling: + * The arch is responsible of calling: * * - rcu_idle_enter() after its last use of RCU before the CPU is put * to sleep. * - rcu_idle_exit() before the first use of RCU after the CPU is woken up. */ -void __tick_nohz_idle_enter(void) +void tick_nohz_idle_enter(void) { struct tick_sched *ts; + WARN_ON_ONCE(irqs_disabled()); + + local_irq_disable(); + ts = &__get_cpu_var(tick_cpu_sched); /* * set ts->inidle unconditionally. even if the system did not @@ -477,6 +476,8 @@ void __tick_nohz_idle_enter(void) */ ts->inidle = 1; tick_nohz_stop_sched_tick(ts); + + local_irq_enable(); } /** -- cgit v1.2.3 From a5ccfee05a439b803640e94584056204501db31c Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 9 Jan 2012 14:29:15 +0000 Subject: powerpc: Fix RCU idle and hcall tracing Tracepoints should not be called inside an rcu_idle_enter/rcu_idle_exit region. Since pSeries calls H_CEDE in the idle loop, we were violating this rule. 
commit a7b152d5342c (powerpc: Tell RCU about idle after hcall tracing) tried to work around it by delaying the rcu_idle_enter until after we called the hcall tracepoint, but there are a number of issues with it. The hcall tracepoint trampoline code is called conditionally when the tracepoint is enabled. If the tracepoint is not enabled we never call rcu_idle_enter. The idle_uses_rcu check was also done at compile time which breaks multiplatform builds. The simple fix is to avoid tracing H_CEDE and rely on other tracepoints and the hypervisor dispatch trace log to work out if we called H_CEDE. This fixes a hang during boot on pSeries. Signed-off-by: Anton Blanchard Acked-by: Paul E. McKenney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/idle.c | 12 ++---------- arch/powerpc/platforms/pseries/lpar.c | 14 ++++++++++---- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel/idle.c') diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 7c66ce13da89..0a48bf5db6c8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -50,12 +50,6 @@ static int __init powersave_off(char *arg) } __setup("powersave=off", powersave_off); -#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_TRACEPOINTS) -static const bool idle_uses_rcu = 1; -#else -static const bool idle_uses_rcu; -#endif - /* * The body of the idle task. */ @@ -67,8 +61,7 @@ void cpu_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { tick_nohz_idle_enter(); - if (!idle_uses_rcu) - rcu_idle_enter(); + rcu_idle_enter(); while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); @@ -106,8 +99,7 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); - if (!idle_uses_rcu) - rcu_idle_exit(); + rcu_idle_exit(); tick_nohz_idle_exit(); preempt_enable_no_resched(); if (cpu_should_die()) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 948e0e3b3547..7bc73af6c7b9 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -546,6 +546,13 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) unsigned long flags; unsigned int *depth; + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + if (opcode == H_CEDE) + return; + local_irq_save(flags); depth = &__get_cpu_var(hcall_trace_depth); @@ -556,8 +563,6 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) (*depth)++; preempt_disable(); trace_hcall_entry(opcode, args); - if (opcode == H_CEDE) - rcu_idle_enter(); (*depth)--; out: @@ -570,6 +575,9 @@ void __trace_hcall_exit(long opcode, unsigned long retval, unsigned long flags; unsigned int *depth; + if (opcode == H_CEDE) + return; + local_irq_save(flags); depth = &__get_cpu_var(hcall_trace_depth); @@ -578,8 +586,6 @@ void __trace_hcall_exit(long opcode, unsigned long retval, goto out; (*depth)++; - if (opcode == H_CEDE) - rcu_idle_exit(); trace_hcall_exit(opcode, retval, retbuf); preempt_enable(); (*depth)--; -- cgit v1.2.3
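Taken together, the series leaves every architecture with the same open-coded pairing. A condensed sketch of the powerpc idle loop as it stands after the final patch above (runlatch, SMT-priority and power_save-selection details elided):

	/* Net shape of arch/powerpc/kernel/idle.c after this series:
	 * the tick and RCU idle transitions are explicit and unconditional,
	 * and H_CEDE is no longer traced, so no RCU use remains inside the
	 * extended quiescent state.  Details elided for brevity.
	 */
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();
		while (!need_resched() && !cpu_should_die())
			ppc_md.power_save();	/* may cede the CPU to the hypervisor on pSeries */
		rcu_idle_exit();
		tick_nohz_idle_exit();
		preempt_enable_no_resched();
		if (cpu_should_die())
			cpu_die();
		schedule();
		preempt_disable();
	}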