From 280f06774afedf849f0b34248ed6aff57d0f6908 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 18:22:06 +0200 Subject: nohz: Separate out irq exit and idle loop dyntick logic The tick_nohz_stop_sched_tick() function, which tries to delay the next timer tick as long as possible, can be called from two places: - From the idle loop to start the dyntick idle mode - From interrupt exit if we have interrupted the dyntick idle mode, so that we reprogram the next tick event in case the irq changed some internal state that requires this action. There are only a few minor differences between both that are handled by that function, driven by the ts->inidle cpu variable and the inidle parameter. Together, these guarantee that we only update the dyntick mode on irq exit if we actually interrupted the dyntick idle mode, and that we enter the RCU extended quiescent state from idle loop entry only. Split this function into: - tick_nohz_idle_enter(), which sets ts->inidle to 1, enters dynticks idle mode unconditionally if it can, and enters into RCU extended quiescent state. - tick_nohz_irq_exit() which only updates the dynticks idle mode when ts->inidle is set (i.e. if tick_nohz_idle_enter() has been called). To maintain symmetry, tick_nohz_restart_sched_tick() has been renamed to tick_nohz_idle_exit(). This simplifies the code and micro-optimizes the irq exit path (no need for local_irq_save there). This also prepares for the split between dynticks and rcu extended quiescent state logics. We'll need this split to further fix illegal uses of RCU in extended quiescent states in the idle loop. Signed-off-by: Frederic Weisbecker Cc: Mike Frysinger Cc: Guan Xuetao Cc: David Miller Cc: Chris Metcalf Cc: Hans-Christian Egtvedt Cc: Ralf Baechle Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Russell King Cc: Paul Mackerras Cc: Heiko Carstens Cc: Paul Mundt Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/softirq.c') diff --git a/kernel/softirq.c b/kernel/softirq.c index 2c71d91efff0..f9f2aa81ce53 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -351,7 +351,7 @@ void irq_exit(void) #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) - tick_nohz_stop_sched_tick(0); + tick_nohz_irq_exit(); #endif preempt_enable_no_resched(); } -- cgit v1.2.3 From 416eb33cd60ef405e2860a186364e57bcb2d89f6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Oct 2011 16:31:02 -0700 Subject: rcu: Fix early call to rcu_idle_enter() On the irq exit path, tick_nohz_irq_exit() may raise a softirq, which action leads to the wake up path and select_task_rq_fair() that makes use of rcu to iterate the domains. This is an illegal use of RCU because we may be in RCU extended quiescent state if we interrupted an RCU-idle window in the idle loop: [ 132.978883] =============================== [ 132.978883] [ INFO: suspicious RCU usage. ] [ 132.978883] ------------------------------- [ 132.978883] kernel/sched_fair.c:1707 suspicious rcu_dereference_check() usage! [ 132.978883] [ 132.978883] other info that might help us debug this: [ 132.978883] [ 132.978883] [ 132.978883] rcu_scheduler_active = 1, debug_locks = 0 [ 132.978883] RCU used illegally from extended quiescent state! [ 132.978883] 2 locks held by swapper/0: [ 132.978883] #0: (&p->pi_lock){-.-.-.}, at: [] try_to_wake_up+0x39/0x2f0 [ 132.978883] #1: (rcu_read_lock){.+.+..}, at: [] select_task_rq_fair+0x6a/0xec0 [ 132.978883] [ 132.978883] stack backtrace: [ 132.978883] Pid: 0, comm: swapper Tainted: G W 3.0.0+ #178 [ 132.978883] Call Trace: [ 132.978883] [] lockdep_rcu_suspicious+0xe6/0x100 [ 132.978883] [] select_task_rq_fair+0x749/0xec0 [ 132.978883] [] ? 
select_task_rq_fair+0x6a/0xec0 [ 132.978883] [] ? do_raw_spin_lock+0x54/0x150 [ 132.978883] [] ? trace_hardirqs_on+0xd/0x10 [ 132.978883] [] try_to_wake_up+0xd3/0x2f0 [ 132.978883] [] ? ktime_get+0x68/0xf0 [ 132.978883] [] wake_up_process+0x15/0x20 [ 132.978883] [] raise_softirq_irqoff+0x65/0x110 [ 132.978883] [] __hrtimer_start_range_ns+0x415/0x5a0 [ 132.978883] [] ? do_raw_spin_unlock+0x5e/0xb0 [ 132.978883] [] hrtimer_start+0x18/0x20 [ 132.978883] [] tick_nohz_stop_sched_tick+0x393/0x450 [ 132.978883] [] irq_exit+0xd2/0x100 [ 132.978883] [] do_IRQ+0x66/0xe0 [ 132.978883] [] common_interrupt+0x13/0x13 [ 132.978883] [] ? native_safe_halt+0xb/0x10 [ 132.978883] [] ? trace_hardirqs_on+0xd/0x10 [ 132.978883] [] default_idle+0xba/0x370 [ 132.978883] [] amd_e400_idle+0x5e/0x130 [ 132.978883] [] cpu_idle+0xb6/0x120 [ 132.978883] [] rest_init+0xef/0x150 [ 132.978883] [] ? rest_init+0x52/0x150 [ 132.978883] [] start_kernel+0x3da/0x3e5 [ 132.978883] [] x86_64_start_reservations+0x131/0x135 [ 132.978883] [] x86_64_start_kernel+0x103/0x112 Fix this by calling rcu_irq_exit() after tick_nohz_irq_exit(). Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/softirq.c') diff --git a/kernel/softirq.c b/kernel/softirq.c index f9f2aa81ce53..4eb3a0fa351e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -347,12 +347,12 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); - rcu_irq_exit(); #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_irq_exit(); #endif + rcu_irq_exit(); preempt_enable_no_resched(); } -- cgit v1.2.3