From 528a25b00e1f84eaba6c98e63f58ee0a8e472102 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 28 Jan 2015 14:09:43 -0800 Subject: cpu: Make CPU-offline idle-loop transition point more precise This commit uses a per-CPU variable to make the CPU-offline code path through the idle loop more precise, so that the outgoing CPU is guaranteed to make it into the idle loop before it is powered off. This commit is in preparation for putting the RCU offline-handling code on this code path, which will eliminate the magic one-jiffy wait that RCU uses as the maximum time for an outgoing CPU to get all the way through the scheduler. The magic one-jiffy wait for incoming CPUs remains a separate issue. Signed-off-by: Paul E. McKenney --- kernel/cpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/cpu.c') diff --git a/kernel/cpu.c b/kernel/cpu.c index 1972b161c61e..d46b4dae0ca0 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -408,8 +408,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) * * Wait for the stop thread to go away. */ - while (!idle_cpu(cpu)) + while (!per_cpu(cpu_dead_idle, cpu)) cpu_relax(); + smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ + per_cpu(cpu_dead_idle, cpu) = false; /* This actually kills the CPU. */ __cpu_die(cpu); -- cgit v1.2.3 From 345527b1edce8df719e0884500c76832a18211c3 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Mon, 30 Mar 2015 14:59:19 +0530 Subject: clockevents: Fix cpu_down() race for hrtimer based broadcasting It was found when doing a hotplug stress test on POWER, that the machine either hit softlockups or rcu_sched stall warnings. The issue was traced to commit: 7cba160ad789 ("powernv/cpuidle: Redesign idle states management") which exposed the cpu_down() race with hrtimer based broadcast mode: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") The race is the following: Assume CPU1 is the CPU which holds the hrtimer broadcasting duty before it is taken down. CPU0 CPU1 cpu_down() take_cpu_down() disable_interrupts() cpu_die() while (CPU1 != CPU_DEAD) { msleep(100); switch_to_idle(); stop_cpu_timer(); schedule_broadcast(); } tick_cleanup_cpu_dead() take_over_broadcast() So after CPU1 disabled interrupts it cannot handle the broadcast hrtimer anymore, so CPU0 will be stuck forever. Fix this by explicitly taking over broadcast duty before cpu_die(). This is a temporary workaround. What we really want is a callback in the clockevent device which allows us to do that from the dying CPU by pushing the hrtimer onto a different cpu. That might involve an IPI and is definitely more complex than this immediate fix. Changelog was picked up from: https://lkml.org/lkml/2015/2/16/213 Suggested-by: Thomas Gleixner Tested-by: Nicolas Pitre Signed-off-by: Preeti U. Murthy Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: nicolas.pitre@linaro.org Cc: peterz@infradead.org Cc: rjw@rjwysocki.net Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com [ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ] Signed-off-by: Ingo Molnar --- include/linux/tick.h | 6 ++++++ kernel/cpu.c | 2 ++ kernel/time/tick-broadcast.c | 19 +++++++++++-------- 3 files changed, 19 insertions(+), 8 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/tick.h b/include/linux/tick.h index 589868b09aff..f9ff225d53c0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -36,6 +36,12 @@ extern void tick_irq_enter(void); static inline void tick_irq_enter(void) { } #endif +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); +#else +static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } +#endif + #ifdef CONFIG_NO_HZ_COMMON extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); diff --git a/kernel/cpu.c b/kernel/cpu.c index 1972b161c61e..af5db20e5803 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "smpboot.h" @@ -411,6 +412,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) while (!idle_cpu(cpu)) cpu_relax(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 19cfb381faa9..f5e0fd5652dc 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -680,14 +680,19 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -static void broadcast_move_bc(int deadcpu) +void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct clock_event_device *bc = tick_broadcast_device.evtdev; + struct clock_event_device *bc; + unsigned long flags; - if (!bc || !broadcast_needs_cpu(bc, deadcpu)) - return; - /* This moves the broadcast assignment to this cpu */ - clockevents_program_event(bc, bc->next_event, 1); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -924,8 +929,6 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); - broadcast_move_bc(cpu); - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3 From 52c063d1adbc16c76e70fffa20727fcd4e9343b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:37:24 +0200 Subject: clockevents: Make tick handover explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the tick_handover call and invoke it explicitely from the hotplug code. Temporary solution will be cleaned up in later patches. Signed-off-by: Thomas Gleixner [ Rebase ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: John Stultz Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1658173.RkEEILFiQZ@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 1 - include/linux/tick.h | 2 ++ kernel/cpu.c | 2 ++ kernel/time/clockevents.c | 4 ---- kernel/time/hrtimer.c | 4 ---- kernel/time/tick-common.c | 9 ++++++--- kernel/time/tick-internal.h | 1 - 7 files changed, 10 insertions(+), 13 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f97f498a5d10..f4bde22f8a05 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -11,7 +11,6 @@ /* Clock event notification values */ enum clock_event_nofitiers { CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_CPU_DYING, CLOCK_EVT_NOTIFY_CPU_DEAD, }; diff --git a/include/linux/tick.h b/include/linux/tick.h index 6119321fc3be..2c68fa3b9436 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -19,12 +19,14 @@ extern void tick_unfreeze(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); +extern void tick_handover_do_timer(void); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } +static inline void tick_handover_do_timer(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT diff --git a/kernel/cpu.c b/kernel/cpu.c index af5db20e5803..eba7eaa1341d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -339,6 +339,8 @@ static int __ref take_cpu_down(void *_param) return err; cpu_notify(CPU_DYING | param->mod, param->hcpu); + /* Give up timekeeping duties */ + tick_handover_do_timer(); /* Park the stopper thread */ kthread_park(current); return 0; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index be9abf32c0b9..88fb3b96c7cc 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,10 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(arg); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 721d29b99d10..6a7a64ec7d1b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1707,10 +1707,6 @@ static int hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DYING: - case CPU_DYING_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); - break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index e28ba5c044c5..055c868f3ec9 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -332,20 +332,23 @@ out_bc: tick_install_broadcast_device(newdev); } +#ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. * - * Called with interrupts disabled. + * Called with interrupts disabled. Not locking required. If + * tick_do_timer_cpu is owned by this cpu, nothing can change it. */ -void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(void) { - if (*cpup == tick_do_timer_cpu) { + if (tick_do_timer_cpu == smp_processor_id()) { int cpu = cpumask_first(cpu_online_mask); tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : TICK_DO_TIMER_NONE; } } +#endif /* * Shutdown an event device on a given cpu: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 0266f9dbd114..aabcb5d00cf2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -20,7 +20,6 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); extern void tick_resume(void); -- cgit v1.2.3 From a49b116dcb1265f238f3169507424257b0519069 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:38:05 +0200 Subject: clockevents: Cleanup dead cpu explicitely clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the cleanup function for a dead cpu and invoke it directly from the cpu down code. Make it conditional on CPU_HOTPLUG as well. Temporary change, will be refined in the future. Signed-off-by: Thomas Gleixner [ Rebased, added clockevents_notify() removal ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1735025.raBZdQHM3m@vostro.rjw.lan Signed-off-by: Ingo Molnar --- include/linux/clockchips.h | 6 ------ include/linux/tick.h | 2 ++ kernel/cpu.c | 1 + kernel/time/clockevents.c | 51 ++++++++++++++++++-------------------------- kernel/time/hrtimer.c | 3 --- kernel/time/tick-broadcast.c | 39 ++++++++++++++++----------------- kernel/time/tick-common.c | 6 +++--- kernel/time/tick-internal.h | 10 ++++----- 8 files changed, 52 insertions(+), 66 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f4bde22f8a05..96c280b2c263 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -8,12 +8,6 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H -/* Clock event notification values */ -enum clock_event_nofitiers { - CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_CPU_DEAD, -}; - #ifdef CONFIG_GENERIC_CLOCKEVENTS # include diff --git a/include/linux/tick.h b/include/linux/tick.h index 2c68fa3b9436..f8492da57ad3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -20,6 +20,7 @@ extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); extern void tick_handover_do_timer(void); +extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_freeze(void) { } @@ -27,6 +28,7 @@ static inline void tick_unfreeze(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } static inline void tick_handover_do_timer(void) { } +static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #ifdef CONFIG_TICK_ONESHOT diff --git a/kernel/cpu.c b/kernel/cpu.c index eba7eaa1341d..82eea9c5af61 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -419,6 +419,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) __cpu_die(cpu); /* CPU is completely dead: tell everyone. Too late to complain. */ + tick_cleanup_dead_cpu(cpu); cpu_notify_nofail(CPU_DEAD | mod, hcpu); check_for_tasks(cpu); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 88fb3b96c7cc..25d942d1da27 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -642,49 +642,40 @@ void clockevents_resume(void) dev->resume(dev); } +#ifdef CONFIG_HOTPLUG_CPU /** - * clockevents_notify - notification about relevant events - * Returns 0 on success, any other value on error + * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu */ -int clockevents_notify(unsigned long reason, void *arg) +void tick_cleanup_dead_cpu(int cpu) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); - switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(arg); - tick_shutdown_broadcast(arg); - tick_shutdown(arg); - /* - * Unregister the clock event devices which were - * released from the users in the notify chain. - */ - list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + tick_shutdown_broadcast_oneshot(cpu); + tick_shutdown_broadcast(cpu); + tick_shutdown(cpu); + /* + * Unregister the clock event devices which were + * released from the users in the notify chain. + */ + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1 && + !tick_is_broadcast_device(dev)) { + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); - /* - * Now check whether the CPU has left unused per cpu devices - */ - cpu = *((int *)arg); - list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { - if (cpumask_test_cpu(cpu, dev->cpumask) && - cpumask_weight(dev->cpumask) == 1 && - !tick_is_broadcast_device(dev)) { - BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); - list_del(&dev->list); - } } - break; - default: - break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); - return ret; } -EXPORT_SYMBOL_GPL(clockevents_notify); +#endif #ifdef CONFIG_SYSFS struct bus_type clockevents_subsys = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 6a7a64ec7d1b..76d4bd962b19 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1709,11 +1709,8 @@ static int hrtimer_cpu_notify(struct notifier_block *self, #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: case CPU_DEAD_FROZEN: - { - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); migrate_hrtimers(scpu); break; - } #endif default: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 55e43f20987a..7e8ca4f448a8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -410,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) dev->event_handler = tick_handle_periodic_broadcast; } +#ifdef CONFIG_HOTPLUG_CPU /* * Remove a CPU from broadcasting */ -void tick_shutdown_broadcast(unsigned int *cpup) +void tick_shutdown_broadcast(unsigned int cpu) { struct clock_event_device *bc; unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -432,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif void tick_suspend_broadcast(void) { @@ -672,21 +673,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -void hotplug_cpu__broadcast_tick_pull(int deadcpu) -{ - struct clock_event_device *bc; - unsigned long flags; - - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; - - if (bc && broadcast_needs_cpu(bc, deadcpu)) { - /* This moves the broadcast assignment to this CPU: */ - clockevents_program_event(bc, bc->next_event, 1); - } - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - /** * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode * @state: The target state (enter/exit) @@ -908,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#ifdef CONFIG_HOTPLUG_CPU +void hotplug_cpu__broadcast_tick_pull(int deadcpu) +{ + struct clock_event_device *bc; + unsigned long flags; + + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +} /* * Remove a dead CPU from broadcasting */ -void tick_shutdown_broadcast_oneshot(unsigned int *cpup) +void tick_shutdown_broadcast_oneshot(unsigned int cpu) { unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -929,6 +929,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif /* * Check, whether the broadcast device is in one shot mode diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 055c868f3ec9..fac3e98fec49 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -348,7 +348,6 @@ void tick_handover_do_timer(void) TICK_DO_TIMER_NONE; } } -#endif /* * Shutdown an event device on a given cpu: @@ -357,9 +356,9 @@ void tick_handover_do_timer(void) * access the hardware device itself. * We just set the mode and remove it from the lists. */ -void tick_shutdown(unsigned int *cpup) +void tick_shutdown(unsigned int cpu) { - struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); + struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct clock_event_device *dev = td->evtdev; td->mode = TICKDEV_MODE_PERIODIC; @@ -375,6 +374,7 @@ void tick_shutdown(unsigned int *cpup) td->evtdev = NULL; } } +#endif /** * tick_suspend_local - Suspend the local tick device diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index aabcb5d00cf2..b64fdd8054c5 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -20,7 +20,7 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_shutdown(unsigned int *cpup); +extern void tick_shutdown(unsigned int cpu); extern void tick_suspend(void); extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, @@ -52,7 +52,7 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_shutdown_broadcast(unsigned int *cpup); +extern void tick_shutdown_broadcast(unsigned int cpu); extern void tick_suspend_broadcast(void); extern void tick_resume_broadcast(void); extern bool tick_resume_check_broadcast(void); @@ -66,7 +66,7 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev) static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_shutdown_broadcast(unsigned int *cpup) { } +static inline void tick_shutdown_broadcast(unsigned int cpu) { } static inline void tick_suspend_broadcast(void) { } static inline void tick_resume_broadcast(void) { } static inline bool tick_resume_check_broadcast(void) { return false; } @@ -117,7 +117,7 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); @@ -125,7 +125,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -- cgit v1.2.3 From 00df35f991914db6b8bde8cf09808e19a9cffc3d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 12 Apr 2015 08:06:55 -0700 Subject: cpu: Defer smpboot kthread unparking until CPU known to scheduler Currently, smpboot_unpark_threads() is invoked before the incoming CPU has been added to the scheduler's runqueue structures. This might potentially cause the unparked kthread to run on the wrong CPU, since the correct CPU isn't fully set up yet. That causes a sporadic, hard to debug boot crash triggering on some systems, reported by Borislav Petkov, and bisected down to: 2a442c9c6453 ("x86: Use common outgoing-CPU-notification code") This patch places smpboot_unpark_threads() in a CPU hotplug notifier with priority set so that these kthreads are unparked just after the CPU has been added to the runqueues. Reported-and-tested-by: Borislav Petkov Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 ++ init/main.c | 1 + kernel/cpu.c | 34 +++++++++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'kernel/cpu.c') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d028721748d4..091badf0f6ba 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -73,6 +73,7 @@ enum { /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, + CPU_PRI_SMPBOOT = 9, /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, CPU_PRI_WORKQUEUE_DOWN = -5, @@ -165,6 +166,7 @@ static inline void __unregister_cpu_notifier(struct notifier_block *nb) } #endif +void smpboot_thread_init(void); int cpu_up(unsigned int cpu); void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); diff --git a/init/main.c b/init/main.c index 6f0f1c5ff8cc..a12548c93318 100644 --- a/init/main.c +++ b/init/main.c @@ -384,6 +384,7 @@ static noinline void __init_refok rest_init(void) int pid; rcu_scheduler_starting(); + smpboot_thread_init(); /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if diff --git a/kernel/cpu.c b/kernel/cpu.c index d46b4dae0ca0..57858cebd6b5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -448,6 +448,37 @@ out: EXPORT_SYMBOL(cpu_down); #endif /*CONFIG_HOTPLUG_CPU*/ +/* + * Unpark per-CPU smpboot kthreads at CPU-online time. + */ +static int smpboot_thread_call(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + + case CPU_ONLINE: + smpboot_unpark_threads(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block smpboot_thread_notifier = { + .notifier_call = smpboot_thread_call, + .priority = CPU_PRI_SMPBOOT, +}; + +void __cpuinit smpboot_thread_init(void) +{ + register_cpu_notifier(&smpboot_thread_notifier); +} + /* Requires cpu_add_remove_lock to be held */ static int _cpu_up(unsigned int cpu, int tasks_frozen) { @@ -487,9 +518,6 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; BUG_ON(!cpu_online(cpu)); - /* Wake the per cpu threads */ - smpboot_unpark_threads(cpu); - /* Now call notifier in preparation. */ cpu_notify(CPU_ONLINE | mod, hcpu); -- cgit v1.2.3