diff options
-rw-r--r-- | arch/mips/kvm/mips.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 6 | ||||
-rw-r--r-- | arch/s390/kvm/interrupt.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 2 | ||||
-rw-r--r-- | include/linux/cpuhotplug.h | 1 | ||||
-rw-r--r-- | include/linux/nmi.h | 10 | ||||
-rw-r--r-- | include/linux/smpboot.h | 15 | ||||
-rw-r--r-- | include/linux/swait.h | 36 | ||||
-rw-r--r-- | kernel/cpu.c | 5 | ||||
-rw-r--r-- | kernel/kthread.c | 6 | ||||
-rw-r--r-- | kernel/power/suspend.c | 4 | ||||
-rw-r--r-- | kernel/rcu/srcutiny.c | 4 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 8 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 4 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 12 | ||||
-rw-r--r-- | kernel/sched/debug.c | 31 | ||||
-rw-r--r-- | kernel/sched/fair.c | 1 | ||||
-rw-r--r-- | kernel/sched/swait.c | 32 | ||||
-rw-r--r-- | kernel/smpboot.c | 54 | ||||
-rw-r--r-- | kernel/watchdog.c | 137 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 4 | ||||
-rw-r--r-- | virt/kvm/arm/psci.c | 2 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 2 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 4 |
25 files changed, 170 insertions, 220 deletions
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 7cd76f93a438..f7ea8e21656b 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -515,7 +515,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, dvcpu->arch.wait = 0; if (swq_has_sleeper(&dvcpu->wq)) - swake_up(&dvcpu->wq); + swake_up_one(&dvcpu->wq); return 0; } @@ -1204,7 +1204,7 @@ static void kvm_mips_comparecount_func(unsigned long data) vcpu->arch.wait = 0; if (swq_has_sleeper(&vcpu->wq)) - swake_up(&vcpu->wq); + swake_up_one(&vcpu->wq); } /* low level hrtimer wake routine */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index de686b340f4a..ee4a8854985e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -216,7 +216,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) wqp = kvm_arch_vcpu_wq(vcpu); if (swq_has_sleeper(wqp)) { - swake_up(wqp); + swake_up_one(wqp); ++vcpu->stat.halt_wakeup; } @@ -3188,7 +3188,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } } - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE); if (kvmppc_vcore_check_block(vc)) { finish_swait(&vc->wq, &wait); @@ -3311,7 +3311,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - swake_up(&vc->wq); + swake_up_one(&vc->wq); } } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index daa09f89ca2d..fcb55b02990e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1145,7 +1145,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) * yield-candidate. */ vcpu->preempted = true; - swake_up(&vcpu->wq); + swake_up_one(&vcpu->wq); vcpu->stat.halt_wakeup++; } /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5b2300b818af..a37bda38d205 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -154,7 +154,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) for (;;) { if (!n.halted) - prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break; @@ -188,7 +188,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n) if (n->halted) smp_send_reschedule(n->cpu); else if (swq_has_sleeper(&n->wq)) - swake_up(&n->wq); + swake_up_one(&n->wq); } static void apf_task_wake_all(void) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index b5cd8465d44f..d536d457517b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic) * using swait_active() is safe. */ if (swait_active(q)) - swake_up(q); + swake_up_one(q); if (apic_lvtt_tscdeadline(apic)) ktimer->expired_tscdeadline = ktimer->tscdeadline; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8796ba387152..4cf06a64bc02 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -164,6 +164,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, + CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b8d868d23e79..08f9247e9827 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -45,12 +45,18 @@ extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -#else + +extern int lockup_detector_online_cpu(unsigned int cpu); +extern int lockup_detector_offline_cpu(unsigned int cpu); +#else /* CONFIG_SOFTLOCKUP_DETECTOR */ static inline void touch_softlockup_watchdog_sched(void) { } static inline void touch_softlockup_watchdog(void) { } static inline void touch_softlockup_watchdog_sync(void) { } static inline void touch_all_softlockup_watchdogs(void) { } -#endif + +#define lockup_detector_online_cpu NULL +#define lockup_detector_offline_cpu NULL +#endif /* CONFIG_SOFTLOCKUP_DETECTOR */ #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index c174844cf663..d0884b525001 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -25,8 +25,6 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) - * @cpumask: Internal state. To update which threads are unparked, - * call smpboot_update_cpumask_percpu_thread(). * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ @@ -40,23 +38,12 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); - cpumask_var_t cpumask; bool selfparking; const char *thread_comm; }; -int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, - const struct cpumask *cpumask); - -static inline int -smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) -{ - return smpboot_register_percpu_thread_cpumask(plug_thread, - cpu_possible_mask); -} +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread); void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *); #endif diff --git a/include/linux/swait.h b/include/linux/swait.h index bf8cb0dee23c..73e06e9986d4 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -16,7 +16,7 @@ * wait-queues, but the semantics are actually completely different, and * every single user we have ever had has been buggy (or pointless). * - * A "swake_up()" only wakes up _one_ waiter, which is not at all what + * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what * "wake_up()" does, and has led to problems. In other cases, it has * been fine, because there's only ever one waiter (kvm), but in that * case gthe whole "simple" wait-queue is just pointless to begin with, @@ -38,8 +38,8 @@ * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right * sleeper state. * - * - the exclusive mode; because this requires preserving the list order - * and this is hard. + * - the !exclusive mode; because that leads to O(n) wakeups, everything is + * exclusive. * * - custom wake callback functions; because you cannot give any guarantees * about random code. This also allows swait to be used in RT, such that @@ -115,7 +115,7 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name * CPU0 - waker CPU1 - waiter * * for (;;) { - * @cond = true; prepare_to_swait(&wq_head, &wait, state); + * @cond = true; prepare_to_swait_exclusive(&wq_head, &wait, state); * smp_mb(); // smp_mb() from set_current_state() * if (swait_active(wq_head)) if (@cond) * wake_up(wq_head); break; @@ -157,20 +157,20 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq) return swait_active(wq); } -extern void swake_up(struct swait_queue_head *q); +extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); -extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); +extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); -/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ +/* as per ___wait_event() but for swait, therefore "exclusive == 1" */ #define ___swait_event(wq, condition, state, ret, cmd) \ ({ \ + __label__ __out; \ struct swait_queue __wait; \ long __ret = ret; \ \ @@ -183,20 +183,20 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); \ if (___wait_is_interruptible(state) && __int) { \ __ret = __int; \ - break; \ + goto __out; \ } \ \ cmd; \ } \ finish_swait(&wq, &__wait); \ - __ret; \ +__out: __ret; \ }) #define __swait_event(wq, condition) \ (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ schedule()) -#define swait_event(wq, condition) \ +#define swait_event_exclusive(wq, condition) \ do { \ if (condition) \ break; \ @@ -208,7 +208,7 @@ do { \ TASK_UNINTERRUPTIBLE, timeout, \ __ret = schedule_timeout(__ret)) -#define swait_event_timeout(wq, condition, timeout) \ +#define swait_event_timeout_exclusive(wq, condition, timeout) \ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ @@ -220,7 +220,7 @@ do { \ ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ schedule()) -#define swait_event_interruptible(wq, condition) \ +#define swait_event_interruptible_exclusive(wq, condition) \ ({ \ int __ret = 0; \ if (!(condition)) \ @@ -233,7 +233,7 @@ do { \ TASK_INTERRUPTIBLE, timeout, \ __ret = schedule_timeout(__ret)) -#define swait_event_interruptible_timeout(wq, condition, timeout) \ +#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ @@ -246,7 +246,7 @@ do { \ (void)___swait_event(wq, condition, TASK_IDLE, 0, schedule()) /** - * swait_event_idle - wait without system load contribution + * swait_event_idle_exclusive - wait without system load contribution * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @@ -257,7 +257,7 @@ do { \ * condition and doesn't want to contribute to system load. Signals are * ignored. */ -#define swait_event_idle(wq, condition) \ +#define swait_event_idle_exclusive(wq, condition) \ do { \ if (condition) \ break; \ @@ -270,7 +270,7 @@ do { \ __ret = schedule_timeout(__ret)) /** - * swait_event_idle_timeout - wait up to timeout without load contribution + * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution * @wq: the waitqueue to wait on * @condition: a C expression for the event to wait for * @timeout: timeout at which we'll give up in jiffies @@ -288,7 +288,7 @@ do { \ * or the remaining jiffies (at least 1) if the @condition evaluated * to %true before the @timeout elapsed. */ -#define swait_event_idle_timeout(wq, condition, timeout) \ +#define swait_event_idle_timeout_exclusive(wq, condition, timeout) \ ({ \ long __ret = timeout; \ if (!___wait_cond_timeout(condition)) \ diff --git a/kernel/cpu.c b/kernel/cpu.c index 0db8938fbb23..191097c45fb1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, + [CPUHP_AP_WATCHDOG_ONLINE] = { + .name = "lockup_detector:online", + .startup.single = lockup_detector_online_cpu, + .teardown.single = lockup_detector_offline_cpu, + }, [CPUHP_AP_WORKQUEUE_ONLINE] = { .name = "workqueue:online", .startup.single = workqueue_online_cpu, diff --git a/kernel/kthread.c b/kernel/kthread.c index 750cb8082694..11b591ee51ab 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -190,7 +190,7 @@ static void __kthread_parkme(struct kthread *self) if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) break; - complete_all(&self->parked); + complete(&self->parked); schedule(); } __set_current_state(TASK_RUNNING); @@ -465,7 +465,6 @@ void kthread_unpark(struct task_struct *k) if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) __kthread_bind(k, kthread->cpu, TASK_PARKED); - reinit_completion(&kthread->parked); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); /* * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup. @@ -493,6 +492,9 @@ int kthread_park(struct task_struct *k) if (WARN_ON(k->flags & PF_EXITING)) return -ENOSYS; + if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))) + return -EBUSY; + set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 87331565e505..70178f6ffdc4 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -92,7 +92,7 @@ static void s2idle_enter(void) /* Push all the CPUs into the idle loop. */ wake_up_all_idle_cpus(); /* Make the current CPU wait so it can enter the idle loop too. */ - swait_event(s2idle_wait_head, + swait_event_exclusive(s2idle_wait_head, s2idle_state == S2IDLE_STATE_WAKE); cpuidle_pause(); @@ -160,7 +160,7 @@ void s2idle_wake(void) raw_spin_lock_irqsave(&s2idle_lock, flags); if (s2idle_state > S2IDLE_STATE_NONE) { s2idle_state = S2IDLE_STATE_WAKE; - swake_up(&s2idle_wait_head); + swake_up_one(&s2idle_wait_head); } raw_spin_unlock_irqrestore(&s2idle_lock, flags); } diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 622792abe41a..04fc2ed71af8 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -110,7 +110,7 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx) WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); if (!newval && READ_ONCE(sp->srcu_gp_waiting)) - swake_up(&sp->srcu_wq); + swake_up_one(&sp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -140,7 +140,7 @@ void srcu_drive_gp(struct work_struct *wp) idx = sp->srcu_idx; WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ - swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); + swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ /* Invoke the callbacks we removed above. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index aa7cade1b9f3..91f888d3b23a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1727,7 +1727,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp) !READ_ONCE(rsp->gp_flags) || !rsp->gp_kthread) return; - swake_up(&rsp->gp_wq); + swake_up_one(&rsp->gp_wq); } /* @@ -2002,7 +2002,7 @@ static bool rcu_gp_init(struct rcu_state *rsp) } /* - * Helper function for swait_event_idle() wakeup at force-quiescent-state + * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state * time. */ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) @@ -2144,7 +2144,7 @@ static int __noreturn rcu_gp_kthread(void *arg) READ_ONCE(rsp->gpnum), TPS("reqwait")); rsp->gp_state = RCU_GP_WAIT_GPS; - swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & + swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); rsp->gp_state = RCU_GP_DONE_GPS; /* Locking provides needed memory barrier. */ @@ -2176,7 +2176,7 @@ static int __noreturn rcu_gp_kthread(void *arg) READ_ONCE(rsp->gpnum), TPS("fqswait")); rsp->gp_state = RCU_GP_WAIT_FQS; - ret = swait_event_idle_timeout(rsp->gp_wq, + ret = swait_event_idle_timeout_exclusive(rsp->gp_wq, rcu_gp_fqs_check_wake(rsp, &gf), j); rsp->gp_state = RCU_GP_DOING_FQS; /* Locking provides needed memory barriers. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d40708e8c5d6..d428cc1064c8 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -212,7 +212,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up(&rsp->expedited_wq); + swake_up_one(&rsp->expedited_wq); } break; } @@ -518,7 +518,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) jiffies_start = jiffies; for (;;) { - ret = swait_event_timeout( + ret = swait_event_timeout_exclusive( rsp->expedited_wq, sync_rcu_preempt_exp_done_unlocked(rnp_root), jiffies_stall); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7fd12039e512..ad53d133f709 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1854,8 +1854,8 @@ static void __wake_nocb_leader(struct rcu_data *rdp, bool force, WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); del_timer(&rdp->nocb_timer); raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); - smp_mb(); /* ->nocb_leader_sleep before swake_up(). */ - swake_up(&rdp_leader->nocb_wq); + smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */ + swake_up_one(&rdp_leader->nocb_wq); } else { raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); } @@ -2082,7 +2082,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) */ trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait")); for (;;) { - swait_event_interruptible( + swait_event_interruptible_exclusive( rnp->nocb_gp_wq[c & 0x1], (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c))); if (likely(d)) @@ -2111,7 +2111,7 @@ wait_again: /* Wait for callbacks to appear. */ if (!rcu_nocb_poll) { trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep")); - swait_event_interruptible(my_rdp->nocb_wq, + swait_event_interruptible_exclusive(my_rdp->nocb_wq, !READ_ONCE(my_rdp->nocb_leader_sleep)); raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags); my_rdp->nocb_leader_sleep = true; @@ -2176,7 +2176,7 @@ wait_again: raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { /* List was empty, so wake up the follower. */ - swake_up(&rdp->nocb_wq); + swake_up_one(&rdp->nocb_wq); } } @@ -2193,7 +2193,7 @@ static void nocb_follower_wait(struct rcu_data *rdp) { for (;;) { trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep")); - swait_event_interruptible(rdp->nocb_wq, + swait_event_interruptible_exclusive(rdp->nocb_wq, READ_ONCE(rdp->nocb_follower_head)); if (smp_load_acquire(&rdp->nocb_follower_head)) { /* ^^^ Ensure CB invocation follows _head test. */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e593b4118578..c96e89cc4bc7 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -111,20 +111,19 @@ static int sched_feat_set(char *cmp) cmp += 3; } - for (i = 0; i < __SCHED_FEAT_NR; i++) { - if (strcmp(cmp, sched_feat_names[i]) == 0) { - if (neg) { - sysctl_sched_features &= ~(1UL << i); - sched_feat_disable(i); - } else { - sysctl_sched_features |= (1UL << i); - sched_feat_enable(i); - } - break; - } + i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp); + if (i < 0) + return i; + + if (neg) { + sysctl_sched_features &= ~(1UL << i); + sched_feat_disable(i); + } else { + sysctl_sched_features |= (1UL << i); + sched_feat_enable(i); } - return i; + return 0; } static ssize_t @@ -133,7 +132,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, { char buf[64]; char *cmp; - int i; + int ret; struct inode *inode; if (cnt > 63) @@ -148,10 +147,10 @@ sched_feat_write(struct file *filp, const char __user *ubuf, /* Ensure the static_key remains in a consistent state */ inode = file_inode(filp); inode_lock(inode); - i = sched_feat_set(cmp); + ret = sched_feat_set(cmp); inode_unlock(inode); - if (i == __SCHED_FEAT_NR) - return -EINVAL; + if (ret < 0) + return ret; *ppos += cnt; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2f0a0be4d344..321cd5dcf2e8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4726,7 +4726,6 @@ static inline int throttled_lb_pair(struct task_group *tg, throttled_hierarchy(dest_cfs_rq); } -/* updated child weight may affect parent so we have to do this bottom up */ static int tg_unthrottle_up(struct task_group *tg, void *data) { struct rq *rq = data; diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index b6fb2c3b3ff7..66b59ac77c22 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -32,7 +32,7 @@ void swake_up_locked(struct swait_queue_head *q) } EXPORT_SYMBOL(swake_up_locked); -void swake_up(struct swait_queue_head *q) +void swake_up_one(struct swait_queue_head *q) { unsigned long flags; @@ -40,7 +40,7 @@ void swake_up(struct swait_queue_head *q) swake_up_locked(q); raw_spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(swake_up); +EXPORT_SYMBOL(swake_up_one); /* * Does not allow usage from IRQ disabled, since we must be able to @@ -69,14 +69,14 @@ void swake_up_all(struct swait_queue_head *q) } EXPORT_SYMBOL(swake_up_all); -void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) +static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) { wait->task = current; if (list_empty(&wait->task_list)) - list_add(&wait->task_list, &q->task_list); + list_add_tail(&wait->task_list, &q->task_list); } -void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state) +void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state) { unsigned long flags; @@ -85,16 +85,28 @@ void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int set_current_state(state); raw_spin_unlock_irqrestore(&q->lock, flags); } -EXPORT_SYMBOL(prepare_to_swait); +EXPORT_SYMBOL(prepare_to_swait_exclusive); long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state) { - if (signal_pending_state(state, current)) - return -ERESTARTSYS; + unsigned long flags; + long ret = 0; - prepare_to_swait(q, wait, state); + raw_spin_lock_irqsave(&q->lock, flags); + if (unlikely(signal_pending_state(state, current))) { + /* + * See prepare_to_wait_event(). TL;DR, subsequent swake_up_one() + * must not see us. + */ + list_del_init(&wait->task_list); + ret = -ERESTARTSYS; + } else { + __prepare_to_swait(q, wait); + set_current_state(state); + } + raw_spin_unlock_irqrestore(&q->lock, flags); - return 0; + return ret; } EXPORT_SYMBOL(prepare_to_swait_event); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 5043e7433f4b..c230c2dd48e1 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -238,8 +238,7 @@ int smpboot_unpark_threads(unsigned int cpu) mutex_lock(&smpboot_threads_lock); list_for_each_entry(cur, &hotplug_threads, list) - if (cpumask_test_cpu(cpu, cur->cpumask)) - smpboot_unpark_thread(cur, cpu); + smpboot_unpark_thread(cur, cpu); mutex_unlock(&smpboot_threads_lock); return 0; } @@ -280,34 +279,26 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht) } /** - * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related + * smpboot_register_percpu_thread - Register a per_cpu thread related * to hotplug * @plug_thread: Hotplug thread descriptor - * @cpumask: The cpumask where threads run * * Creates and starts the threads on all online cpus. */ -int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread, - const struct cpumask *cpumask) +int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) { unsigned int cpu; int ret = 0; - if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL)) - return -ENOMEM; - cpumask_copy(plug_thread->cpumask, cpumask); - get_online_cpus(); mutex_lock(&smpboot_threads_lock); for_each_online_cpu(cpu) { ret = __smpboot_create_thread(plug_thread, cpu); if (ret) { smpboot_destroy_threads(plug_thread); - free_cpumask_var(plug_thread->cpumask); goto out; } - if (cpumask_test_cpu(cpu, cpumask)) - smpboot_unpark_thread(plug_thread, cpu); + smpboot_unpark_thread(plug_thread, cpu); } list_add(&plug_thread->list, &hotplug_threads); out: @@ -315,7 +306,7 @@ out: put_online_cpus(); return ret; } -EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask); +EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread); /** * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug @@ -331,44 +322,9 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) smpboot_destroy_threads(plug_thread); mutex_unlock(&smpboot_threads_lock); put_online_cpus(); - free_cpumask_var(plug_thread->cpumask); } EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); -/** - * smpboot_update_cpumask_percpu_thread - Adjust which per_cpu hotplug threads stay parked - * @plug_thread: Hotplug thread descriptor - * @new: Revised mask to use - * - * The cpumask field in the smp_hotplug_thread must not be updated directly - * by the client, but only by calling this function. - * This function can only be called on a registered smp_hotplug_thread. - */ -void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *new) -{ - struct cpumask *old = plug_thread->cpumask; - static struct cpumask tmp; - unsigned int cpu; - - lockdep_assert_cpus_held(); - mutex_lock(&smpboot_threads_lock); - - /* Park threads that were exclusively enabled on the old mask. */ - cpumask_andnot(&tmp, old, new); - for_each_cpu_and(cpu, &tmp, cpu_online_mask) - smpboot_park_thread(plug_thread, cpu); - - /* Unpark threads that are exclusively enabled on the new mask. */ - cpumask_andnot(&tmp, new, old); - for_each_cpu_and(cpu, &tmp, cpu_online_mask) - smpboot_unpark_thread(plug_thread, cpu); - - cpumask_copy(old, new); - - mutex_unlock(&smpboot_threads_lock); -} - static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); /* diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 576d18045811..b81f777838d5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -18,18 +18,14 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/sysctl.h> -#include <linux/smpboot.h> -#include <linux/sched/rt.h> -#include <uapi/linux/sched/types.h> #include <linux/tick.h> -#include <linux/workqueue.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/isolation.h> +#include <linux/stop_machine.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> -#include <linux/kthread.h> static DEFINE_MUTEX(watchdog_mutex); @@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void) unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; -static bool softlockup_threads_initialized __read_mostly; +static bool softlockup_initialized __read_mostly; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); -static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer); static DEFINE_PER_CPU(bool, softlockup_touch_sync); static DEFINE_PER_CPU(bool, soft_watchdog_warn); @@ -335,6 +330,25 @@ static void watchdog_interrupt_count(void) __this_cpu_inc(hrtimer_interrupts); } +/* + * The watchdog thread function - touches the timestamp. + * + * It only runs once every sample_period seconds (4 seconds by + * default) to reset the softlockup timestamp. If this gets delayed + * for more than 2*watchdog_thresh seconds then the debug-printout + * triggers in watchdog_timer_fn(). + */ +static int softlockup_fn(void *data) +{ + __this_cpu_write(soft_lockup_hrtimer_cnt, + __this_cpu_read(hrtimer_interrupts)); + __touch_watchdog(); + + return 0; +} + +static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work); + /* watchdog kicker functions */ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { @@ -350,7 +364,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) watchdog_interrupt_count(); /* kick the softlockup detector */ - wake_up_process(__this_cpu_read(softlockup_watchdog)); + stop_one_cpu_nowait(smp_processor_id(), + softlockup_fn, NULL, + this_cpu_ptr(&softlockup_stop_work)); /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); @@ -448,17 +464,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) return HRTIMER_RESTART; } -static void watchdog_set_prio(unsigned int policy, unsigned int prio) -{ - struct sched_param param = { .sched_priority = prio }; - - sched_setscheduler(current, policy, ¶m); -} - static void watchdog_enable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); + WARN_ON_ONCE(cpu != smp_processor_id()); + /* * Start the timer first to prevent the NMI watchdog triggering * before the timer has a chance to fire. @@ -473,15 +484,14 @@ static void watchdog_enable(unsigned int cpu) /* Enable the perf event */ if (watchdog_enabled & NMI_WATCHDOG_ENABLED) watchdog_nmi_enable(cpu); - - watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); } static void watchdog_disable(unsigned int cpu) { struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); - watchdog_set_prio(SCHED_NORMAL, 0); + WARN_ON_ONCE(cpu != smp_processor_id()); + /* * Disable the perf event first. That prevents that a large delay * between disabling the timer and disabling the perf event causes @@ -491,77 +501,63 @@ static void watchdog_disable(unsigned int cpu) hrtimer_cancel(hrtimer); } -static void watchdog_cleanup(unsigned int cpu, bool online) +static int softlockup_stop_fn(void *data) { - watchdog_disable(cpu); + watchdog_disable(smp_processor_id()); + return 0; } -static int watchdog_should_run(unsigned int cpu) +static void softlockup_stop_all(void) { - return __this_cpu_read(hrtimer_interrupts) != - __this_cpu_read(soft_lockup_hrtimer_cnt); + int cpu; + + if (!softlockup_initialized) + return; + + for_each_cpu(cpu, &watchdog_allowed_mask) + smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false); + + cpumask_clear(&watchdog_allowed_mask); } -/* - * The watchdog thread function - touches the timestamp. - * - * It only runs once every sample_period seconds (4 seconds by - * default) to reset the softlockup timestamp. If this gets delayed - * for more than 2*watchdog_thresh seconds then the debug-printout - * triggers in watchdog_timer_fn(). - */ -static void watchdog(unsigned int cpu) +static int softlockup_start_fn(void *data) { - __this_cpu_write(soft_lockup_hrtimer_cnt, - __this_cpu_read(hrtimer_interrupts)); - __touch_watchdog(); + watchdog_enable(smp_processor_id()); + return 0; } -static struct smp_hotplug_thread watchdog_threads = { - .store = &softlockup_watchdog, - .thread_should_run = watchdog_should_run, - .thread_fn = watchdog, - .thread_comm = "watchdog/%u", - .setup = watchdog_enable, - .cleanup = watchdog_cleanup, - .park = watchdog_disable, - .unpark = watchdog_enable, -}; - -static void softlockup_update_smpboot_threads(void) +static void softlockup_start_all(void) { - lockdep_assert_held(&watchdog_mutex); - - if (!softlockup_threads_initialized) - return; + int cpu; - smpboot_update_cpumask_percpu_thread(&watchdog_threads, - &watchdog_allowed_mask); + cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); + for_each_cpu(cpu, &watchdog_allowed_mask) + smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false); } -/* Temporarily park all watchdog threads */ -static void softlockup_park_all_threads(void) +int lockup_detector_online_cpu(unsigned int cpu) { - cpumask_clear(&watchdog_allowed_mask); - softlockup_update_smpboot_threads(); + watchdog_enable(cpu); + return 0; } -/* Unpark enabled threads */ -static void softlockup_unpark_threads(void) +int lockup_detector_offline_cpu(unsigned int cpu) { - cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); - softlockup_update_smpboot_threads(); + watchdog_disable(cpu); + return 0; } static void lockup_detector_reconfigure(void) { cpus_read_lock(); watchdog_nmi_stop(); - softlockup_park_all_threads(); + + softlockup_stop_all(); set_sample_period(); lockup_detector_update_enable(); if (watchdog_enabled && watchdog_thresh) - softlockup_unpark_threads(); + softlockup_start_all(); + watchdog_nmi_start(); cpus_read_unlock(); /* @@ -580,8 +576,6 @@ static void lockup_detector_reconfigure(void) */ static __init void lockup_detector_setup(void) { - int ret; - /* * If sysctl is off and watchdog got disabled on the command line, * nothing to do here. @@ -592,24 +586,13 @@ static __init void lockup_detector_setup(void) !(watchdog_enabled && watchdog_thresh)) return; - ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads, - &watchdog_allowed_mask); - if (ret) { - pr_err("Failed to initialize soft lockup detector threads\n"); - return; - } - mutex_lock(&watchdog_mutex); - softlockup_threads_initialized = true; lockup_detector_reconfigure(); + softlockup_initialized = true; mutex_unlock(&watchdog_mutex); } #else /* CONFIG_SOFTLOCKUP_DETECTOR */ -static inline int watchdog_park_threads(void) { return 0; } -static inline void watchdog_unpark_threads(void) { } -static inline int watchdog_enable_all_cpus(void) { return 0; } -static inline void watchdog_disable_all_cpus(void) { } static void lockup_detector_reconfigure(void) { cpus_read_lock(); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 04e554cae3a2..108250e4d376 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -604,7 +604,7 @@ void kvm_arm_resume_guest(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.pause = false; - swake_up(kvm_arch_vcpu_wq(vcpu)); + swake_up_one(kvm_arch_vcpu_wq(vcpu)); } } @@ -612,7 +612,7 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu) { struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); - swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && + swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); if (vcpu->arch.power_off || vcpu->arch.pause) { diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index c95ab4c5a475..9b73d3ad918a 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -155,7 +155,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) smp_mb(); /* Make sure the above is visible */ wq = kvm_arch_vcpu_wq(vcpu); - swake_up(wq); + swake_up_one(wq); return PSCI_RET_SUCCESS; } diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 57bcb27dcf30..23c2519c5b32 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -107,7 +107,7 @@ static void async_pf_execute(struct work_struct *work) trace_kvm_async_pf_completed(addr, gva); if (swq_has_sleeper(&vcpu->wq)) - swake_up(&vcpu->wq); + swake_up_one(&vcpu->wq); mmput(mm); kvm_put_kvm(vcpu->kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8b47507faab5..3d233ebfbee9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2172,7 +2172,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) kvm_arch_vcpu_blocking(vcpu); for (;;) { - prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); + prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); if (kvm_vcpu_check_block(vcpu) < 0) break; @@ -2214,7 +2214,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) wqp = kvm_arch_vcpu_wq(vcpu); if (swq_has_sleeper(wqp)) { - swake_up(wqp); + swake_up_one(wqp); ++vcpu->stat.halt_wakeup; return true; } |