From 1e8e6951a5774c8dd9d1f14af9c5b7d66130d96f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Wed, 15 Nov 2023 14:11:28 -0500
Subject: rcu/nocb: Remove needless full barrier after callback advancing

A full barrier is issued from nocb_gp_wait() upon callbacks advancing
to order grace period completion with callbacks execution.

However these two events are already ordered by the
smp_mb__after_unlock_lock() barrier within the call to
raw_spin_lock_rcu_node() that is necessary for callbacks advancing to
happen.

The following litmus test shows the kind of guarantee that this barrier
provides:

	C smp_mb__after_unlock_lock

	{}

	// rcu_gp_cleanup()
	P0(spinlock_t *rnp_lock, int *gpnum)
	{
		// Grace period cleanup increase gp sequence number
		spin_lock(rnp_lock);
		WRITE_ONCE(*gpnum, 1);
		spin_unlock(rnp_lock);
	}

	// nocb_gp_wait()
	P1(spinlock_t *rnp_lock, spinlock_t *nocb_lock, int *gpnum, int *cb_ready)
	{
		int r1;

		// Call rcu_advance_cbs() from nocb_gp_wait()
		spin_lock(nocb_lock);
		spin_lock(rnp_lock);
		smp_mb__after_unlock_lock();
		r1 = READ_ONCE(*gpnum);
		WRITE_ONCE(*cb_ready, 1);
		spin_unlock(rnp_lock);
		spin_unlock(nocb_lock);
	}

	// nocb_cb_wait()
	P2(spinlock_t *nocb_lock, int *cb_ready, int *cb_executed)
	{
		int r2;

		// rcu_do_batch() -> rcu_segcblist_extract_done_cbs()
		spin_lock(nocb_lock);
		r2 = READ_ONCE(*cb_ready);
		spin_unlock(nocb_lock);

		// Actual callback execution
		WRITE_ONCE(*cb_executed, 1);
	}

	P3(int *cb_executed, int *gpnum)
	{
		int r3;

		WRITE_ONCE(*cb_executed, 2);
		smp_mb();
		r3 = READ_ONCE(*gpnum);
	}

	exists (1:r1=1 /\ 2:r2=1 /\ cb_executed=2 /\ 3:r3=0) (* Bad outcome. *)

Here the bad outcome only occurs if the smp_mb__after_unlock_lock() is
removed. This barrier orders the grace period completion against
callbacks advancing and even later callbacks invocation, thanks to the
opportunistic propagation via the ->nocb_lock to nocb_cb_wait().

Therefore the smp_mb() placed after callbacks advancing can be safely
removed.

Signed-off-by: Frederic Weisbecker
Reviewed-by: Paul E. McKenney
Signed-off-by: Boqun Feng
---
 kernel/rcu/tree.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b2bccfd37c38..d540d210e5c7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2145,6 +2145,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
	 * Extract the list of ready callbacks, disabling IRQs to prevent
	 * races with call_rcu() from interrupt handlers. Leave the
	 * callback counts, as rcu_barrier() needs to be conservative.
+	 *
+	 * Callbacks execution is fully ordered against preceding grace period
+	 * completion (materialized by rnp->gp_seq update) thanks to the
+	 * smp_mb__after_unlock_lock() upon node locking required for callbacks
+	 * advancing. In NOCB mode this ordering is then further relayed through
+	 * the nocb locking that protects both callbacks advancing and extraction.
*/ rcu_nocb_lock_irqsave(rdp, flags); WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); -- cgit v1.2.3 From b913c3fe685e0aec80130975b0f330fd709ff324 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 9 Jan 2024 23:24:00 +0100 Subject: rcu/nocb: Make IRQs disablement symmetric Currently IRQs are disabled on call_rcu() and then depending on the context: * If the CPU is in nocb mode: - If the callback is enqueued in the bypass list, IRQs are re-enabled implictly by rcu_nocb_try_bypass() - If the callback is enqueued in the normal list, IRQs are re-enabled implicitly by __call_rcu_nocb_wake() * If the CPU is NOT in nocb mode, IRQs are reenabled explicitly from call_rcu() This makes the code a bit hard to follow, especially as it interleaves with nocb locking. To make the IRQ flags coverage clearer and also in order to prepare for moving all the nocb enqueue code to its own function, always re-enable the IRQ flags explicitly from call_rcu(). Reviewed-by: Neeraj Upadhyay (AMD) Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 9 ++++++--- kernel/rcu/tree_nocb.h | 20 +++++++++----------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d540d210e5c7..a402dc4e9a9c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2735,8 +2735,10 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) } check_cb_ovld(rdp); - if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) { + local_irq_restore(flags); return; // Enqueued onto ->nocb_bypass, so just leave. + } // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. rcu_segcblist_enqueue(&rdp->cblist, head); if (__is_kvfree_rcu_offset((unsigned long)func)) @@ -2754,8 +2756,8 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */ } else { __call_rcu_core(rdp, head, flags); - local_irq_restore(flags); } + local_irq_restore(flags); } #ifdef CONFIG_RCU_LAZY @@ -4646,8 +4648,9 @@ void rcutree_migrate_callbacks(int cpu) __call_rcu_nocb_wake(my_rdp, true, flags); } else { rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */ - raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags); + raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */ } + local_irq_restore(flags); if (needwake) rcu_gp_kthread_wake(); lockdep_assert_irqs_enabled(); diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index b2c3145c4c13..1d5c03c5c702 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -532,9 +532,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // 2. Both of these conditions are met: // a. The bypass list previously had only lazy CBs, and: // b. The new CB is non-lazy. - if (ncbs && (!bypass_is_lazy || lazy)) { - local_irq_restore(flags); - } else { + if (!ncbs || (bypass_is_lazy && !lazy)) { // No-CBs GP kthread might be indefinitely asleep, if so, wake. rcu_nocb_lock(rdp); // Rare during call_rcu() flood. if (!rcu_segcblist_pend_cbs(&rdp->cblist)) { @@ -544,7 +542,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, } else { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQnoWake")); - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); } } return true; // Callback already enqueued. 
@@ -570,7 +568,7 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, // If we are being polled or there is no kthread, just leave. t = READ_ONCE(rdp->nocb_gp_kthread); if (rcu_nocb_poll || !t) { - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNotPoll")); return; @@ -583,17 +581,17 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rdp->qlen_last_fqs_check = len; // Only lazy CBs in bypass list if (lazy_len && bypass_len == lazy_len) { - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, TPS("WakeLazy")); } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); wake_nocb_gp(rdp, false); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeEmpty")); } else { - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, TPS("WakeEmptyIsDeferred")); } @@ -611,15 +609,15 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, if ((rdp->nocb_cb_sleep || !rcu_segcblist_ready_cbs(&rdp->cblist)) && !timer_pending(&rdp->nocb_timer)) { - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE, TPS("WakeOvfIsDeferred")); } else { - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); } } else { - rcu_nocb_unlock_irqrestore(rdp, flags); + rcu_nocb_unlock(rdp); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); } } -- cgit v1.2.3 From afd4e6964745ed98b74cacdcce21d73280a0a253 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 9 Jan 2024 23:24:01 +0100 Subject: rcu/nocb: Re-arrange call_rcu() NOCB specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the call_rcu() function interleaves NOCB and !NOCB enqueue code in a complicated way such that: * The bypass enqueue code may or may not have enqueued and may or may not have locked the ->nocb_lock. Everything that follows is in a Schrödinger locking state for the unwary reviewer's eyes. * The was_alldone is always set but only used in NOCB related code. * The NOCB wake up is distantly related to the locking hopefully performed by the bypass enqueue code that did not enqueue on the bypass list. Unconfuse the whole and gather NOCB and !NOCB specific enqueue code to their own functions. Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. 
McKenney Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 44 ++++++++++++++++++++------------------------ kernel/rcu/tree.h | 9 ++++----- kernel/rcu/tree_nocb.h | 18 +++++++++++++++--- 3 files changed, 39 insertions(+), 32 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a402dc4e9a9c..cc0e169e299a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2597,12 +2597,26 @@ static int __init rcu_spawn_core_kthreads(void) return 0; } +static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func) +{ + rcu_segcblist_enqueue(&rdp->cblist, head); + if (__is_kvfree_rcu_offset((unsigned long)func)) + trace_rcu_kvfree_callback(rcu_state.name, head, + (unsigned long)func, + rcu_segcblist_n_cbs(&rdp->cblist)); + else + trace_rcu_callback(rcu_state.name, head, + rcu_segcblist_n_cbs(&rdp->cblist)); + trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); +} + /* * Handle any core-RCU processing required by a call_rcu() invocation. */ -static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, - unsigned long flags) +static void call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, + rcu_callback_t func, unsigned long flags) { + rcutree_enqueue(rdp, head, func); /* * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. @@ -2698,7 +2712,6 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) unsigned long flags; bool lazy; struct rcu_data *rdp; - bool was_alldone; /* Misaligned rcu_head! */ WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); @@ -2735,28 +2748,11 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) } check_cb_ovld(rdp); - if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) { - local_irq_restore(flags); - return; // Enqueued onto ->nocb_bypass, so just leave. - } - // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. - rcu_segcblist_enqueue(&rdp->cblist, head); - if (__is_kvfree_rcu_offset((unsigned long)func)) - trace_rcu_kvfree_callback(rcu_state.name, head, - (unsigned long)func, - rcu_segcblist_n_cbs(&rdp->cblist)); - else - trace_rcu_callback(rcu_state.name, head, - rcu_segcblist_n_cbs(&rdp->cblist)); - - trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); - /* Go handle any RCU core processing required. 
*/ - if (unlikely(rcu_rdp_is_offloaded(rdp))) { - __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */ - } else { - __call_rcu_core(rdp, head, flags); - } + if (unlikely(rcu_rdp_is_offloaded(rdp))) + call_rcu_nocb(rdp, head, func, flags, lazy); + else + call_rcu_core(rdp, head, func, flags); local_irq_restore(flags); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e9821a8422db..bf478da89a8f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -467,11 +467,10 @@ static void rcu_init_one_nocb(struct rcu_node *rnp); static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, unsigned long j, bool lazy); -static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags, - bool lazy); -static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, - unsigned long flags); +static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, + rcu_callback_t func, unsigned long flags, bool lazy); +static void __maybe_unused __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, + unsigned long flags); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 1d5c03c5c702..9e8052ba14b9 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -622,6 +622,18 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, } } +static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, + rcu_callback_t func, unsigned long flags, bool lazy) +{ + bool was_alldone; + + if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) { + /* Not enqueued on bypass but locked, do regular enqueue */ + rcutree_enqueue(rdp, head, func); + __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */ + } +} + static int nocb_gp_toggle_rdp(struct rcu_data *rdp, bool *wake_state) { @@ -1764,10 +1776,10 @@ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, return true; } -static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags, bool lazy) +static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, + rcu_callback_t func, unsigned long flags, bool lazy) { - return false; + WARN_ON_ONCE(1); /* Should be dead code! */ } static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, -- cgit v1.2.3 From a636c5e6f8fc34be520277e69c7c6ee1d4fc1d17 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:15 +0100 Subject: rcu/exp: Fix RCU expedited parallel grace period kworker allocation failure recovery Under CONFIG_RCU_EXP_KTHREAD=y, the nodes initialization for expedited grace periods is queued to a kworker. However if the allocation of that kworker failed, the nodes initialization is performed synchronously by the caller instead. Now the check for kworker initialization failure relies on the kworker pointer to be NULL while its value might actually encapsulate an allocation failure error. Make sure to handle this case. Reviewed-by: Kalesh Singh Fixes: 9621fbee44df ("rcu: Move expedited grace period (GP) work to RT kthread_worker") Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. 
McKenney Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b2bccfd37c38..38c86f2c040b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4749,6 +4749,7 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { pr_err("Failed to create %s!\n", par_gp_kworker_name); + rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); return; } -- cgit v1.2.3 From e7539ffc9a770f36bacedcf0fbfb4bf2f244f4a5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:16 +0100 Subject: rcu/exp: Handle RCU expedited grace period kworker allocation failure Just like is done for the kworker performing nodes initialization, gracefully handle the possible allocation failure of the RCU expedited grace period main kworker. While at it perform a rename of the related checking functions to better reflect the expedited specifics. Reviewed-by: Kalesh Singh Fixes: 9621fbee44df ("rcu: Move expedited grace period (GP) work to RT kthread_worker") Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 2 ++ kernel/rcu/tree_exp.h | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 38c86f2c040b..f2c10d351b59 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4743,6 +4743,7 @@ static void __init rcu_start_exp_gp_kworkers(void) rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { pr_err("Failed to create %s!\n", gp_kworker_name); + rcu_exp_gp_kworker = NULL; return; } @@ -4751,6 +4752,7 @@ static void __init rcu_start_exp_gp_kworkers(void) pr_err("Failed to create %s!\n", par_gp_kworker_name); rcu_exp_par_gp_kworker = NULL; kthread_destroy_worker(rcu_exp_gp_kworker); + rcu_exp_gp_kworker = NULL; return; } diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 014ddf672165..6123a60d9a4d 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -427,7 +427,12 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_exp_gp_kworker); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_exp_par_gp_kworker); } @@ -477,7 +482,12 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) __sync_rcu_exp_select_node_cpus(rewp); } -static inline bool rcu_gp_par_worker_started(void) +static inline bool rcu_exp_worker_started(void) +{ + return !!READ_ONCE(rcu_gp_wq); +} + +static inline bool rcu_exp_par_worker_started(void) { return !!READ_ONCE(rcu_par_gp_wq); } @@ -540,7 +550,7 @@ static void sync_rcu_exp_select_cpus(void) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - if (!rcu_gp_par_worker_started() || + if (!rcu_exp_par_worker_started() || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { /* No worker started yet or last leaf, do direct call. 
*/ @@ -955,7 +965,7 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) */ void synchronize_rcu_expedited(void) { - bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); + bool use_worker; unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; @@ -966,6 +976,9 @@ void synchronize_rcu_expedited(void) lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu_expedited() in RCU read-side critical section"); + use_worker = (rcu_scheduler_active != RCU_SCHEDULER_INIT) && + rcu_exp_worker_started(); + /* Is the state is such that the call is a grace period? */ if (rcu_blocking_is_gp()) { // Note well that this code runs with !PREEMPT && !SMP. @@ -995,7 +1008,7 @@ void synchronize_rcu_expedited(void) return; /* Someone else did our work for us. */ /* Ensure that load happens before action based on it. */ - if (unlikely(boottime)) { + if (unlikely(!use_worker)) { /* Direct call during scheduler init and early_initcalls(). */ rcu_exp_sel_wait_wake(s); } else { @@ -1013,7 +1026,7 @@ void synchronize_rcu_expedited(void) /* Let the next expedited grace period start. */ mutex_unlock(&rcu_state.exp_mutex); - if (likely(!boottime)) + if (likely(use_worker)) synchronize_rcu_expedited_destroy_work(&rew); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 7836b270607676ed1c0c6a4a840a2ede9437a6a1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:17 +0100 Subject: rcu: s/boost_kthread_mutex/kthread_mutex This mutex is currently protecting per node boost kthreads creation and affinity setting across CPU hotplug operations. Since the expedited kworkers will soon be split per node as well, they will be subject to the same concurrency constraints against hotplug. Therefore their creation and affinity tuning operations will be grouped with those of boost kthreads and then rely on the same mutex. To prepare for that, generalize its name. Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 2 +- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f2c10d351b59..cdb80835c469 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4918,7 +4918,7 @@ static void __init rcu_init_one(void) init_waitqueue_head(&rnp->exp_wq[2]); init_waitqueue_head(&rnp->exp_wq[3]); spin_lock_init(&rnp->exp_lock); - mutex_init(&rnp->boost_kthread_mutex); + mutex_init(&rnp->kthread_mutex); raw_spin_lock_init(&rnp->exp_poll_lock); rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED; INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e9821a8422db..13e7b0d907ab 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -113,7 +113,7 @@ struct rcu_node { /* side effect, not as a lock. */ unsigned long boost_time; /* When to start boosting (jiffies). */ - struct mutex boost_kthread_mutex; + struct mutex kthread_mutex; /* Exclusion for thread spawning and affinity */ /* manipulation. 
*/ struct task_struct *boost_kthread_task; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 41021080ad25..0d307674915c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1195,7 +1195,7 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) struct sched_param sp; struct task_struct *t; - mutex_lock(&rnp->boost_kthread_mutex); + mutex_lock(&rnp->kthread_mutex); if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) goto out; @@ -1212,7 +1212,7 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ out: - mutex_unlock(&rnp->boost_kthread_mutex); + mutex_unlock(&rnp->kthread_mutex); } /* @@ -1224,7 +1224,7 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) * no outgoing CPU. If there are no CPUs left in the affinity set, * this function allows the kthread to execute on any CPU. * - * Any future concurrent calls are serialized via ->boost_kthread_mutex. + * Any future concurrent calls are serialized via ->kthread_mutex. */ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { @@ -1237,7 +1237,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) return; if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; - mutex_lock(&rnp->boost_kthread_mutex); + mutex_lock(&rnp->kthread_mutex); mask = rcu_rnp_online_cpus(rnp); for_each_leaf_node_possible_cpu(rnp, cpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && @@ -1250,7 +1250,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpumask_clear_cpu(outgoingcpu, cm); } set_cpus_allowed_ptr(t, cm); - mutex_unlock(&rnp->boost_kthread_mutex); + mutex_unlock(&rnp->kthread_mutex); free_cpumask_var(cm); } -- cgit v1.2.3 From c19e5d3b497a3036f800edf751dc7814e3e887e1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:18 +0100 Subject: rcu/exp: Move expedited kthread worker creation functions above rcutree_prepare_cpu() The expedited kthread worker performing the per node initialization is going to be split into per node kthreads. As such, the future per node kthread creation will need to be called from CPU hotplug callbacks instead of an initcall, right beside the per node boost kthread creation. To prepare for that, move the kthread worker creation above rcutree_prepare_cpu() as a first step to make the review smoother for the upcoming modifications. No intended functional change. Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. 
McKenney Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 96 +++++++++++++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cdb80835c469..657ac12f9e27 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4394,6 +4394,54 @@ rcu_boot_init_percpu_data(int cpu) rcu_boot_init_nocb_percpu_data(rdp); } +#ifdef CONFIG_RCU_EXP_KTHREAD +struct kthread_worker *rcu_exp_gp_kworker; +struct kthread_worker *rcu_exp_par_gp_kworker; + +static void __init rcu_start_exp_gp_kworkers(void) +{ + const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker"; + const char *gp_kworker_name = "rcu_exp_gp_kthread_worker"; + struct sched_param param = { .sched_priority = kthread_prio }; + + rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); + if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { + pr_err("Failed to create %s!\n", gp_kworker_name); + rcu_exp_gp_kworker = NULL; + return; + } + + rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); + if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { + pr_err("Failed to create %s!\n", par_gp_kworker_name); + rcu_exp_par_gp_kworker = NULL; + kthread_destroy_worker(rcu_exp_gp_kworker); + rcu_exp_gp_kworker = NULL; + return; + } + + sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, ¶m); + sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, + ¶m); +} + +static inline void rcu_alloc_par_gp_wq(void) +{ +} +#else /* !CONFIG_RCU_EXP_KTHREAD */ +struct workqueue_struct *rcu_par_gp_wq; + +static void __init rcu_start_exp_gp_kworkers(void) +{ +} + +static inline void rcu_alloc_par_gp_wq(void) +{ + rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); + WARN_ON(!rcu_par_gp_wq); +} +#endif /* CONFIG_RCU_EXP_KTHREAD */ + /* * Invoked early in the CPU-online process, when pretty much all services * are available. The incoming CPU is not present. 
@@ -4730,54 +4778,6 @@ static int rcu_pm_notify(struct notifier_block *self, return NOTIFY_OK; } -#ifdef CONFIG_RCU_EXP_KTHREAD -struct kthread_worker *rcu_exp_gp_kworker; -struct kthread_worker *rcu_exp_par_gp_kworker; - -static void __init rcu_start_exp_gp_kworkers(void) -{ - const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker"; - const char *gp_kworker_name = "rcu_exp_gp_kthread_worker"; - struct sched_param param = { .sched_priority = kthread_prio }; - - rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); - if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { - pr_err("Failed to create %s!\n", gp_kworker_name); - rcu_exp_gp_kworker = NULL; - return; - } - - rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); - if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { - pr_err("Failed to create %s!\n", par_gp_kworker_name); - rcu_exp_par_gp_kworker = NULL; - kthread_destroy_worker(rcu_exp_gp_kworker); - rcu_exp_gp_kworker = NULL; - return; - } - - sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, ¶m); - sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, - ¶m); -} - -static inline void rcu_alloc_par_gp_wq(void) -{ -} -#else /* !CONFIG_RCU_EXP_KTHREAD */ -struct workqueue_struct *rcu_par_gp_wq; - -static void __init rcu_start_exp_gp_kworkers(void) -{ -} - -static inline void rcu_alloc_par_gp_wq(void) -{ - rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); - WARN_ON(!rcu_par_gp_wq); -} -#endif /* CONFIG_RCU_EXP_KTHREAD */ - /* * Spawn the kthreads that handle RCU's grace periods. */ -- cgit v1.2.3 From 8e5e621566485a3e160c0d8bfba206cb1d6b980d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:19 +0100 Subject: rcu/exp: Make parallel exp gp kworker per rcu node When CONFIG_RCU_EXP_KTHREAD=n, the expedited grace period per node initialization is performed in parallel via workqueues (one work per node). However in CONFIG_RCU_EXP_KTHREAD=y, this per node initialization is performed by a single kworker serializing each node initialization (one work for all nodes). The second part is certainly less scalable and efficient beyond a single leaf node. To improve this, expand this single kworker into per-node kworkers. This new layout is eventually intended to remove the workqueues based implementation since it will essentially now become duplicate code. Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. 
McKenney Signed-off-by: Boqun Feng --- kernel/rcu/rcu.h | 1 - kernel/rcu/tree.c | 61 ++++++++++++++++++++++++++++++++---------------- kernel/rcu/tree.h | 3 +++ kernel/rcu/tree_exp.h | 10 ++++---- kernel/rcu/tree_plugin.h | 10 +++----- 5 files changed, 52 insertions(+), 33 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index f94f65877f2b..6beaf70d629f 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -625,7 +625,6 @@ void rcu_force_quiescent_state(void); extern struct workqueue_struct *rcu_gp_wq; #ifdef CONFIG_RCU_EXP_KTHREAD extern struct kthread_worker *rcu_exp_gp_kworker; -extern struct kthread_worker *rcu_exp_par_gp_kworker; #else /* !CONFIG_RCU_EXP_KTHREAD */ extern struct workqueue_struct *rcu_par_gp_wq; #endif /* CONFIG_RCU_EXP_KTHREAD */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 657ac12f9e27..398c099d45d9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4396,33 +4396,39 @@ rcu_boot_init_percpu_data(int cpu) #ifdef CONFIG_RCU_EXP_KTHREAD struct kthread_worker *rcu_exp_gp_kworker; -struct kthread_worker *rcu_exp_par_gp_kworker; -static void __init rcu_start_exp_gp_kworkers(void) +static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) { - const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker"; - const char *gp_kworker_name = "rcu_exp_gp_kthread_worker"; + struct kthread_worker *kworker; + const char *name = "rcu_exp_par_gp_kthread_worker/%d"; struct sched_param param = { .sched_priority = kthread_prio }; + int rnp_index = rnp - rcu_get_root(); - rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); - if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { - pr_err("Failed to create %s!\n", gp_kworker_name); - rcu_exp_gp_kworker = NULL; + if (rnp->exp_kworker) + return; + + kworker = kthread_create_worker(0, name, rnp_index); + if (IS_ERR_OR_NULL(kworker)) { + pr_err("Failed to create par gp kworker on %d/%d\n", + rnp->grplo, rnp->grphi); return; } + WRITE_ONCE(rnp->exp_kworker, kworker); + sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); +} - rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); - if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { - pr_err("Failed to create %s!\n", par_gp_kworker_name); - rcu_exp_par_gp_kworker = NULL; - kthread_destroy_worker(rcu_exp_gp_kworker); +static void __init rcu_start_exp_gp_kworker(void) +{ + const char *name = "rcu_exp_gp_kthread_worker"; + struct sched_param param = { .sched_priority = kthread_prio }; + + rcu_exp_gp_kworker = kthread_create_worker(0, name); + if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { + pr_err("Failed to create %s!\n", name); rcu_exp_gp_kworker = NULL; return; } - sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, ¶m); - sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, - ¶m); } static inline void rcu_alloc_par_gp_wq(void) @@ -4431,7 +4437,11 @@ static inline void rcu_alloc_par_gp_wq(void) #else /* !CONFIG_RCU_EXP_KTHREAD */ struct workqueue_struct *rcu_par_gp_wq; -static void __init rcu_start_exp_gp_kworkers(void) +static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) +{ +} + +static void __init rcu_start_exp_gp_kworker(void) { } @@ -4442,6 +4452,17 @@ static inline void rcu_alloc_par_gp_wq(void) } #endif /* CONFIG_RCU_EXP_KTHREAD */ +static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp) +{ + if ((IS_ENABLED(CONFIG_RCU_EXP_KTHREAD) || + IS_ENABLED(CONFIG_RCU_BOOST)) && rcu_scheduler_fully_active) { + mutex_lock(&rnp->kthread_mutex); + 
rcu_spawn_one_boost_kthread(rnp); + rcu_spawn_exp_par_gp_kworker(rnp); + mutex_unlock(&rnp->kthread_mutex); + } +} + /* * Invoked early in the CPU-online process, when pretty much all services * are available. The incoming CPU is not present. @@ -4490,7 +4511,7 @@ int rcutree_prepare_cpu(unsigned int cpu) rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); - rcu_spawn_one_boost_kthread(rnp); + rcu_spawn_rnp_kthreads(rnp); rcu_spawn_cpu_nocb_kthread(cpu); WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1); @@ -4812,10 +4833,10 @@ static int __init rcu_spawn_gp_kthread(void) * due to rcu_scheduler_fully_active. */ rcu_spawn_cpu_nocb_kthread(smp_processor_id()); - rcu_spawn_one_boost_kthread(rdp->mynode); + rcu_spawn_rnp_kthreads(rdp->mynode); rcu_spawn_core_kthreads(); /* Create kthread worker for expedited GPs */ - rcu_start_exp_gp_kworkers(); + rcu_start_exp_gp_kworker(); return 0; } early_initcall(rcu_spawn_gp_kthread); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 13e7b0d907ab..e173808f486f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -72,6 +72,9 @@ struct rcu_node { /* Online CPUs for next expedited GP. */ /* Any CPU that has ever been online will */ /* have its bit set. */ + struct kthread_worker *exp_kworker; + /* Workers performing per node expedited GP */ + /* initialization. */ unsigned long cbovldmask; /* CPUs experiencing callback overload. */ unsigned long ffmask; /* Fully functional CPUs. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6123a60d9a4d..0318a8a062d5 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -432,9 +432,9 @@ static inline bool rcu_exp_worker_started(void) return !!READ_ONCE(rcu_exp_gp_kworker); } -static inline bool rcu_exp_par_worker_started(void) +static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp) { - return !!READ_ONCE(rcu_exp_par_gp_kworker); + return !!READ_ONCE(rnp->exp_kworker); } static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) @@ -445,7 +445,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) * another work item on the same kthread worker can result in * deadlock. */ - kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work); + kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work); } static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) @@ -487,7 +487,7 @@ static inline bool rcu_exp_worker_started(void) return !!READ_ONCE(rcu_gp_wq); } -static inline bool rcu_exp_par_worker_started(void) +static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp) { return !!READ_ONCE(rcu_par_gp_wq); } @@ -550,7 +550,7 @@ static void sync_rcu_exp_select_cpus(void) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - if (!rcu_exp_par_worker_started() || + if (!rcu_exp_par_worker_started(rnp) || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { /* No worker started yet or last leaf, do direct call. 
*/ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0d307674915c..09bdd36ca9ff 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) struct sched_param sp; struct task_struct *t; - mutex_lock(&rnp->kthread_mutex); - if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) - goto out; + if (rnp->boost_kthread_task) + return; t = kthread_create(rcu_boost_kthread, (void *)rnp, "rcub/%d", rnp_index); if (WARN_ON_ONCE(IS_ERR(t))) - goto out; + return; raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->boost_kthread_task = t; @@ -1210,9 +1209,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ - - out: - mutex_unlock(&rnp->kthread_mutex); } /* -- cgit v1.2.3 From b67cffcbbf9dc759d95d330a5af5d1480af2b1f1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:20 +0100 Subject: rcu/exp: Handle parallel exp gp kworkers affinity Affine the parallel expedited gp kworkers to their respective RCU node in order to make them close to the cache their are playing with. This reuses the boost kthreads machinery that probe into CPU hotplug operations such that the kthreads become/stay affine to their respective node as soon/long as they contain online CPUs. Otherwise and if the current CPU going down was the last online on the leaf node, the related kthread is affine to the housekeeping CPUs. In the long run, this affinity VS CPU hotplug operation game should probably be implemented at the generic kthread level. Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney [boqun: s/* rcu_boost_task/*rcu_boost_task as reported by checkpatch] Signed-off-by: Boqun Feng --- kernel/rcu/tree.c | 79 +++++++++++++++++++++++++++++++++++++++++++++--- kernel/rcu/tree_plugin.h | 42 +++---------------------- 2 files changed, 78 insertions(+), 43 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 398c099d45d9..312c4c5d4509 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -145,7 +145,7 @@ static int rcu_scheduler_fully_active __read_mostly; static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp, unsigned long gps, unsigned long flags); -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); +static struct task_struct *rcu_boost_task(struct rcu_node *rnp); static void invoke_rcu_core(void); static void rcu_report_exp_rdp(struct rcu_data *rdp); static void sync_sched_exp_online_cleanup(int cpu); @@ -4417,6 +4417,16 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); } +static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp) +{ + struct kthread_worker *kworker = READ_ONCE(rnp->exp_kworker); + + if (!kworker) + return NULL; + + return kworker->task; +} + static void __init rcu_start_exp_gp_kworker(void) { const char *name = "rcu_exp_gp_kthread_worker"; @@ -4441,6 +4451,11 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) { } +static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp) +{ + return NULL; +} + static void __init rcu_start_exp_gp_kworker(void) { } @@ -4519,13 +4534,67 @@ int rcutree_prepare_cpu(unsigned int cpu) } /* - * Update RCU priority boot kthread affinity for CPU-hotplug changes. 
+ * Update kthreads affinity during CPU-hotplug changes. + * + * Set the per-rcu_node kthread's affinity to cover all CPUs that are + * served by the rcu_node in question. The CPU hotplug lock is still + * held, so the value of rnp->qsmaskinit will be stable. + * + * We don't include outgoingcpu in the affinity set, use -1 if there is + * no outgoing CPU. If there are no CPUs left in the affinity set, + * this function allows the kthread to execute on any CPU. + * + * Any future concurrent calls are serialized via ->kthread_mutex. */ -static void rcutree_affinity_setting(unsigned int cpu, int outgoing) +static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu) { - struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + cpumask_var_t cm; + unsigned long mask; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct task_struct *task_boost, *task_exp; + + if (!IS_ENABLED(CONFIG_RCU_EXP_KTHREAD) && !IS_ENABLED(CONFIG_RCU_BOOST)) + return; + + rdp = per_cpu_ptr(&rcu_data, cpu); + rnp = rdp->mynode; + + task_boost = rcu_boost_task(rnp); + task_exp = rcu_exp_par_gp_task(rnp); + + /* + * If CPU is the boot one, those tasks are created later from early + * initcall since kthreadd must be created first. + */ + if (!task_boost && !task_exp) + return; + + if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) + return; + + mutex_lock(&rnp->kthread_mutex); + mask = rcu_rnp_online_cpus(rnp); + for_each_leaf_node_possible_cpu(rnp, cpu) + if ((mask & leaf_node_cpu_bit(rnp, cpu)) && + cpu != outgoingcpu) + cpumask_set_cpu(cpu, cm); + cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU)); + if (cpumask_empty(cm)) { + cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU)); + if (outgoingcpu >= 0) + cpumask_clear_cpu(outgoingcpu, cm); + } + + if (task_exp) + set_cpus_allowed_ptr(task_exp, cm); + + if (task_boost) + set_cpus_allowed_ptr(task_boost, cm); + + mutex_unlock(&rnp->kthread_mutex); - rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); + free_cpumask_var(cm); } /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 09bdd36ca9ff..36a8b5dbf5b5 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1211,43 +1211,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ } -/* - * Set the per-rcu_node kthread's affinity to cover all CPUs that are - * served by the rcu_node in question. The CPU hotplug lock is still - * held, so the value of rnp->qsmaskinit will be stable. - * - * We don't include outgoingcpu in the affinity set, use -1 if there is - * no outgoing CPU. If there are no CPUs left in the affinity set, - * this function allows the kthread to execute on any CPU. - * - * Any future concurrent calls are serialized via ->kthread_mutex. 
- */ -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +static struct task_struct *rcu_boost_task(struct rcu_node *rnp) { - struct task_struct *t = rnp->boost_kthread_task; - unsigned long mask; - cpumask_var_t cm; - int cpu; - - if (!t) - return; - if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) - return; - mutex_lock(&rnp->kthread_mutex); - mask = rcu_rnp_online_cpus(rnp); - for_each_leaf_node_possible_cpu(rnp, cpu) - if ((mask & leaf_node_cpu_bit(rnp, cpu)) && - cpu != outgoingcpu) - cpumask_set_cpu(cpu, cm); - cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU)); - if (cpumask_empty(cm)) { - cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU)); - if (outgoingcpu >= 0) - cpumask_clear_cpu(outgoingcpu, cm); - } - set_cpus_allowed_ptr(t, cm); - mutex_unlock(&rnp->kthread_mutex); - free_cpumask_var(cm); + return READ_ONCE(rnp->boost_kthread_task); } #else /* #ifdef CONFIG_RCU_BOOST */ @@ -1266,10 +1232,10 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) { } -static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) +static struct task_struct *rcu_boost_task(struct rcu_node *rnp) { + return NULL; } - #endif /* #else #ifdef CONFIG_RCU_BOOST */ /* -- cgit v1.2.3 From 23da2ad64dbe9f3fab10af90484fe41e144337b1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Jan 2024 16:46:21 +0100 Subject: rcu/exp: Remove rcu_par_gp_wq TREE04 running on short iterations can produce writer stalls of the following kind: ??? Writer stall state RTWS_EXP_SYNC(4) g3968 f0x0 ->state 0x2 cpu 0 task:rcu_torture_wri state:D stack:14568 pid:83 ppid:2 flags:0x00004000 Call Trace: __schedule+0x2de/0x850 ? trace_event_raw_event_rcu_exp_funnel_lock+0x6d/0xb0 schedule+0x4f/0x90 synchronize_rcu_expedited+0x430/0x670 ? __pfx_autoremove_wake_function+0x10/0x10 ? __pfx_synchronize_rcu_expedited+0x10/0x10 do_rtws_sync.constprop.0+0xde/0x230 rcu_torture_writer+0x4b4/0xcd0 ? __pfx_rcu_torture_writer+0x10/0x10 kthread+0xc7/0xf0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Waiting for an expedited grace period and polling for an expedited grace period both are operations that internally rely on the same workqueue performing necessary asynchronous work. However, a dependency chain is involved between those two operations, as depicted below: ====== CPU 0 ======= ====== CPU 1 ======= synchronize_rcu_expedited() exp_funnel_lock() mutex_lock(&rcu_state.exp_mutex); start_poll_synchronize_rcu_expedited queue_work(rcu_gp_wq, &rnp->exp_poll_wq); synchronize_rcu_expedited_queue_work() queue_work(rcu_gp_wq, &rew->rew_work); wait_event() // A, wait for &rew->rew_work completion mutex_unlock() // B //======> switch to kworker sync_rcu_do_polled_gp() { synchronize_rcu_expedited() exp_funnel_lock() mutex_lock(&rcu_state.exp_mutex); // C, wait B .... } // D Since workqueues are usually implemented on top of several kworkers handling the queue concurrently, the above situation wouldn't deadlock most of the time because A then doesn't depend on D. But in case of memory stress, a single kworker may end up handling alone all the works in a serialized way. In that case the above layout becomes a problem because A then waits for D, closing a circular dependency: A -> D -> C -> B -> A This however only happens when CONFIG_RCU_EXP_KTHREAD=n. 
Indeed synchronize_rcu_expedited() is otherwise implemented on top of a kthread worker while polling still relies on rcu_gp_wq workqueue, breaking the above circular dependency chain. Fix this with making expedited grace period to always rely on kthread worker. The workqueue based implementation is essentially a duplicate anyway now that the per-node initialization is performed by per-node kthread workers. Meanwhile the CONFIG_RCU_EXP_KTHREAD switch is still kept around to manage the scheduler policy of these kthread workers. Reported-by: Anna-Maria Behnsen Reported-by: Thomas Gleixner Suggested-by: Joel Fernandes Suggested-by: Paul E. McKenney Suggested-by: Neeraj upadhyay Signed-off-by: Frederic Weisbecker Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng --- kernel/rcu/rcu.h | 4 --- kernel/rcu/tree.c | 40 +++++----------------------- kernel/rcu/tree.h | 6 +---- kernel/rcu/tree_exp.h | 73 +-------------------------------------------------- 4 files changed, 8 insertions(+), 115 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 6beaf70d629f..99032b9cb667 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -623,11 +623,7 @@ int rcu_get_gp_kthreads_prio(void); void rcu_fwd_progress_check(unsigned long j); void rcu_force_quiescent_state(void); extern struct workqueue_struct *rcu_gp_wq; -#ifdef CONFIG_RCU_EXP_KTHREAD extern struct kthread_worker *rcu_exp_gp_kworker; -#else /* !CONFIG_RCU_EXP_KTHREAD */ -extern struct workqueue_struct *rcu_par_gp_wq; -#endif /* CONFIG_RCU_EXP_KTHREAD */ void rcu_gp_slow_register(atomic_t *rgssp); void rcu_gp_slow_unregister(atomic_t *rgssp); #endif /* #else #ifdef CONFIG_TINY_RCU */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 312c4c5d4509..9591c22408a1 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4394,7 +4394,6 @@ rcu_boot_init_percpu_data(int cpu) rcu_boot_init_nocb_percpu_data(rdp); } -#ifdef CONFIG_RCU_EXP_KTHREAD struct kthread_worker *rcu_exp_gp_kworker; static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) @@ -4414,7 +4413,9 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) return; } WRITE_ONCE(rnp->exp_kworker, kworker); - sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); + + if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD)) + sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, ¶m); } static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp) @@ -4438,39 +4439,14 @@ static void __init rcu_start_exp_gp_kworker(void) rcu_exp_gp_kworker = NULL; return; } - sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, ¶m); -} - -static inline void rcu_alloc_par_gp_wq(void) -{ -} -#else /* !CONFIG_RCU_EXP_KTHREAD */ -struct workqueue_struct *rcu_par_gp_wq; - -static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) -{ -} - -static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp) -{ - return NULL; -} - -static void __init rcu_start_exp_gp_kworker(void) -{ -} -static inline void rcu_alloc_par_gp_wq(void) -{ - rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); - WARN_ON(!rcu_par_gp_wq); + if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD)) + sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, ¶m); } -#endif /* CONFIG_RCU_EXP_KTHREAD */ static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp) { - if ((IS_ENABLED(CONFIG_RCU_EXP_KTHREAD) || - IS_ENABLED(CONFIG_RCU_BOOST)) && rcu_scheduler_fully_active) { + if (rcu_scheduler_fully_active) { mutex_lock(&rnp->kthread_mutex); rcu_spawn_one_boost_kthread(rnp); 
rcu_spawn_exp_par_gp_kworker(rnp); @@ -4554,9 +4530,6 @@ static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu) struct rcu_node *rnp; struct task_struct *task_boost, *task_exp; - if (!IS_ENABLED(CONFIG_RCU_EXP_KTHREAD) && !IS_ENABLED(CONFIG_RCU_BOOST)) - return; - rdp = per_cpu_ptr(&rcu_data, cpu); rnp = rdp->mynode; @@ -5245,7 +5218,6 @@ void __init rcu_init(void) /* Create workqueue for Tree SRCU and for expedited GPs. */ rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); WARN_ON(!rcu_gp_wq); - rcu_alloc_par_gp_wq(); /* Fill in default value for rcutree.qovld boot parameter. */ /* -After- the rcu_node ->lock fields are initialized! */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e173808f486f..f35e47f24d80 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -21,14 +21,10 @@ #include "rcu_segcblist.h" -/* Communicate arguments to a workqueue handler. */ +/* Communicate arguments to a kthread worker handler. */ struct rcu_exp_work { unsigned long rew_s; -#ifdef CONFIG_RCU_EXP_KTHREAD struct kthread_work rew_work; -#else - struct work_struct rew_work; -#endif /* CONFIG_RCU_EXP_KTHREAD */ }; /* RCU's kthread states for tracing. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 0318a8a062d5..6b83537480b1 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -418,7 +418,6 @@ retry_ipi: static void rcu_exp_sel_wait_wake(unsigned long s); -#ifdef CONFIG_RCU_EXP_KTHREAD static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) { struct rcu_exp_work *rewp = @@ -470,69 +469,6 @@ static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work); } -static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew) -{ -} -#else /* !CONFIG_RCU_EXP_KTHREAD */ -static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) -{ - struct rcu_exp_work *rewp = - container_of(wp, struct rcu_exp_work, rew_work); - - __sync_rcu_exp_select_node_cpus(rewp); -} - -static inline bool rcu_exp_worker_started(void) -{ - return !!READ_ONCE(rcu_gp_wq); -} - -static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp) -{ - return !!READ_ONCE(rcu_par_gp_wq); -} - -static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) -{ - int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); - - INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); - /* If all offline, queue the work on an unbound CPU. */ - if (unlikely(cpu > rnp->grphi - rnp->grplo)) - cpu = WORK_CPU_UNBOUND; - else - cpu += rnp->grplo; - queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); -} - -static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) -{ - flush_work(&rnp->rew.rew_work); -} - -/* - * Work-queue handler to drive an expedited grace period forward. - */ -static void wait_rcu_exp_gp(struct work_struct *wp) -{ - struct rcu_exp_work *rewp; - - rewp = container_of(wp, struct rcu_exp_work, rew_work); - rcu_exp_sel_wait_wake(rewp->rew_s); -} - -static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew) -{ - INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp); - queue_work(rcu_gp_wq, &rew->rew_work); -} - -static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew) -{ - destroy_work_on_stack(&rew->rew_work); -} -#endif /* CONFIG_RCU_EXP_KTHREAD */ - /* * Select the nodes that the upcoming expedited grace period needs * to wait for. 
@@ -965,7 +901,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) */ void synchronize_rcu_expedited(void) { - bool use_worker; unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; @@ -976,9 +911,6 @@ void synchronize_rcu_expedited(void) lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu_expedited() in RCU read-side critical section"); - use_worker = (rcu_scheduler_active != RCU_SCHEDULER_INIT) && - rcu_exp_worker_started(); - /* Is the state is such that the call is a grace period? */ if (rcu_blocking_is_gp()) { // Note well that this code runs with !PREEMPT && !SMP. @@ -1008,7 +940,7 @@ void synchronize_rcu_expedited(void) return; /* Someone else did our work for us. */ /* Ensure that load happens before action based on it. */ - if (unlikely(!use_worker)) { + if (unlikely((rcu_scheduler_active == RCU_SCHEDULER_INIT) || !rcu_exp_worker_started())) { /* Direct call during scheduler init and early_initcalls(). */ rcu_exp_sel_wait_wake(s); } else { @@ -1025,9 +957,6 @@ void synchronize_rcu_expedited(void) /* Let the next expedited grace period start. */ mutex_unlock(&rcu_state.exp_mutex); - - if (likely(use_worker)) - synchronize_rcu_expedited_destroy_work(&rew); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 7f66f099de4dc4b1a66a3f94e6db16409924a6f8 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Sun, 3 Dec 2023 01:12:52 +0000 Subject: rcu: Provide a boot time parameter to control lazy RCU To allow more flexible arrangements while still provide a single kernel for distros, provide a boot time parameter to enable/disable lazy RCU. Specify: rcutree.enable_rcu_lazy=[y|1|n|0] Which also requires rcu_nocbs=all at boot time to enable/disable lazy RCU. To disable it by default at build time when CONFIG_RCU_LAZY=y, the new CONFIG_RCU_LAZY_DEFAULT_OFF can be used. Signed-off-by: Qais Yousef (Google) Tested-by: Andrea Righi Reviewed-by: Paul E. McKenney Signed-off-by: Boqun Feng --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ kernel/rcu/Kconfig | 13 +++++++++++++ kernel/rcu/tree.c | 7 ++++++- 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 31b3a25680d0..b6c848c29a53 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5034,6 +5034,11 @@ this kernel boot parameter, forcibly setting it to zero. + rcutree.enable_rcu_lazy= [KNL] + To save power, batch RCU callbacks and flush after + delay, memory pressure or callback list growing too + big. + rcuscale.gp_async= [KNL] Measure performance of asynchronous grace-period primitives such as call_rcu(). diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index bdd7eadb33d8..e7d2dd267593 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -314,6 +314,19 @@ config RCU_LAZY To save power, batch RCU callbacks and flush after delay, memory pressure, or callback list growing too big. + Requires rcu_nocbs=all to be set. + + Use rcutree.enable_rcu_lazy=0 to turn it off at boot time. + +config RCU_LAZY_DEFAULT_OFF + bool "Turn RCU lazy invocation off by default" + depends on RCU_LAZY + default n + help + Allows building the kernel with CONFIG_RCU_LAZY=y yet keep it default + off. Boot time param rcutree.enable_rcu_lazy=1 can be used to switch + it back on. 
+ config RCU_DOUBLE_CHECK_CB_TIME bool "RCU callback-batch backup time check" depends on RCU_EXPERT diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b2bccfd37c38..41c50a6c607e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2753,6 +2753,9 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) } #ifdef CONFIG_RCU_LAZY +static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF); +module_param(enable_rcu_lazy, bool, 0444); + /** * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and * flush all lazy callbacks (including the new one) to the main ->cblist while @@ -2778,6 +2781,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) __call_rcu_common(head, func, false); } EXPORT_SYMBOL_GPL(call_rcu_hurry); +#else +#define enable_rcu_lazy false #endif /** @@ -2826,7 +2831,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry); */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { - __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); + __call_rcu_common(head, func, enable_rcu_lazy); } EXPORT_SYMBOL_GPL(call_rcu); -- cgit v1.2.3 From 30ef09635b9ed3ebca4f677495332a2e444a5cda Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 22 Feb 2024 12:29:54 -0800 Subject: rcu-tasks: Initialize callback lists at rcu_init() time In order for RCU Tasks to reliably maintain per-CPU lists of exiting tasks, those lists must be initialized before it is possible for tasks to exit, especially given that the boot CPU is not necessarily CPU 0 (an example being, powerpc kexec() kernels). And at the time that rcu_init_tasks_generic() is called, a task could potentially exit, unconventional though that sort of thing might be. This commit therefore moves the calls to cblist_init_generic() from functions called from rcu_init_tasks_generic() to a new function named tasks_cblist_init_generic() that is invoked from rcu_init(). This constituted a bug in a commit that never went to mainline, so there is no need for any backporting to -stable. Reported-by: Frederic Weisbecker Signed-off-by: Paul E. 
McKenney Signed-off-by: Boqun Feng --- kernel/rcu/rcu.h | 6 ++++++ kernel/rcu/tasks.h | 24 ++++++++++++++++++------ kernel/rcu/tiny.c | 1 + kernel/rcu/tree.c | 2 ++ 4 files changed, 27 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index f94f65877f2b..ef63ea59c8b6 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -528,6 +528,12 @@ struct task_struct *get_rcu_tasks_gp_kthread(void); struct task_struct *get_rcu_tasks_rude_gp_kthread(void); #endif // # ifdef CONFIG_TASKS_RUDE_RCU +#ifdef CONFIG_TASKS_RCU_GENERIC +void tasks_cblist_init_generic(void); +#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ +static inline void tasks_cblist_init_generic(void) { } +#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ + #define RCU_SCHEDULER_INACTIVE 0 #define RCU_SCHEDULER_INIT 1 #define RCU_SCHEDULER_RUNNING 2 diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index b7d5f2757053..6961a1b5b783 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -242,7 +242,6 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp) static void cblist_init_generic(struct rcu_tasks *rtp) { int cpu; - unsigned long flags; int lim; int shift; @@ -268,10 +267,8 @@ static void cblist_init_generic(struct rcu_tasks *rtp) WARN_ON_ONCE(!rtpcp); if (cpu) raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock)); - local_irq_save(flags); // serialize initialization if (rcu_segcblist_empty(&rtpcp->cblist)) rcu_segcblist_init(&rtpcp->cblist); - local_irq_restore(flags); INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq); rtpcp->cpu = cpu; rtpcp->rtpp = rtp; @@ -1120,7 +1117,6 @@ module_param(rcu_tasks_lazy_ms, int, 0444); static int __init rcu_spawn_tasks_kthread(void) { - cblist_init_generic(&rcu_tasks); rcu_tasks.gp_sleep = HZ / 10; rcu_tasks.init_fract = HZ / 10; if (rcu_tasks_lazy_ms >= 0) @@ -1284,7 +1280,6 @@ module_param(rcu_tasks_rude_lazy_ms, int, 0444); static int __init rcu_spawn_tasks_rude_kthread(void) { - cblist_init_generic(&rcu_tasks_rude); rcu_tasks_rude.gp_sleep = HZ / 10; if (rcu_tasks_rude_lazy_ms >= 0) rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms); @@ -1916,7 +1911,6 @@ module_param(rcu_tasks_trace_lazy_ms, int, 0444); static int __init rcu_spawn_tasks_trace_kthread(void) { - cblist_init_generic(&rcu_tasks_trace); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) { rcu_tasks_trace.gp_sleep = HZ / 10; rcu_tasks_trace.init_fract = HZ / 10; @@ -2088,6 +2082,24 @@ late_initcall(rcu_tasks_verify_schedule_work); static void rcu_tasks_initiate_self_tests(void) { } #endif /* #else #ifdef CONFIG_PROVE_RCU */ +void __init tasks_cblist_init_generic(void) +{ + lockdep_assert_irqs_disabled(); + WARN_ON(num_online_cpus() > 1); + +#ifdef CONFIG_TASKS_RCU + cblist_init_generic(&rcu_tasks); +#endif + +#ifdef CONFIG_TASKS_RUDE_RCU + cblist_init_generic(&rcu_tasks_rude); +#endif + +#ifdef CONFIG_TASKS_TRACE_RCU + cblist_init_generic(&rcu_tasks_trace); +#endif +} + void __init rcu_init_tasks_generic(void) { #ifdef CONFIG_TASKS_RCU diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index fec804b79080..705c0d16850a 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -261,4 +261,5 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); rcu_early_boot_tests(); + tasks_cblist_init_generic(); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b2bccfd37c38..ba9137f39d14 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -5165,6 +5165,8 @@ void __init rcu_init(void) 
(void)start_poll_synchronize_rcu_expedited(); rcu_test_sync_prims(); + + tasks_cblist_init_generic(); } #include "tree_stall.h" -- cgit v1.2.3
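The litmus test quoted in the first patch of this series can be checked against the Linux-kernel memory model shipped in the kernel tree under tools/memory-model. As a rough sketch, assuming the test is saved to a file named smp_mb__after_unlock_lock.litmus (the file name is arbitrary and not part of the series), a typical invocation from the tools/memory-model directory is:

	herd7 -conf linux-kernel.cfg smp_mb__after_unlock_lock.litmus

With the smp_mb__after_unlock_lock() line in place, herd7 should report the exists clause as "Never"; removing that line should flip the verdict to "Sometimes", which is the bad outcome the changelog describes.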