From f4ecea309d3e17ba5e90082308125ad23bd5701b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Jul 2015 17:28:11 -0700 Subject: rcu: Use rsp->expedited_wq instead of sync_rcu_preempt_exp_wq Now that there is an ->expedited_wq waitqueue in each rcu_state structure, there is no need for the sync_rcu_preempt_exp_wq global variable. This commit therefore substitutes ->expedited_wq for sync_rcu_preempt_exp_wq. It also initializes ->expedited_wq only once at boot instead of at the start of each expedited grace period. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b2bf3963a0ae..72df006de798 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -535,8 +535,6 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); -static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); - /* * Return non-zero if there are any tasks in RCU read-side critical * sections blocking the current preemptible-RCU expedited grace period. @@ -590,7 +588,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, raw_spin_unlock_irqrestore(&rnp->lock, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - wake_up(&sync_rcu_preempt_exp_wq); + wake_up(&rsp->expedited_wq); } break; } @@ -729,7 +727,7 @@ void synchronize_rcu_expedited(void) /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); - wait_event(sync_rcu_preempt_exp_wq, + wait_event(rsp->expedited_wq, sync_rcu_preempt_exp_done(rnp)); /* Clean up and exit. */ -- cgit v1.2.3 From 7922cd0e562cb2b8da2c8a0afda2e1c9bb4a94e2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jul 2015 13:34:32 -0700 Subject: rcu: Move rcu_report_exp_rnp() to allow consolidation This is a nearly pure code-movement commit, moving rcu_report_exp_rnp(), sync_rcu_preempt_exp_done(), and rcu_preempted_readers_exp() so that later commits can make synchronize_sched_expedited() use them. The non-code-movement portion of this commit tags rcu_report_exp_rnp() as __maybe_unused to avoid build errors when CONFIG_PREEMPT=n. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcu/tree_plugin.h | 66 ------------------------------------------------ 2 files changed, 66 insertions(+), 66 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 53d66ebb4811..59af27d8bc6a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3379,6 +3379,72 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s) return rcu_seq_done(&rsp->expedited_sequence, s); } +/* + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(struct rcu_node *rnp) +{ + return rnp->exp_tasks != NULL; +} + +/* + * return non-zero if there is no RCU expedited grace period in progress + * for the specified rcu_node structure, in other words, if all CPUs and + * tasks covered by the specified rcu_node structure have done their bit + * for the current expedited grace period. Works only for preemptible + * RCU -- other RCU implementation use other means. + * + * Caller must hold the root rcu_node's exp_funnel_mutex. + */ +static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) +{ + return !rcu_preempted_readers_exp(rnp) && + READ_ONCE(rnp->expmask) == 0; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. This event is reported either to the rcu_node structure on + * which the task was queued or to one of that rcu_node structure's ancestors, + * recursively up the tree. (Calm down, calm down, we do the recursion + * iteratively!) + * + * Caller must hold the root rcu_node's exp_funnel_mutex. + */ +static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, + struct rcu_node *rnp, bool wake) +{ + unsigned long flags; + unsigned long mask; + + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + for (;;) { + if (!sync_rcu_preempt_exp_done(rnp)) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + break; + } + if (rnp->parent == NULL) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (wake) { + smp_mb(); /* EGP done before wake_up(). */ + wake_up(&rsp->expedited_wq); + } + break; + } + mask = rnp->grpmask; + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ + rnp = rnp->parent; + raw_spin_lock(&rnp->lock); /* irqs already disabled */ + smp_mb__after_unlock_lock(); + rnp->expmask &= ~mask; + } +} + /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp, diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 72df006de798..e73be8539978 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -535,72 +535,6 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); -/* - * Return non-zero if there are any tasks in RCU read-side critical - * sections blocking the current preemptible-RCU expedited grace period. - * If there is no preemptible-RCU expedited grace period currently in - * progress, returns zero unconditionally. - */ -static int rcu_preempted_readers_exp(struct rcu_node *rnp) -{ - return rnp->exp_tasks != NULL; -} - -/* - * return non-zero if there is no RCU expedited grace period in progress - * for the specified rcu_node structure, in other words, if all CPUs and - * tasks covered by the specified rcu_node structure have done their bit - * for the current expedited grace period. Works only for preemptible - * RCU -- other RCU implementation use other means. - * - * Caller must hold the root rcu_node's exp_funnel_mutex. - */ -static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) -{ - return !rcu_preempted_readers_exp(rnp) && - READ_ONCE(rnp->expmask) == 0; -} - -/* - * Report the exit from RCU read-side critical section for the last task - * that queued itself during or before the current expedited preemptible-RCU - * grace period. This event is reported either to the rcu_node structure on - * which the task was queued or to one of that rcu_node structure's ancestors, - * recursively up the tree. (Calm down, calm down, we do the recursion - * iteratively!) - * - * Caller must hold the root rcu_node's exp_funnel_mutex. - */ -static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, - bool wake) -{ - unsigned long flags; - unsigned long mask; - - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - for (;;) { - if (!sync_rcu_preempt_exp_done(rnp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - break; - } - if (rnp->parent == NULL) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (wake) { - smp_mb(); /* EGP done before wake_up(). */ - wake_up(&rsp->expedited_wq); - } - break; - } - mask = rnp->grpmask; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ - rnp = rnp->parent; - raw_spin_lock(&rnp->lock); /* irqs already disabled */ - smp_mb__after_unlock_lock(); - rnp->expmask &= ~mask; - } -} - /* * Snapshot the tasks blocking the newly started preemptible-RCU expedited * grace period for the specified rcu_node structure, phase 1. If there -- cgit v1.2.3 From b9585e940a0d78770cda8f9aebf81b17b4d19e6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jul 2015 16:04:45 -0700 Subject: rcu: Consolidate tree setup for synchronize_rcu_expedited() This commit replaces sync_rcu_preempt_exp_init1(() and sync_rcu_preempt_exp_init2() with sync_exp_reset_tree_hotplug() and sync_exp_reset_tree(), which will also be used by synchronize_sched_expedited(), and sync_rcu_exp_select_nodes(), which contains code specific to synchronize_rcu_expedited(). Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 86 ++++++++++++++++++++++++++++++++++++++- kernel/rcu/tree.h | 17 +++++--- kernel/rcu/tree_plugin.h | 102 +++++++++-------------------------------------- 3 files changed, 115 insertions(+), 90 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 59af27d8bc6a..8526896afea7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3379,6 +3379,87 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s) return rcu_seq_done(&rsp->expedited_sequence, s); } +/* + * Reset the ->expmaskinit values in the rcu_node tree to reflect any + * recent CPU-online activity. Note that these masks are not cleared + * when CPUs go offline, so they reflect the union of all CPUs that have + * ever been online. This means that this function normally takes its + * no-work-to-do fastpath. + */ +static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) +{ + bool done; + unsigned long flags; + unsigned long mask; + unsigned long oldmask; + int ncpus = READ_ONCE(rsp->ncpus); + struct rcu_node *rnp; + struct rcu_node *rnp_up; + + /* If no new CPUs onlined since last time, nothing to do. */ + if (likely(ncpus == rsp->ncpus_snap)) + return; + rsp->ncpus_snap = ncpus; + + /* + * Each pass through the following loop propagates newly onlined + * CPUs for the current rcu_node structure up the rcu_node tree. + */ + rcu_for_each_leaf_node(rsp, rnp) { + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + if (rnp->expmaskinit == rnp->expmaskinitnext) { + raw_spin_unlock_irqrestore(&rnp->lock, flags); + continue; /* No new CPUs, nothing to do. */ + } + + /* Update this node's mask, track old value for propagation. */ + oldmask = rnp->expmaskinit; + rnp->expmaskinit = rnp->expmaskinitnext; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + + /* If was already nonzero, nothing to propagate. */ + if (oldmask) + continue; + + /* Propagate the new CPU up the tree. */ + mask = rnp->grpmask; + rnp_up = rnp->parent; + done = false; + while (rnp_up) { + raw_spin_lock_irqsave(&rnp_up->lock, flags); + smp_mb__after_unlock_lock(); + if (rnp_up->expmaskinit) + done = true; + rnp_up->expmaskinit |= mask; + raw_spin_unlock_irqrestore(&rnp_up->lock, flags); + if (done) + break; + mask = rnp_up->grpmask; + rnp_up = rnp_up->parent; + } + } +} + +/* + * Reset the ->expmask values in the rcu_node tree in preparation for + * a new expedited grace period. + */ +static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_node *rnp; + + sync_exp_reset_tree_hotplug(rsp); + rcu_for_each_node_breadth_first(rsp, rnp) { + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + WARN_ON_ONCE(rnp->expmask); + rnp->expmask = rnp->expmaskinit; + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } +} + /* * Return non-zero if there are any tasks in RCU read-side critical * sections blocking the current preemptible-RCU expedited grace period. @@ -3971,7 +4052,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); - rdp->beenonline = 1; /* We have now been online. */ rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -3993,6 +4073,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) raw_spin_lock(&rnp->lock); /* irqs already disabled. */ smp_mb__after_unlock_lock(); rnp->qsmaskinitnext |= mask; + rnp->expmaskinitnext |= mask; + if (!rdp->beenonline) + WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1); + rdp->beenonline = true; /* We have now been online. */ rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; rdp->passed_quiesce = false; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2e991f8361e4..a57f25ecca58 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -171,16 +171,21 @@ struct rcu_node { /* an rcu_data structure, otherwise, each */ /* bit corresponds to a child rcu_node */ /* structure. */ - unsigned long expmask; /* Groups that have ->blkd_tasks */ - /* elements that need to drain to allow the */ - /* current expedited grace period to */ - /* complete (only for PREEMPT_RCU). */ unsigned long qsmaskinit; - /* Per-GP initial value for qsmask & expmask. */ + /* Per-GP initial value for qsmask. */ /* Initialized from ->qsmaskinitnext at the */ /* beginning of each grace period. */ unsigned long qsmaskinitnext; /* Online CPUs for next grace period. */ + unsigned long expmask; /* CPUs or groups that need to check in */ + /* to allow the current expedited GP */ + /* to complete. */ + unsigned long expmaskinit; + /* Per-GP initial values for expmask. */ + /* Initialized from ->expmaskinitnext at the */ + /* beginning of each expedited GP. */ + unsigned long expmaskinitnext; + /* Online CPUs for next expedited GP. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ /* Only one bit will be set in this mask. */ int grplo; /* lowest-numbered CPU or group here. */ @@ -466,6 +471,7 @@ struct rcu_state { struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. */ void (*func)(struct rcu_head *head)); + int ncpus; /* # CPUs seen so far. */ /* The following fields are guarded by the root rcu_node's lock. */ @@ -508,6 +514,7 @@ struct rcu_state { atomic_long_t expedited_normal; /* # fallbacks to normal. */ atomic_t expedited_need_qs; /* # CPUs left to check in. */ wait_queue_head_t expedited_wq; /* Wait for check-ins. */ + int ncpus_snap; /* # CPUs seen last time. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e73be8539978..62d05413b7ba 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -536,86 +536,28 @@ void synchronize_rcu(void) EXPORT_SYMBOL_GPL(synchronize_rcu); /* - * Snapshot the tasks blocking the newly started preemptible-RCU expedited - * grace period for the specified rcu_node structure, phase 1. If there - * are such tasks, set the ->expmask bits up the rcu_node tree and also - * set the ->expmask bits on the leaf rcu_node structures to tell phase 2 - * that work is needed here. - * - * Caller must hold the root rcu_node's exp_funnel_mutex. + * Select the nodes that the upcoming expedited grace period needs + * to wait for. */ -static void -sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp) +static void sync_rcu_exp_select_nodes(struct rcu_state *rsp) { unsigned long flags; - unsigned long mask; - struct rcu_node *rnp_up; + struct rcu_node *rnp; - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - WARN_ON_ONCE(rnp->expmask); - WARN_ON_ONCE(rnp->exp_tasks); - if (!rcu_preempt_has_tasks(rnp)) { - /* No blocked tasks, nothing to do. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } - /* Call for Phase 2 and propagate ->expmask bits up the tree. */ - rnp->expmask = 1; - rnp_up = rnp; - while (rnp_up->parent) { - mask = rnp_up->grpmask; - rnp_up = rnp_up->parent; - if (rnp_up->expmask & mask) - break; - raw_spin_lock(&rnp_up->lock); /* irqs already off */ + sync_exp_reset_tree(rsp); + rcu_for_each_leaf_node(rsp, rnp) { + raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - rnp_up->expmask |= mask; - raw_spin_unlock(&rnp_up->lock); /* irqs still off */ - } - raw_spin_unlock_irqrestore(&rnp->lock, flags); -} - -/* - * Snapshot the tasks blocking the newly started preemptible-RCU expedited - * grace period for the specified rcu_node structure, phase 2. If the - * leaf rcu_node structure has its ->expmask field set, check for tasks. - * If there are some, clear ->expmask and set ->exp_tasks accordingly, - * then initiate RCU priority boosting. Otherwise, clear ->expmask and - * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits, - * enabling rcu_read_unlock_special() to do the bit-clearing. - * - * Caller must hold the root rcu_node's exp_funnel_mutex. - */ -static void -sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - if (!rnp->expmask) { - /* Phase 1 didn't do anything, so Phase 2 doesn't either. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } - - /* Phase 1 is over. */ - rnp->expmask = 0; - - /* - * If there are still blocked tasks, set up ->exp_tasks so that - * rcu_read_unlock_special() will wake us and then boost them. - */ - if (rcu_preempt_has_tasks(rnp)) { - rnp->exp_tasks = rnp->blkd_tasks.next; - rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ - return; + rnp->expmask = 0; /* No per-CPU component yet. */ + if (!rcu_preempt_has_tasks(rnp)) { + /* FIXME: Want __rcu_report_exp_rnp() here. */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } else { + rnp->exp_tasks = rnp->blkd_tasks.next; + rcu_initiate_boost(rnp, flags); + } + rcu_report_exp_rnp(rsp, rnp, false); } - - /* No longer any blocked tasks, so undo bit setting. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - rcu_report_exp_rnp(rsp, rnp, false); } /** @@ -648,16 +590,8 @@ void synchronize_rcu_expedited(void) /* force all RCU readers onto ->blkd_tasks lists. */ synchronize_sched_expedited(); - /* - * Snapshot current state of ->blkd_tasks lists into ->expmask. - * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special() - * to start clearing them. Doing this in one phase leads to - * strange races between setting and clearing bits, so just say "no"! - */ - rcu_for_each_leaf_node(rsp, rnp) - sync_rcu_preempt_exp_init1(rsp, rnp); - rcu_for_each_leaf_node(rsp, rnp) - sync_rcu_preempt_exp_init2(rsp, rnp); + /* Initialize the rcu_node tree in preparation for the wait. */ + sync_rcu_exp_select_nodes(rsp); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); -- cgit v1.2.3 From 8203d6d0ee784cfb2ebf89053f7fe399abc867d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Aug 2015 13:53:17 -0700 Subject: rcu: Use single-stage IPI algorithm for RCU expedited grace period The current preemptible-RCU expedited grace-period algorithm invokes synchronize_sched_expedited() to enqueue all tasks currently running in a preemptible-RCU read-side critical section, then waits for all the ->blkd_tasks lists to drain. This works, but results in both an IPI and a double context switch even on CPUs that do not happen to be running in a preemptible RCU read-side critical section. This commit implements a new algorithm that causes less OS jitter. This new algorithm IPIs all online CPUs that are not idle (from an RCU perspective), but refrains from self-IPIs. If a CPU receiving this IPI is not in a preemptible RCU read-side critical section (or is just now exiting one), it pushes quiescence up the rcu_node tree, otherwise, it sets a flag that will be handled by the upcoming outermost rcu_read_unlock(), which will then push quiescence up the tree. The expedited grace period must of course wait on any pre-existing blocked readers, and newly blocked readers must be queued carefully based on the state of both the normal and the expedited grace periods. This new queueing approach also avoids the need to update boost state, courtesy of the fact that blocked tasks are no longer ever migrated to the root rcu_node structure. Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 10 +- kernel/rcu/tree.c | 75 ++++++++---- kernel/rcu/tree_plugin.h | 296 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 311 insertions(+), 70 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..7fa8c4d372e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,10 +1330,12 @@ struct sched_dl_entity { union rcu_special { struct { - bool blocked; - bool need_qs; - } b; - short s; + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + u8 pad; /* Otherwise the compiler can store garbage here. */ + } b; /* Bits. */ + u32 s; /* Set of bits. */ }; struct rcu_node; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8526896afea7..6e92bf4337bd 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3461,18 +3461,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) } /* - * Return non-zero if there are any tasks in RCU read-side critical - * sections blocking the current preemptible-RCU expedited grace period. - * If there is no preemptible-RCU expedited grace period currently in - * progress, returns zero unconditionally. - */ -static int rcu_preempted_readers_exp(struct rcu_node *rnp) -{ - return rnp->exp_tasks != NULL; -} - -/* - * return non-zero if there is no RCU expedited grace period in progress + * Return non-zero if there is no RCU expedited grace period in progress * for the specified rcu_node structure, in other words, if all CPUs and * tasks covered by the specified rcu_node structure have done their bit * for the current expedited grace period. Works only for preemptible @@ -3482,7 +3471,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp) */ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) { - return !rcu_preempted_readers_exp(rnp) && + return rnp->exp_tasks == NULL && READ_ONCE(rnp->expmask) == 0; } @@ -3494,19 +3483,21 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) * recursively up the tree. (Calm down, calm down, we do the recursion * iteratively!) * - * Caller must hold the root rcu_node's exp_funnel_mutex. + * Caller must hold the root rcu_node's exp_funnel_mutex and the + * specified rcu_node structure's ->lock. */ -static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, - struct rcu_node *rnp, bool wake) +static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, + bool wake, unsigned long flags) + __releases(rnp->lock) { - unsigned long flags; unsigned long mask; - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); for (;;) { if (!sync_rcu_preempt_exp_done(rnp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (!rnp->expmask) + rcu_initiate_boost(rnp, flags); + else + raw_spin_unlock_irqrestore(&rnp->lock, flags); break; } if (rnp->parent == NULL) { @@ -3522,10 +3513,54 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, rnp = rnp->parent; raw_spin_lock(&rnp->lock); /* irqs already disabled */ smp_mb__after_unlock_lock(); + WARN_ON_ONCE(!(rnp->expmask & mask)); rnp->expmask &= ~mask; } } +/* + * Report expedited quiescent state for specified node. This is a + * lock-acquisition wrapper function for __rcu_report_exp_rnp(). + * + * Caller must hold the root rcu_node's exp_funnel_mutex. + */ +static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, + struct rcu_node *rnp, bool wake) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + __rcu_report_exp_rnp(rsp, rnp, wake, flags); +} + +/* + * Report expedited quiescent state for multiple CPUs, all covered by the + * specified leaf rcu_node structure. Caller must hold the root + * rcu_node's exp_funnel_mutex. + */ +static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, + unsigned long mask, bool wake) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rnp->lock, flags); + smp_mb__after_unlock_lock(); + WARN_ON_ONCE((rnp->expmask & mask) != mask); + rnp->expmask &= ~mask; + __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */ +} + +/* + * Report expedited quiescent state for specified rcu_data (CPU). + * Caller must hold the root rcu_node's exp_funnel_mutex. + */ +static void __maybe_unused rcu_report_exp_rdp(struct rcu_state *rsp, + struct rcu_data *rdp, bool wake) +{ + rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake); +} + /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp, diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 62d05413b7ba..6f7500f9387c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -101,7 +101,6 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); static struct rcu_state *const rcu_state_p = &rcu_preempt_state; static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; -static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake); @@ -114,6 +113,147 @@ static void __init rcu_bootup_announce(void) rcu_bootup_announce_oddness(); } +/* Flags for rcu_preempt_ctxt_queue() decision table. */ +#define RCU_GP_TASKS 0x8 +#define RCU_EXP_TASKS 0x4 +#define RCU_GP_BLKD 0x2 +#define RCU_EXP_BLKD 0x1 + +/* + * Queues a task preempted within an RCU-preempt read-side critical + * section into the appropriate location within the ->blkd_tasks list, + * depending on the states of any ongoing normal and expedited grace + * periods. The ->gp_tasks pointer indicates which element the normal + * grace period is waiting on (NULL if none), and the ->exp_tasks pointer + * indicates which element the expedited grace period is waiting on (again, + * NULL if none). If a grace period is waiting on a given element in the + * ->blkd_tasks list, it also waits on all subsequent elements. Thus, + * adding a task to the tail of the list blocks any grace period that is + * already waiting on one of the elements. In contrast, adding a task + * to the head of the list won't block any grace period that is already + * waiting on one of the elements. + * + * This queuing is imprecise, and can sometimes make an ongoing grace + * period wait for a task that is not strictly speaking blocking it. + * Given the choice, we needlessly block a normal grace period rather than + * blocking an expedited grace period. + * + * Note that an endless sequence of expedited grace periods still cannot + * indefinitely postpone a normal grace period. Eventually, all of the + * fixed number of preempted tasks blocking the normal grace period that are + * not also blocking the expedited grace period will resume and complete + * their RCU read-side critical sections. At that point, the ->gp_tasks + * pointer will equal the ->exp_tasks pointer, at which point the end of + * the corresponding expedited grace period will also be the end of the + * normal grace period. + */ +static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, + unsigned long flags) __releases(rnp->lock) +{ + int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) + + (rnp->exp_tasks ? RCU_EXP_TASKS : 0) + + (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) + + (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0); + struct task_struct *t = current; + + /* + * Decide where to queue the newly blocked task. In theory, + * this could be an if-statement. In practice, when I tried + * that, it was quite messy. + */ + switch (blkd_state) { + case 0: + case RCU_EXP_TASKS: + case RCU_EXP_TASKS + RCU_GP_BLKD: + case RCU_GP_TASKS: + case RCU_GP_TASKS + RCU_EXP_TASKS: + + /* + * Blocking neither GP, or first task blocking the normal + * GP but not blocking the already-waiting expedited GP. + * Queue at the head of the list to avoid unnecessarily + * blocking the already-waiting GPs. + */ + list_add(&t->rcu_node_entry, &rnp->blkd_tasks); + break; + + case RCU_EXP_BLKD: + case RCU_GP_BLKD: + case RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + + /* + * First task arriving that blocks either GP, or first task + * arriving that blocks the expedited GP (with the normal + * GP already waiting), or a task arriving that blocks + * both GPs with both GPs already waiting. Queue at the + * tail of the list to avoid any GP waiting on any of the + * already queued tasks that are not blocking it. + */ + list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks); + break; + + case RCU_EXP_TASKS + RCU_EXP_BLKD: + case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD: + + /* + * Second or subsequent task blocking the expedited GP. + * The task either does not block the normal GP, or is the + * first task blocking the normal GP. Queue just after + * the first task blocking the expedited GP. + */ + list_add(&t->rcu_node_entry, rnp->exp_tasks); + break; + + case RCU_GP_TASKS + RCU_GP_BLKD: + case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD: + + /* + * Second or subsequent task blocking the normal GP. + * The task does not block the expedited GP. Queue just + * after the first task blocking the normal GP. + */ + list_add(&t->rcu_node_entry, rnp->gp_tasks); + break; + + default: + + /* Yet another exercise in excessive paranoia. */ + WARN_ON_ONCE(1); + break; + } + + /* + * We have now queued the task. If it was the first one to + * block either grace period, update the ->gp_tasks and/or + * ->exp_tasks pointers, respectively, to reference the newly + * blocked tasks. + */ + if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) + rnp->gp_tasks = &t->rcu_node_entry; + if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) + rnp->exp_tasks = &t->rcu_node_entry; + raw_spin_unlock(&rnp->lock); + + /* + * Report the quiescent state for the expedited GP. This expedited + * GP should not be able to end until we report, so there should be + * no need to check for a subsequent expedited GP. (Though we are + * still in a quiescent state in any case.) + */ + if (blkd_state & RCU_EXP_BLKD && + t->rcu_read_unlock_special.b.exp_need_qs) { + t->rcu_read_unlock_special.b.exp_need_qs = false; + rcu_report_exp_rdp(rdp->rsp, rdp, true); + } else { + WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs); + } + local_irq_restore(flags); +} + /* * Record a preemptible-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -167,42 +307,18 @@ static void rcu_preempt_note_context_switch(void) t->rcu_blocked_node = rnp; /* - * If this CPU has already checked in, then this task - * will hold up the next grace period rather than the - * current grace period. Queue the task accordingly. - * If the task is queued for the current grace period - * (i.e., this CPU has not yet passed through a quiescent - * state for the current grace period), then as long - * as that task remains queued, the current grace period - * cannot end. Note that there is some uncertainty as - * to exactly when the current grace period started. - * We take a conservative approach, which can result - * in unnecessarily waiting on tasks that started very - * slightly after the current grace period began. C'est - * la vie!!! - * - * But first, note that the current CPU must still be - * on line! + * Verify the CPU's sanity, trace the preemption, and + * then queue the task as required based on the states + * of any ongoing and expedited grace periods. */ WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); - if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { - list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); - rnp->gp_tasks = &t->rcu_node_entry; - if (IS_ENABLED(CONFIG_RCU_BOOST) && - rnp->boost_tasks != NULL) - rnp->boost_tasks = rnp->gp_tasks; - } else { - list_add(&t->rcu_node_entry, &rnp->blkd_tasks); - if (rnp->qsmask & rdp->grpmask) - rnp->gp_tasks = &t->rcu_node_entry; - } trace_rcu_preempt_task(rdp->rsp->name, t->pid, (rnp->qsmask & rdp->grpmask) ? rnp->gpnum : rnp->gpnum + 1); - raw_spin_unlock_irqrestore(&rnp->lock, flags); + rcu_preempt_ctxt_queue(rnp, rdp, flags); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special.s) { @@ -272,6 +388,7 @@ void rcu_read_unlock_special(struct task_struct *t) unsigned long flags; struct list_head *np; bool drop_boost_mutex = false; + struct rcu_data *rdp; struct rcu_node *rnp; union rcu_special special; @@ -282,8 +399,8 @@ void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); /* - * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. Because irqs are disabled, + * If RCU core is waiting for this CPU to exit its critical section, + * report the fact that it has exited. Because irqs are disabled, * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; @@ -296,13 +413,32 @@ void rcu_read_unlock_special(struct task_struct *t) } } + /* + * Respond to a request for an expedited grace period, but only if + * we were not preempted, meaning that we were running on the same + * CPU throughout. If we were preempted, the exp_need_qs flag + * would have been cleared at the time of the first preemption, + * and the quiescent state would be reported when we were dequeued. + */ + if (special.b.exp_need_qs) { + WARN_ON_ONCE(special.b.blocked); + t->rcu_read_unlock_special.b.exp_need_qs = false; + rdp = this_cpu_ptr(rcu_state_p->rda); + rcu_report_exp_rdp(rcu_state_p, rdp, true); + if (!t->rcu_read_unlock_special.s) { + local_irq_restore(flags); + return; + } + } + /* Hardware IRQ handlers cannot block, complain if they get here. */ if (in_irq() || in_serving_softirq()) { lockdep_rcu_suspicious(__FILE__, __LINE__, "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); - pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n", + pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", t->rcu_read_unlock_special.s, t->rcu_read_unlock_special.b.blocked, + t->rcu_read_unlock_special.b.exp_need_qs, t->rcu_read_unlock_special.b.need_qs); local_irq_restore(flags); return; @@ -329,7 +465,7 @@ void rcu_read_unlock_special(struct task_struct *t) raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); - empty_exp = !rcu_preempted_readers_exp(rnp); + empty_exp = sync_rcu_preempt_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); @@ -353,7 +489,7 @@ void rcu_read_unlock_special(struct task_struct *t) * Note that rcu_report_unblock_qs_rnp() releases rnp->lock, * so we must take a snapshot of the expedited state. */ - empty_exp_now = !rcu_preempted_readers_exp(rnp); + empty_exp_now = sync_rcu_preempt_exp_done(rnp); if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) { trace_rcu_quiescent_state_report(TPS("preempt_rcu"), rnp->gpnum, @@ -535,28 +671,99 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +/* + * Remote handler for smp_call_function_single(). If there is an + * RCU read-side critical section in effect, request that the + * next rcu_read_unlock() record the quiescent state up the + * ->expmask fields in the rcu_node tree. Otherwise, immediately + * report the quiescent state. + */ +static void sync_rcu_exp_handler(void *info) +{ + struct rcu_data *rdp; + struct rcu_state *rsp = info; + struct task_struct *t = current; + + /* + * Within an RCU read-side critical section, request that the next + * rcu_read_unlock() report. Unless this RCU read-side critical + * section has already blocked, in which case it is already set + * up for the expedited grace period to wait on it. + */ + if (t->rcu_read_lock_nesting > 0 && + !t->rcu_read_unlock_special.b.blocked) { + t->rcu_read_unlock_special.b.exp_need_qs = true; + return; + } + + /* + * We are either exiting an RCU read-side critical section (negative + * values of t->rcu_read_lock_nesting) or are not in one at all + * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU + * read-side critical section that blocked before this expedited + * grace period started. Either way, we can immediately report + * the quiescent state. + */ + rdp = this_cpu_ptr(rsp->rda); + rcu_report_exp_rdp(rsp, rdp, true); +} + /* * Select the nodes that the upcoming expedited grace period needs * to wait for. */ -static void sync_rcu_exp_select_nodes(struct rcu_state *rsp) +static void sync_rcu_exp_select_cpus(struct rcu_state *rsp) { + int cpu; unsigned long flags; + unsigned long mask; + unsigned long mask_ofl_test; + unsigned long mask_ofl_ipi; + int ret; struct rcu_node *rnp; sync_exp_reset_tree(rsp); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - rnp->expmask = 0; /* No per-CPU component yet. */ - if (!rcu_preempt_has_tasks(rnp)) { - /* FIXME: Want __rcu_report_exp_rnp() here. */ - raw_spin_unlock_irqrestore(&rnp->lock, flags); - } else { + + /* Each pass checks a CPU for identity, offline, and idle. */ + mask_ofl_test = 0; + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + if (raw_smp_processor_id() == cpu || + cpu_is_offline(cpu) || + !(atomic_add_return(0, &rdtp->dynticks) & 0x1)) + mask_ofl_test |= rdp->grpmask; + } + mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; + + /* + * Need to wait for any blocked tasks as well. Note that + * additional blocking tasks will also block the expedited + * GP until such time as the ->expmask bits are cleared. + */ + if (rcu_preempt_has_tasks(rnp)) rnp->exp_tasks = rnp->blkd_tasks.next; - rcu_initiate_boost(rnp, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + + /* IPI the remaining CPUs for expedited quiescent state. */ + mask = 1; + for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { + if (!(mask_ofl_ipi & mask)) + continue; + ret = smp_call_function_single(cpu, + sync_rcu_exp_handler, + rsp, 0); + if (!ret) + mask_ofl_ipi &= ~mask; } - rcu_report_exp_rnp(rsp, rnp, false); + /* Report quiescent states for those that went offline. */ + mask_ofl_test |= mask_ofl_ipi; + if (mask_ofl_test) + rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); } } @@ -587,11 +794,8 @@ void synchronize_rcu_expedited(void) rcu_exp_gp_seq_start(rsp); - /* force all RCU readers onto ->blkd_tasks lists. */ - synchronize_sched_expedited(); - /* Initialize the rcu_node tree in preparation for the wait. */ - sync_rcu_exp_select_nodes(rsp); + sync_rcu_exp_select_cpus(rsp); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); -- cgit v1.2.3 From 97c668b8e983b722e2ed765b98b05f644aff1b13 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 11:31:51 -0700 Subject: rcu: Rename qs_pending to core_needs_qs An upcoming commit needs to invert the sense of the ->passed_quiesce rcu_data structure field, so this commit is taking this opportunity to clarify things a bit by renaming ->qs_pending to ->core_needs_qs. So if !rdp->core_needs_qs, then this CPU need not concern itself with quiescent states, in particular, it need not acquire its leaf rcu_node structure's ->lock to check. Otherwise, it needs to report the next quiescent state. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 14 +++++++------- kernel/rcu/tree.h | 4 ++-- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/tree_trace.c | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d2cdcada6fe0..7c158ffc7769 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1746,7 +1746,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); rdp->passed_quiesce = 0; rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); - rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask); + rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); WRITE_ONCE(rdp->gpwrap, false); } @@ -2357,7 +2357,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) if ((rnp->qsmask & mask) == 0) { raw_spin_unlock_irqrestore(&rnp->lock, flags); } else { - rdp->qs_pending = 0; + rdp->core_needs_qs = 0; /* * This GP can't end until cpu checks in, so all of our @@ -2388,7 +2388,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Does this CPU still need to do its part for current grace period? * If no, return and let the other CPUs do their part as well. */ - if (!rdp->qs_pending) + if (!rdp->core_needs_qs) return; /* @@ -3828,10 +3828,10 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && - rdp->qs_pending && !rdp->passed_quiesce && + rdp->core_needs_qs && !rdp->passed_quiesce && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { - rdp->n_rp_qs_pending++; - } else if (rdp->qs_pending && + rdp->n_rp_core_needs_qs++; + } else if (rdp->core_needs_qs && (rdp->passed_quiesce || rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { rdp->n_rp_report_qs++; @@ -4157,7 +4157,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->completed = rnp->completed; rdp->passed_quiesce = false; rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); - rdp->qs_pending = false; + rdp->core_needs_qs = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore(&rnp->lock, flags); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index efe361c764ab..4a0f30676ba8 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -303,7 +303,7 @@ struct rcu_data { unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ /* for rcu_all_qs() invocations. */ bool passed_quiesce; /* User-mode/idle loop etc. */ - bool qs_pending; /* Core waits for quiesc state. */ + bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible gpnum/completed wrap. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ @@ -368,7 +368,7 @@ struct rcu_data { /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ - unsigned long n_rp_qs_pending; + unsigned long n_rp_core_needs_qs; unsigned long n_rp_report_qs; unsigned long n_rp_cb_ready; unsigned long n_rp_cpu_needs_gp; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6f7500f9387c..e33b4f3b8e0a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -619,7 +619,7 @@ static void rcu_preempt_check_callbacks(void) return; } if (t->rcu_read_lock_nesting > 0 && - __this_cpu_read(rcu_data_p->qs_pending) && + __this_cpu_read(rcu_data_p->core_needs_qs) && !__this_cpu_read(rcu_data_p->passed_quiesce)) t->rcu_read_unlock_special.b.need_qs = true; } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 6fc4c5ff3bb5..4ac25f8520d6 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -123,7 +123,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) ulong2long(rdp->completed), ulong2long(rdp->gpnum), rdp->passed_quiesce, rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), - rdp->qs_pending); + rdp->core_needs_qs); seq_printf(m, " dt=%d/%llx/%d df=%lu", atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, @@ -361,7 +361,7 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) cpu_is_offline(rdp->cpu) ? '!' : ' ', rdp->n_rcu_pending); seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ", - rdp->n_rp_qs_pending, + rdp->n_rp_core_needs_qs, rdp->n_rp_report_qs, rdp->n_rp_cb_ready, rdp->n_rp_cpu_needs_gp); -- cgit v1.2.3 From 0d43eb34f9aabcddf41c99b7af2d0ced33e9a3cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 14:17:29 -0700 Subject: rcu: Invert passed_quiesce and rename to cpu_no_qs This commit inverts the sense of the rcu_data structure's ->passed_quiesce field and renames it to ->cpu_no_qs. This will allow a later commit to use an "aggregate OR" operation to test expedited as well as normal grace periods without added overhead. Signed-off-by: Paul E. McKenney --- Documentation/RCU/trace.txt | 32 ++++++++++++++++---------------- kernel/rcu/tree.c | 22 +++++++++++----------- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 6 +++--- kernel/rcu/tree_trace.c | 4 ++-- 5 files changed, 33 insertions(+), 33 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 97f17e9decda..ec6998b1b6d0 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -56,14 +56,14 @@ rcuboost: The output of "cat rcu/rcu_preempt/rcudata" looks as follows: - 0!c=30455 g=30456 pq=1/0 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 - 1!c=30719 g=30720 pq=1/0 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 - 2!c=30150 g=30151 pq=1/1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 - 3 c=31249 g=31250 pq=1/1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 - 4!c=29502 g=29503 pq=1/0 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 - 5 c=31201 g=31202 pq=1/0 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 - 6!c=30253 g=30254 pq=1/0 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 - 7 c=31178 g=31178 pq=1/0 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 + 0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716 + 1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982 + 2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458 + 3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622 + 4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521 + 5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698 + 6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353 + 7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969 This file has one line per CPU, or eight for this 8-CPU system. The fields are as follows: @@ -188,14 +188,14 @@ o "ca" is the number of RCU callbacks that have been adopted by this Kernels compiled with CONFIG_RCU_BOOST=y display the following from /debug/rcu/rcu_preempt/rcudata: - 0!c=12865 g=12866 pq=1/0 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 - 1 c=14407 g=14408 pq=1/0 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 - 2 c=14407 g=14408 pq=1/0 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 - 3 c=14407 g=14408 pq=1/0 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 - 4 c=14405 g=14406 pq=1/0 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 - 5!c=14168 g=14169 pq=1/0 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 - 6 c=14404 g=14405 pq=1/0 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 - 7 c=14407 g=14408 pq=1/0 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 + 0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871 + 1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485 + 2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490 + 3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290 + 4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114 + 5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722 + 6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811 + 7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042 This is similar to the output discussed above, but contains the following additional fields: diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7c158ffc7769..31e7021ced4d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -245,21 +245,21 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(void) { - if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) { + if (__this_cpu_read(rcu_sched_data.cpu_no_qs)) { trace_rcu_grace_period(TPS("rcu_sched"), __this_cpu_read(rcu_sched_data.gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_sched_data.passed_quiesce, 1); + __this_cpu_write(rcu_sched_data.cpu_no_qs, false); } } void rcu_bh_qs(void) { - if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) { + if (__this_cpu_read(rcu_bh_data.cpu_no_qs)) { trace_rcu_grace_period(TPS("rcu_bh"), __this_cpu_read(rcu_bh_data.gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_bh_data.passed_quiesce, 1); + __this_cpu_write(rcu_bh_data.cpu_no_qs, false); } } @@ -1744,7 +1744,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, */ rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); - rdp->passed_quiesce = 0; + rdp->cpu_no_qs = true; rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); @@ -2337,7 +2337,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if ((rdp->passed_quiesce == 0 && + if ((rdp->cpu_no_qs && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || rdp->gpwrap) { @@ -2348,7 +2348,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * We will instead need a new quiescent state that lies * within the current grace period. */ - rdp->passed_quiesce = 0; /* need qs for new gp. */ + rdp->cpu_no_qs = true; /* need qs for new gp. */ rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); raw_spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -2395,7 +2395,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Was there a quiescent state since the beginning of the grace * period? If no, then exit and wait for the next call. */ - if (!rdp->passed_quiesce && + if (rdp->cpu_no_qs && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) return; @@ -3828,11 +3828,11 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && - rdp->core_needs_qs && !rdp->passed_quiesce && + rdp->core_needs_qs && rdp->cpu_no_qs && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { rdp->n_rp_core_needs_qs++; } else if (rdp->core_needs_qs && - (rdp->passed_quiesce || + (!rdp->cpu_no_qs || rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { rdp->n_rp_report_qs++; return 1; @@ -4155,7 +4155,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->beenonline = true; /* We have now been online. */ rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; - rdp->passed_quiesce = false; + rdp->cpu_no_qs = true; rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); rdp->core_needs_qs = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 4a0f30676ba8..ded4ceebed76 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -302,7 +302,7 @@ struct rcu_data { /* is aware of having started. */ unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ /* for rcu_all_qs() invocations. */ - bool passed_quiesce; /* User-mode/idle loop etc. */ + bool cpu_no_qs; /* No QS yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible gpnum/completed wrap. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e33b4f3b8e0a..6977ff0dccb9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -265,11 +265,11 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, */ static void rcu_preempt_qs(void) { - if (!__this_cpu_read(rcu_data_p->passed_quiesce)) { + if (__this_cpu_read(rcu_data_p->cpu_no_qs)) { trace_rcu_grace_period(TPS("rcu_preempt"), __this_cpu_read(rcu_data_p->gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_data_p->passed_quiesce, 1); + __this_cpu_write(rcu_data_p->cpu_no_qs, false); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ current->rcu_read_unlock_special.b.need_qs = false; } @@ -620,7 +620,7 @@ static void rcu_preempt_check_callbacks(void) } if (t->rcu_read_lock_nesting > 0 && __this_cpu_read(rcu_data_p->core_needs_qs) && - !__this_cpu_read(rcu_data_p->passed_quiesce)) + __this_cpu_read(rcu_data_p->cpu_no_qs)) t->rcu_read_unlock_special.b.need_qs = true; } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 4ac25f8520d6..d373e57109b8 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -117,11 +117,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%ld g=%ld pq=%d/%d qp=%d", + seq_printf(m, "%3d%cc=%ld g=%ld cnq=%d/%d:%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', ulong2long(rdp->completed), ulong2long(rdp->gpnum), - rdp->passed_quiesce, + rdp->cpu_no_qs, rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), rdp->core_needs_qs); seq_printf(m, " dt=%d/%llx/%d df=%lu", -- cgit v1.2.3 From 5b74c458906fc4a62f932ee8bb801d29baf15fec Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Aug 2015 15:16:57 -0700 Subject: rcu: Make ->cpu_no_qs be a union for aggregate OR This commit converts the rcu_data structure's ->cpu_no_qs field to a union. The bytewise side of this union allows individual access to indications as to whether this CPU needs to find a quiescent state for a normal (.norm) and/or expedited (.exp) grace period. The setwise side of the union allows testing whether or not a quiescent state is needed at all, for either type of grace period. For now, only .norm is used. A later commit will introduce the expedited usage. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 22 +++++++++++----------- kernel/rcu/tree.h | 14 +++++++++++++- kernel/rcu/tree_plugin.h | 6 +++--- kernel/rcu/tree_trace.c | 2 +- 4 files changed, 28 insertions(+), 16 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 31e7021ced4d..3e2875b38eae 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -245,21 +245,21 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(void) { - if (__this_cpu_read(rcu_sched_data.cpu_no_qs)) { + if (__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_sched"), __this_cpu_read(rcu_sched_data.gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_sched_data.cpu_no_qs, false); + __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false); } } void rcu_bh_qs(void) { - if (__this_cpu_read(rcu_bh_data.cpu_no_qs)) { + if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_bh"), __this_cpu_read(rcu_bh_data.gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_bh_data.cpu_no_qs, false); + __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); } } @@ -1744,7 +1744,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, */ rdp->gpnum = rnp->gpnum; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart")); - rdp->cpu_no_qs = true; + rdp->cpu_no_qs.b.norm = true; rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask); zero_cpu_stall_ticks(rdp); @@ -2337,7 +2337,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if ((rdp->cpu_no_qs && + if ((rdp->cpu_no_qs.b.norm && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || rdp->gpwrap) { @@ -2348,7 +2348,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * We will instead need a new quiescent state that lies * within the current grace period. */ - rdp->cpu_no_qs = true; /* need qs for new gp. */ + rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); raw_spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -2395,7 +2395,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Was there a quiescent state since the beginning of the grace * period? If no, then exit and wait for the next call. */ - if (rdp->cpu_no_qs && + if (rdp->cpu_no_qs.b.norm && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) return; @@ -3828,11 +3828,11 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) /* Is the RCU core waiting for a quiescent state from this CPU? */ if (rcu_scheduler_fully_active && - rdp->core_needs_qs && rdp->cpu_no_qs && + rdp->core_needs_qs && rdp->cpu_no_qs.b.norm && rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { rdp->n_rp_core_needs_qs++; } else if (rdp->core_needs_qs && - (!rdp->cpu_no_qs || + (!rdp->cpu_no_qs.b.norm || rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { rdp->n_rp_report_qs++; return 1; @@ -4155,7 +4155,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->beenonline = true; /* We have now been online. */ rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; - rdp->cpu_no_qs = true; + rdp->cpu_no_qs.b.norm = true; rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); rdp->core_needs_qs = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ded4ceebed76..3eee48bcf52b 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -286,6 +286,18 @@ struct rcu_node { for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) +/* + * Union to allow "aggregate OR" operation on the need for a quiescent + * state by the normal and expedited grace periods. + */ +union rcu_noqs { + struct { + u8 norm; + u8 exp; + } b; /* Bits. */ + u16 s; /* Set of bits, aggregate OR here. */ +}; + /* Index values for nxttail array in struct rcu_data. */ #define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ #define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ @@ -302,7 +314,7 @@ struct rcu_data { /* is aware of having started. */ unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */ /* for rcu_all_qs() invocations. */ - bool cpu_no_qs; /* No QS yet for this CPU. */ + union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible gpnum/completed wrap. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6977ff0dccb9..7880202f1e38 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -265,11 +265,11 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, */ static void rcu_preempt_qs(void) { - if (__this_cpu_read(rcu_data_p->cpu_no_qs)) { + if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { trace_rcu_grace_period(TPS("rcu_preempt"), __this_cpu_read(rcu_data_p->gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_data_p->cpu_no_qs, false); + __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ current->rcu_read_unlock_special.b.need_qs = false; } @@ -620,7 +620,7 @@ static void rcu_preempt_check_callbacks(void) } if (t->rcu_read_lock_nesting > 0 && __this_cpu_read(rcu_data_p->core_needs_qs) && - __this_cpu_read(rcu_data_p->cpu_no_qs)) + __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm)) t->rcu_read_unlock_special.b.need_qs = true; } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index d373e57109b8..999c3672f990 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -121,7 +121,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', ulong2long(rdp->completed), ulong2long(rdp->gpnum), - rdp->cpu_no_qs, + rdp->cpu_no_qs.b.norm, rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), rdp->core_needs_qs); seq_printf(m, " dt=%d/%llx/%d df=%lu", -- cgit v1.2.3 From b6a4ae766e3133a4db73fabc81e522d1601cb623 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 29 Jul 2015 13:29:38 +0800 Subject: rcu: Use rcu_callback_t in call_rcu*() and friends As we now have rcu_callback_t typedefs as the type of rcu callbacks, we should use it in call_rcu*() and friends as the type of parameters. This could save us a few lines of code and make it clear which function requires an rcu callbacks rather than other callbacks as its argument. Besides, this can also help cscope to generate a better database for code reading. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 10 +++++----- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/srcu.c | 2 +- kernel/rcu/tiny.c | 8 ++++---- kernel/rcu/tree.c | 8 ++++---- kernel/rcu/tree.h | 2 +- kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/update.c | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 581abf848566..d63bb77dab35 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -160,7 +160,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, * more than one CPU). */ void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -191,7 +191,7 @@ void call_rcu(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -213,7 +213,7 @@ void call_rcu_bh(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); + rcu_callback_t func); void synchronize_sched(void); @@ -274,7 +274,7 @@ do { \ * See the description of call_rcu() for more detailed information on * memory ordering guarantees. */ -void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -1065,7 +1065,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ff968b7af3a4..c8a0722f77ea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,7 +83,7 @@ static inline void synchronize_sched_expedited(void) } static inline void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { call_rcu(head, func); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5abec82f325e..60d15a080d7c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,7 +48,7 @@ void synchronize_rcu_bh(void); void synchronize_sched_expedited(void); void synchronize_rcu_expedited(void); -void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); /** * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 77192953dee5..51c8e7f02f48 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -448,7 +448,7 @@ static void synchronize_rcu_busted(void) } static void -call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +call_rcu_busted(struct rcu_head *head, rcu_callback_t func) { /* This is a deliberate bug for testing purposes only! */ func(head); @@ -523,7 +523,7 @@ static void srcu_torture_synchronize(void) } static void srcu_torture_call(struct rcu_head *head, - void (*func)(struct rcu_head *head)) + rcu_callback_t func) { call_srcu(srcu_ctlp, head, func); } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index d3fcb2ec8536..9e6122540d28 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -387,7 +387,7 @@ static void srcu_flip(struct srcu_struct *sp) * srcu_struct structure. */ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, - void (*func)(struct rcu_head *head)) + rcu_callback_t func) { unsigned long flags; diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d0471056d0af..944b1b491ed8 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); static void __call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu), + rcu_callback_t func, struct rcu_ctrlblk *rcp); #include "tiny_plugin.h" @@ -203,7 +203,7 @@ EXPORT_SYMBOL_GPL(synchronize_sched); * Helper function for call_rcu() and call_rcu_bh(). */ static void __call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu), + rcu_callback_t func, struct rcu_ctrlblk *rcp) { unsigned long flags; @@ -229,7 +229,7 @@ static void __call_rcu(struct rcu_head *head, * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_sched_ctrlblk); } @@ -239,7 +239,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); * Post an RCU bottom-half callback to be invoked after any subsequent * quiescent state. */ -void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_bh_ctrlblk); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 775d36cc0050..b9d9e0249e2f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3017,7 +3017,7 @@ static void rcu_leak_callback(struct rcu_head *rhp) * is expected to specify a CPU. */ static void -__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), +__call_rcu(struct rcu_head *head, rcu_callback_t func, struct rcu_state *rsp, int cpu, bool lazy) { unsigned long flags; @@ -3088,7 +3088,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), /* * Queue an RCU-sched callback for invocation after a grace period. */ -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_sched_state, -1, 0); } @@ -3097,7 +3097,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Queue an RCU callback for invocation after a quicker grace period. */ -void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, &rcu_bh_state, -1, 0); } @@ -3111,7 +3111,7 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); * function may only be called from __kfree_rcu(). */ void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { __call_rcu(head, func, rcu_state_p, -1, 1); } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 2e991f8361e4..ad11529375cc 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -584,7 +584,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_preempt_check_callbacks(void); -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void call_rcu(struct rcu_head *head, rcu_callback_t func); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b2bf3963a0ae..06116ae6dfd7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -500,7 +500,7 @@ static void rcu_preempt_do_callbacks(void) /* * Queue a preemptible-RCU callback for invocation after a grace period. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu(struct rcu_head *head, rcu_callback_t func) { __call_rcu(head, func, rcu_state_p, -1, 0); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 7a0b3bc7c5ed..5f748c5a40f0 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -534,7 +534,7 @@ static void rcu_spawn_tasks_kthread(void); * Post an RCU-tasks callback. First call must be from process context * after the scheduler if fully operational. */ -void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) +void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; bool needwake; -- cgit v1.2.3 From 7f21aeef722d598ba350d1834f6e4134c7b5e8de Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Aug 2015 14:49:50 -0700 Subject: rcu: Add online/offline info to stall warning message This commit makes the RCU CPU stall warning message print online/offline indications immediately after a hyphen following the CPU number. A "O" indicates that the global CPU-hotplug system believes that the CPU is online, a "o" that RCU perceived the CPU to be online at the beginning of the current expedited grace period, and an "N" that RCU currently believes that it will perceive the CPU as being online at the beginning of the next expedited grace period, with "." otherwise for all three indications. So for CPU 10, you would normally see "10-OoN:" indicating that everything believes that the CPU is online. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 06116ae6dfd7..57ed9c13ae5a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1702,8 +1702,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) ticks_value = rsp->gpnum - rdp->gpnum; } print_cpu_stall_fast_no_hz(fast_no_hz, cpu); - pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", - cpu, ticks_value, ticks_title, + pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n", + cpu, + "O."[!!cpu_online(cpu)], + "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)], + "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)], + ticks_value, ticks_title, atomic_read(&rdtp->dynticks) & 0xfff, rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), -- cgit v1.2.3 From dcdb8807ba0f5127d4dc67daeeb154edf50a93d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Aug 2015 19:00:31 -0700 Subject: rcu: Consolidate expedited CPU selection Now that sync_sched_exp_select_cpus() and sync_rcu_exp_select_cpus() are identical aside from the the argument to smp_call_function_single(), this commit consolidates them with a functional argument. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 7 +++--- kernel/rcu/tree_plugin.h | 61 +----------------------------------------------- 2 files changed, 5 insertions(+), 63 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ae582e3d4136..f44f4b30c68a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3654,7 +3654,8 @@ static void synchronize_sched_expedited_cpu_stop(void *data) * Select the nodes that the upcoming expedited grace period needs * to wait for. */ -static void sync_sched_exp_select_cpus(struct rcu_state *rsp) +static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, + smp_call_func_t func) { int cpu; unsigned long flags; @@ -3696,7 +3697,7 @@ static void sync_sched_exp_select_cpus(struct rcu_state *rsp) for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { if (!(mask_ofl_ipi & mask)) continue; - ret = smp_call_function_single(cpu, synchronize_sched_expedited_cpu_stop, NULL, 0); + ret = smp_call_function_single(cpu, func, rsp, 0); if (!ret) mask_ofl_ipi &= ~mask; } @@ -3788,7 +3789,7 @@ void synchronize_sched_expedited(void) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); - sync_sched_exp_select_cpus(rsp); + sync_rcu_exp_select_cpus(rsp, synchronize_sched_expedited_cpu_stop); synchronize_sched_expedited_wait(rsp); rcu_exp_gp_seq_end(rsp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7880202f1e38..6cbfbfc58656 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -708,65 +708,6 @@ static void sync_rcu_exp_handler(void *info) rcu_report_exp_rdp(rsp, rdp, true); } -/* - * Select the nodes that the upcoming expedited grace period needs - * to wait for. - */ -static void sync_rcu_exp_select_cpus(struct rcu_state *rsp) -{ - int cpu; - unsigned long flags; - unsigned long mask; - unsigned long mask_ofl_test; - unsigned long mask_ofl_ipi; - int ret; - struct rcu_node *rnp; - - sync_exp_reset_tree(rsp); - rcu_for_each_leaf_node(rsp, rnp) { - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); - - /* Each pass checks a CPU for identity, offline, and idle. */ - mask_ofl_test = 0; - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { - struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - if (raw_smp_processor_id() == cpu || - cpu_is_offline(cpu) || - !(atomic_add_return(0, &rdtp->dynticks) & 0x1)) - mask_ofl_test |= rdp->grpmask; - } - mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; - - /* - * Need to wait for any blocked tasks as well. Note that - * additional blocking tasks will also block the expedited - * GP until such time as the ->expmask bits are cleared. - */ - if (rcu_preempt_has_tasks(rnp)) - rnp->exp_tasks = rnp->blkd_tasks.next; - raw_spin_unlock_irqrestore(&rnp->lock, flags); - - /* IPI the remaining CPUs for expedited quiescent state. */ - mask = 1; - for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { - if (!(mask_ofl_ipi & mask)) - continue; - ret = smp_call_function_single(cpu, - sync_rcu_exp_handler, - rsp, 0); - if (!ret) - mask_ofl_ipi &= ~mask; - } - /* Report quiescent states for those that went offline. */ - mask_ofl_test |= mask_ofl_ipi; - if (mask_ofl_test) - rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); - } -} - /** * synchronize_rcu_expedited - Brute-force RCU grace period * @@ -795,7 +736,7 @@ void synchronize_rcu_expedited(void) rcu_exp_gp_seq_start(rsp); /* Initialize the rcu_node tree in preparation for the wait. */ - sync_rcu_exp_select_cpus(rsp); + sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); -- cgit v1.2.3 From 74611ecb0fc4c850a8f89a744ce99cbf0dd43cb2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Aug 2015 10:20:43 -0700 Subject: rcu: Add online/offline info to expedited stall warning message This commit makes the RCU CPU stall warning message print online/offline indications immediately after the CPU number. A "O" indicates global offline, a "." global online, and a "o" indicates RCU believes that the CPU is offline for the current grace period and "." otherwise, and an "N" indicates that RCU believes that the CPU will be offline for the next grace period, and "." otherwise, all right after the CPU number. So for CPU 10, you would normally see "10-...:" indicating that everything believes that the CPU is online. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 9 ++++++++- kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f44f4b30c68a..3d033fee0dcb 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3737,11 +3737,18 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) pr_err("INFO: %s detected expedited stalls on CPUs: {", rsp->name); rcu_for_each_leaf_node(rsp, rnp) { + (void)rcu_print_task_exp_stall(rnp); mask = 1; for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { + struct rcu_data *rdp; + if (!(rnp->expmask & mask)) continue; - pr_cont(" %d", cpu); + rdp = per_cpu_ptr(rsp->rda, cpu); + pr_cont(" %d-%c%c%c", cpu, + "O."[cpu_online(cpu)], + "o."[!!(rdp->grpmask & rnp->expmaskinit)], + "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); } mask <<= 1; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6f3b63b68886..191aa3678575 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -589,6 +589,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); static int rcu_print_task_stall(struct rcu_node *rnp); +static int rcu_print_task_exp_stall(struct rcu_node *rnp); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); static void rcu_preempt_check_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6cbfbfc58656..7b61cece80c0 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -585,6 +585,27 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return ndetected; } +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each that is blocking the current + * expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + struct task_struct *t; + int ndetected = 0; + + if (!rnp->exp_tasks) + return 0; + t = list_entry(rnp->exp_tasks->prev, + struct task_struct, rcu_node_entry); + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { + pr_cont(" P%d", t->pid); + ndetected++; + } + return ndetected; +} + /* * Check that the list of blocked tasks for the newly completed grace * period is in fact empty. It is a serious bug to complete a grace @@ -845,6 +866,16 @@ static int rcu_print_task_stall(struct rcu_node *rnp) return 0; } +/* + * Because preemptible RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections that are + * blocking the current expedited grace period. + */ +static int rcu_print_task_exp_stall(struct rcu_node *rnp) +{ + return 0; +} + /* * Because there is no preemptible RCU, there can be no readers blocked, * so there is no need to check for blocked tasks. So check only for -- cgit v1.2.3 From b08517c76d764c373c232cd309ed058c98705219 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Aug 2015 12:17:29 -0700 Subject: rcu: Enable stall warnings for synchronize_rcu_expedited() This commit redirects synchronize_rcu_expedited()'s wait to synchronize_sched_expedited_wait(), thus enabling RCU CPU stall warnings. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7b61cece80c0..ffeb99e550e8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -761,8 +761,7 @@ void synchronize_rcu_expedited(void) /* Wait for snapshotted ->blkd_tasks lists to drain. */ rnp = rcu_get_root(rsp); - wait_event(rsp->expedited_wq, - sync_rcu_preempt_exp_done(rnp)); + synchronize_sched_expedited_wait(rsp); /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); -- cgit v1.2.3