From 2a67e741bbbc022e0fadf8c6dbc3a76019ecd0cf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Oct 2015 12:24:23 +0200 Subject: rcu: Create transitive rnp->lock acquisition functions Providing RCU's memory-ordering guarantees requires that the rcu_node tree's locking provide transitive memory ordering, which the Linux kernel's spinlocks currently do not provide unless smp_mb__after_unlock_lock() is used. Having a separate smp_mb__after_unlock_lock() after each and every lock acquisition is error-prone, hard to read, and a bit annoying, so this commit provides wrapper functions that pull in the smp_mb__after_unlock_lock() invocations. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 630c19772630..fa0e3b96a9ed 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -301,8 +301,7 @@ static void rcu_preempt_note_context_switch(void) /* Possibly blocking in an RCU read-side critical section. */ rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); + raw_spin_lock_irqsave_rcu_node(rnp, flags); t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; @@ -457,8 +456,7 @@ void rcu_read_unlock_special(struct task_struct *t) */ for (;;) { rnp = t->rcu_blocked_node; - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - smp_mb__after_unlock_lock(); + raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ if (rnp == t->rcu_blocked_node) break; WARN_ON_ONCE(1); @@ -989,8 +987,7 @@ static int rcu_boost(struct rcu_node *rnp) READ_ONCE(rnp->boost_tasks) == NULL) return 0; /* Nothing left to boost. */ - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); + raw_spin_lock_irqsave_rcu_node(rnp, flags); /* * Recheck under the lock: all tasks in need of boosting @@ -1176,8 +1173,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, "rcub/%d", rnp_index); if (IS_ERR(t)) return PTR_ERR(t); - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); + raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = kthread_prio; @@ -1567,8 +1563,7 @@ static void rcu_prepare_for_idle(void) if (!*rdp->nxttail[RCU_DONE_TAIL]) continue; rnp = rdp->mynode; - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - smp_mb__after_unlock_lock(); + raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ needwake = rcu_accelerate_cbs(rsp, rnp, rdp); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ if (needwake) @@ -2068,8 +2063,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) bool needwake; struct rcu_node *rnp = rdp->mynode; - raw_spin_lock_irqsave(&rnp->lock, flags); - smp_mb__after_unlock_lock(); + raw_spin_lock_irqsave_rcu_node(rnp, flags); needwake = rcu_start_future_gp(rnp, rdp, &c); raw_spin_unlock_irqrestore(&rnp->lock, flags); if (needwake) -- cgit v1.2.3 From 6cf10081220ae21175a867d446b3167bcbcb937b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 8 Oct 2015 15:36:54 -0700 Subject: rcu: Add transitivity to remaining rcu_node ->lock acquisitions The rule is that all acquisitions of the rcu_node structure's ->lock must provide transitivity: The lock is not acquired that frequently, and sorting out exactly which required it and which did not would be a maintenance nightmare. This commit therefore supplies the needed transitivity to the remaining ->lock acquisitions. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 24 ++++++++++++------------ kernel/rcu/tree_plugin.h | 2 +- kernel/rcu/tree_trace.c | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index daf17e248757..81aa1cdc6bc9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1214,7 +1214,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) struct rcu_node *rnp; rcu_for_each_leaf_node(rsp, rnp) { - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); if (rnp->qsmask != 0) { for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) if (rnp->qsmask & (1UL << cpu)) @@ -1237,7 +1237,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) /* Only let one CPU complain about others per time interval. */ - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); delta = jiffies - READ_ONCE(rsp->jiffies_stall); if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -1256,7 +1256,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) rsp->name); print_cpu_stall_info_begin(); rcu_for_each_leaf_node(rsp, rnp) { - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); ndetected += rcu_print_task_stall(rnp); if (rnp->qsmask != 0) { for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) @@ -1327,7 +1327,7 @@ static void print_cpu_stall(struct rcu_state *rsp) rcu_dump_cpu_stacks(rsp); - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) WRITE_ONCE(rsp->jiffies_stall, jiffies + 3 * rcu_jiffies_till_stall_check() + 3); @@ -2897,7 +2897,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) /* Does this CPU require a not-yet-started grace period? */ local_irq_save(flags); if (cpu_needs_another_gp(rsp, rdp)) { - raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ + raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */ needwake = rcu_start_gp(rsp); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); if (needwake) @@ -3718,7 +3718,7 @@ retry_ipi: mask_ofl_ipi &= ~mask; } else { /* Failed, raced with offline. */ - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); if (cpu_online(cpu) && (rnp->expmask & mask)) { raw_spin_unlock_irqrestore(&rnp->lock, @@ -3727,8 +3727,8 @@ retry_ipi: if (cpu_online(cpu) && (rnp->expmask & mask)) goto retry_ipi; - raw_spin_lock_irqsave(&rnp->lock, - flags); + raw_spin_lock_irqsave_rcu_node(rnp, + flags); } if (!(rnp->expmask & mask)) mask_ofl_ipi &= ~mask; @@ -4110,7 +4110,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) rnp = rnp->parent; if (rnp == NULL) return; - raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */ + raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */ rnp->qsmaskinit |= mask; raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ } @@ -4127,7 +4127,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); @@ -4154,7 +4154,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -4301,7 +4301,7 @@ static int __init rcu_spawn_gp_kthread(void) t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); BUG_ON(IS_ERR(t)); rnp = rcu_get_root(rsp); - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); rsp->gp_kthread = t; if (kthread_prio) { sp.sched_priority = kthread_prio; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa0e3b96a9ed..57ba873d2f18 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -525,7 +525,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) unsigned long flags; struct task_struct *t; - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); if (!rcu_preempt_blocked_readers_cgp(rnp)) { raw_spin_unlock_irqrestore(&rnp->lock, flags); return; diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index ef7093cc9b5c..8efaba870d96 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -319,7 +319,7 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp) unsigned long gpmax; struct rcu_node *rnp = &rsp->node[0]; - raw_spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave_rcu_node(rnp, flags); completed = READ_ONCE(rsp->completed); gpnum = READ_ONCE(rsp->gpnum); if (completed == gpnum) -- cgit v1.2.3 From 5a9be7c628c5273f84abacebf7faf2488376e0f0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 Nov 2015 15:44:06 -0800 Subject: rcu: Add rcu_normal kernel parameter to suppress expediting Although expedited grace periods can be quite useful, and although their OS jitter has been greatly reduced, they can still pose problems for extreme real-time workloads. This commit therefore adds a rcu_normal kernel boot parameter (which can also be manipulated via sysfs) to suppress expedited grace periods, that is, to treat requests for expedited grace periods as if they were requests for normal grace periods. If both rcu_expedited and rcu_normal are specified, rcu_normal wins. This means that if you are relying on expedited grace periods to speed up boot, you will want to specify rcu_expedited on the kernel command line, and then specify rcu_normal via sysfs once boot completes. Signed-off-by: Paul E. McKenney --- Documentation/kernel-parameters.txt | 19 ++++++++++++++----- include/linux/rcupdate.h | 6 ++++++ kernel/ksysfs.c | 22 ++++++++++++++++++++-- kernel/rcu/srcu.c | 2 +- kernel/rcu/tree.c | 6 ++++++ kernel/rcu/tree_plugin.h | 6 ++++++ kernel/rcu/update.c | 12 ++++++++++++ 7 files changed, 65 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 742f69d18fc8..7673943d3085 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3296,6 +3296,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutorture.verbose= [KNL] Enable additional printk() statements. + rcupdate.rcu_cpu_stall_suppress= [KNL] + Suppress RCU CPU stall warning messages. + + rcupdate.rcu_cpu_stall_timeout= [KNL] + Set timeout for RCU CPU stall warning messages. + rcupdate.rcu_expedited= [KNL] Use expedited grace-period primitives, for example, synchronize_rcu_expedited() instead @@ -3303,11 +3309,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. but can increase CPU utilization, degrade real-time latency, and degrade energy efficiency. - rcupdate.rcu_cpu_stall_suppress= [KNL] - Suppress RCU CPU stall warning messages. - - rcupdate.rcu_cpu_stall_timeout= [KNL] - Set timeout for RCU CPU stall warning messages. + rcupdate.rcu_normal= [KNL] + Use only normal grace-period primitives, + for example, synchronize_rcu() instead of + synchronize_rcu_expedited(). This improves + real-time latency, CPU utilization, and energy + efficiency, but can expose users to increased + grace-period latency. This parameter overrides + rcupdate.rcu_expedited. rcupdate.rcu_task_stall_timeout= [KNL] Set timeout in jiffies for RCU task stall warning diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a0189ba67fde..98d9f30c02d4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -49,9 +49,14 @@ #include extern int rcu_expedited; /* for sysctl */ +extern int rcu_normal; /* also for sysctl */ #ifdef CONFIG_TINY_RCU /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ +static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */ +{ + return true; +} static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ { return false; @@ -65,6 +70,7 @@ static inline void rcu_unexpedite_gp(void) { } #else /* #ifdef CONFIG_TINY_RCU */ +bool rcu_gp_is_normal(void); /* Internal RCU use. */ bool rcu_gp_is_expedited(void); /* Internal RCU use. */ void rcu_expedite_gp(void); void rcu_unexpedite_gp(void); diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index e83b26464061..b4e2fa52d8bc 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -20,7 +20,7 @@ #include #include -#include /* rcu_expedited */ +#include /* rcu_expedited and rcu_normal */ #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) @@ -148,7 +148,7 @@ int rcu_expedited; static ssize_t rcu_expedited_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", rcu_expedited); + return sprintf(buf, "%d\n", READ_ONCE(rcu_expedited)); } static ssize_t rcu_expedited_store(struct kobject *kobj, struct kobj_attribute *attr, @@ -161,6 +161,23 @@ static ssize_t rcu_expedited_store(struct kobject *kobj, } KERNEL_ATTR_RW(rcu_expedited); +int rcu_normal; +static ssize_t rcu_normal_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", READ_ONCE(rcu_normal)); +} +static ssize_t rcu_normal_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + if (kstrtoint(buf, 0, &rcu_normal)) + return -EINVAL; + + return count; +} +KERNEL_ATTR_RW(rcu_normal); + /* * Make /sys/kernel/notes give the raw contents of our kernel .notes section. */ @@ -203,6 +220,7 @@ static struct attribute * kernel_attrs[] = { &vmcoreinfo_attr.attr, #endif &rcu_expedited_attr.attr, + &rcu_normal_attr.attr, NULL }; diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index a63a1ea5a41b..9b9cdd549caa 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -489,7 +489,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) */ void synchronize_srcu(struct srcu_struct *sp) { - __synchronize_srcu(sp, rcu_gp_is_expedited() + __synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal()) ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT : SYNCHRONIZE_SRCU_TRYCOUNT); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6a652d1f3d7f..489992997c06 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3841,6 +3841,12 @@ void synchronize_sched_expedited(void) if (rcu_blocking_is_gp()) return; + /* If expedited grace periods are prohibited, fall back to normal. */ + if (rcu_gp_is_normal()) { + wait_rcu_gp(call_rcu_sched); + return; + } + /* Take a snapshot of the sequence number. */ s = rcu_exp_gp_seq_snap(rsp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 57ba873d2f18..d45df3781551 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -746,6 +746,12 @@ void synchronize_rcu_expedited(void) struct rcu_state *rsp = rcu_state_p; unsigned long s; + /* If expedited grace periods are prohibited, fall back to normal. */ + if (rcu_gp_is_normal()) { + wait_rcu_gp(call_rcu); + return; + } + s = rcu_exp_gp_seq_snap(rsp); rnp_unlock = exp_funnel_lock(rsp, s); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5f748c5a40f0..8fccda3a794d 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -61,6 +61,7 @@ MODULE_ALIAS("rcupdate"); #define MODULE_PARAM_PREFIX "rcupdate." module_param(rcu_expedited, int, 0); +module_param(rcu_normal, int, 0); #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT) /** @@ -113,6 +114,17 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); #ifndef CONFIG_TINY_RCU +/* + * Should expedited grace-period primitives always fall back to their + * non-expedited counterparts? Intended for use within RCU. Note + * that if the user specifies both rcu_expedited and rcu_normal, then + * rcu_normal wins. + */ +bool rcu_gp_is_normal(void) +{ + return READ_ONCE(rcu_normal); +} + static atomic_t rcu_expedited_nesting = ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); -- cgit v1.2.3 From 8ba9153b2c3ab733d64e22adb57820ccb6afc496 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Sep 2015 07:55:41 -0700 Subject: rcu: Remove lock-acquisition loop from rcu_read_unlock_special() Several releases have come and gone without the warning triggering, so remove the lock-acquisition loop. Retain the WARN_ON_ONCE() out of sheer paranoia. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 57ba873d2f18..ae4ce2b665f8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -449,19 +449,13 @@ void rcu_read_unlock_special(struct task_struct *t) /* * Remove this task from the list it blocked on. The task - * now remains queued on the rcu_node corresponding to - * the CPU it first blocked on, so the first attempt to - * acquire the task's rcu_node's ->lock will succeed. - * Keep the loop and add a WARN_ON() out of sheer paranoia. + * now remains queued on the rcu_node corresponding to the + * CPU it first blocked on, so there is no longer any need + * to loop. Retain a WARN_ON_ONCE() out of sheer paranoia. */ - for (;;) { - rnp = t->rcu_blocked_node; - raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ - if (rnp == t->rcu_blocked_node) - break; - WARN_ON_ONCE(1); - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ - } + rnp = t->rcu_blocked_node; + raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ + WARN_ON_ONCE(rnp != t->rcu_blocked_node); empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); empty_exp = sync_rcu_preempt_exp_done(rnp); smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ -- cgit v1.2.3 From 699d40352059e64a4d993af170272585c41988d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Sep 2015 08:47:49 -0700 Subject: rcu: Fix obsolete rcu_bootup_announce_oddness() comment This function no longer has #ifdefs, so this commit removes the header comment calling them out. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index ae4ce2b665f8..42df93721e6f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -63,8 +63,7 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ /* * Check the RCU kernel configuration parameters and print informative - * messages about anything out of the ordinary. If you like #ifdef, you - * will love this function. + * messages about anything out of the ordinary. */ static void __init rcu_bootup_announce_oddness(void) { -- cgit v1.2.3 From f0f2e7d307fff226e0c1df5a07101a1216a46d8a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 29 Sep 2015 08:59:32 -0700 Subject: rcu: Avoid tick_nohz_active checks on NOCBs CPUs Currently, rcu_prepare_for_idle() checks for tick_nohz_active, even on individual NOCBs CPUs, unless all CPUs are marked as NOCBs CPUs at build time. This check is pointless on NOCBs CPUs because they never have any callbacks posted, given that all of their callbacks are handed off to the corresponding rcuo kthread. There is a check for individually designated NOCBs CPUs, but it pointelessly follows the check for tick_nohz_active. This commit therefore moves the check for individually designated NOCBs CPUs up with the check for CONFIG_RCU_NOCB_CPU_ALL. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 42df93721e6f..8e9d4a4d0326 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1513,7 +1513,8 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; - if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || + rcu_is_nocb_cpu(smp_processor_id())) return; /* Handle nohz enablement switches conservatively. */ @@ -1527,10 +1528,6 @@ static void rcu_prepare_for_idle(void) if (!tne) return; - /* If this is a no-CBs CPU, no callbacks, just return. */ - if (rcu_is_nocb_cpu(smp_processor_id())) - return; - /* * If a non-lazy callback arrived at a CPU having only lazy * callbacks, invoke RCU core for the side-effect of recalculating -- cgit v1.2.3 From 46a5d164db53ba6066b11889abb7fa6bddbe5cf7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 7 Oct 2015 09:10:48 -0700 Subject: rcu: Stop disabling interrupts in scheduler fastpaths We need the scheduler's fastpaths to be, well, fast, and unnecessarily disabling and re-enabling interrupts is not necessarily consistent with this goal. Especially given that there are regions of the scheduler that already have interrupts disabled. This commit therefore moves the call to rcu_note_context_switch() to one of the interrupts-disabled regions of the scheduler, and removes the now-redundant disabling and re-enabling of interrupts from rcu_note_context_switch() and the functions it calls. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney [ paulmck: Shift rcu_note_context_switch() to avoid deadlock, as suggested by Peter Zijlstra. ] --- include/linux/rcutree.h | 2 +- kernel/rcu/tree.c | 27 ++++++++++++--------------- kernel/rcu/tree_plugin.h | 14 ++++++-------- kernel/sched/core.c | 6 ++++-- 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 60d15a080d7c..9d3eda39bcd2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -37,7 +37,7 @@ void rcu_cpu_stall_reset(void); /* * Note a virtualization-based context switch. This is simply a * wrapper around rcu_note_context_switch(), which allows TINY_RCU - * to save a few bytes. + * to save a few bytes. The caller must have disabled interrupts. */ static inline void rcu_virt_note_context_switch(int cpu) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index ed3bc0578cc5..93941d3434ad 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -242,8 +242,6 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(void) { - unsigned long flags; - if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) return; trace_rcu_grace_period(TPS("rcu_sched"), @@ -252,13 +250,9 @@ void rcu_sched_qs(void) __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false); if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) return; - local_irq_save(flags); - if (__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) { - __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false); - rcu_report_exp_rdp(&rcu_sched_state, - this_cpu_ptr(&rcu_sched_data), true); - } - local_irq_restore(flags); + __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false); + rcu_report_exp_rdp(&rcu_sched_state, + this_cpu_ptr(&rcu_sched_data), true); } void rcu_bh_qs(void) @@ -295,17 +289,16 @@ EXPORT_PER_CPU_SYMBOL_GPL(rcu_qs_ctr); * We inform the RCU core by emulating a zero-duration dyntick-idle * period, which we in turn do by incrementing the ->dynticks counter * by two. + * + * The caller must have disabled interrupts. */ static void rcu_momentary_dyntick_idle(void) { - unsigned long flags; struct rcu_data *rdp; struct rcu_dynticks *rdtp; int resched_mask; struct rcu_state *rsp; - local_irq_save(flags); - /* * Yes, we can lose flag-setting operations. This is OK, because * the flag will be set again after some delay. @@ -335,13 +328,12 @@ static void rcu_momentary_dyntick_idle(void) smp_mb__after_atomic(); /* Later stuff after QS. */ break; } - local_irq_restore(flags); } /* * Note a context switch. This is a quiescent state for RCU-sched, * and requires special handling for preemptible RCU. - * The caller must have disabled preemption. + * The caller must have disabled interrupts. */ void rcu_note_context_switch(void) { @@ -371,9 +363,14 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); */ void rcu_all_qs(void) { + unsigned long flags; + barrier(); /* Avoid RCU read-side critical sections leaking down. */ - if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) { + local_irq_save(flags); rcu_momentary_dyntick_idle(); + local_irq_restore(flags); + } this_cpu_inc(rcu_qs_ctr); barrier(); /* Avoid RCU read-side critical sections leaking up. */ } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 8e9d4a4d0326..e6da888cc908 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -146,8 +146,8 @@ static void __init rcu_bootup_announce(void) * the corresponding expedited grace period will also be the end of the * normal grace period. */ -static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, - unsigned long flags) __releases(rnp->lock) +static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) + __releases(rnp->lock) /* But leaves rrupts disabled. */ { int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) + (rnp->exp_tasks ? RCU_EXP_TASKS : 0) + @@ -235,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, rnp->gp_tasks = &t->rcu_node_entry; if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) rnp->exp_tasks = &t->rcu_node_entry; - raw_spin_unlock(&rnp->lock); + raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */ /* * Report the quiescent state for the expedited GP. This expedited @@ -250,7 +250,6 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp, } else { WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs); } - local_irq_restore(flags); } /* @@ -285,12 +284,11 @@ static void rcu_preempt_qs(void) * predating the current grace period drain, in other words, until * rnp->gp_tasks becomes NULL. * - * Caller must disable preemption. + * Caller must disable interrupts. */ static void rcu_preempt_note_context_switch(void) { struct task_struct *t = current; - unsigned long flags; struct rcu_data *rdp; struct rcu_node *rnp; @@ -300,7 +298,7 @@ static void rcu_preempt_note_context_switch(void) /* Possibly blocking in an RCU read-side critical section. */ rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; - raw_spin_lock_irqsave_rcu_node(rnp, flags); + raw_spin_lock_rcu_node(rnp); t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; @@ -316,7 +314,7 @@ static void rcu_preempt_note_context_switch(void) (rnp->qsmask & rdp->grpmask) ? rnp->gpnum : rnp->gpnum + 1); - rcu_preempt_ctxt_queue(rnp, rdp, flags); + rcu_preempt_ctxt_queue(rnp, rdp); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special.s) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d568ac9319e..ec72de234feb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3085,7 +3085,6 @@ static void __sched notrace __schedule(bool preempt) cpu = smp_processor_id(); rq = cpu_rq(cpu); - rcu_note_context_switch(); prev = rq->curr; /* @@ -3104,13 +3103,16 @@ static void __sched notrace __schedule(bool preempt) if (sched_feat(HRTICK)) hrtick_clear(rq); + local_irq_disable(); + rcu_note_context_switch(); + /* * Make sure that signal_pending_state()->signal_pending() below * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) * done by the caller to avoid the race with signal_wake_up(). */ smp_mb__before_spinlock(); - raw_spin_lock_irq(&rq->lock); + raw_spin_lock(&rq->lock); lockdep_pin_lock(&rq->lock); rq->clock_skip_update <<= 1; /* promote REQ to ACT */ -- cgit v1.2.3 From a87f203e2731ab477386c678e59033ee103018c0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 20 Oct 2015 12:38:49 -0700 Subject: rcu: Eliminate unused rcu_init_one() argument Now that the rcu_state structure's ->rda field is compile-time initialized, there is no need to pass the per-CPU rcu_data structure into rcu_init_one(). This commit therefore eliminates this now-unused parameter. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 7 +++---- kernel/rcu/tree_plugin.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 93941d3434ad..9a4c8c0653ff 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4351,8 +4351,7 @@ static void __init rcu_init_levelspread(int *levelspread, const int *levelcnt) /* * Helper function for rcu_init() that initializes one rcu_state structure. */ -static void __init rcu_init_one(struct rcu_state *rsp, - struct rcu_data __percpu *rda) +static void __init rcu_init_one(struct rcu_state *rsp) { static const char * const buf[] = RCU_NODE_NAME_INIT; static const char * const fqs[] = RCU_FQS_NAME_INIT; @@ -4545,8 +4544,8 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); - rcu_init_one(&rcu_bh_state, &rcu_bh_data); - rcu_init_one(&rcu_sched_state, &rcu_sched_data); + rcu_init_one(&rcu_bh_state); + rcu_init_one(&rcu_sched_state); if (dump_tree) rcu_dump_rcu_node_tree(&rcu_sched_state); __rcu_init_preempt(); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e6da888cc908..fccef5d4b198 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -777,7 +777,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ static void __init __rcu_init_preempt(void) { - rcu_init_one(rcu_state_p, rcu_data_p); + rcu_init_one(rcu_state_p); } /* -- cgit v1.2.3