From b8c17e6664c461e4aed545a943304c3b32dd309c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 Nov 2016 14:25:21 -0800 Subject: rcu: Maintain special bits at bottom of ->dynticks counter Currently, IPIs are used to force other CPUs to invalidate their TLBs in response to a kernel virtual-memory mapping change. This works, but degrades both battery lifetime (for idle CPUs) and real-time response (for nohz_full CPUs), and in addition results in unnecessary IPIs due to the fact that CPUs executing in usermode are unaffected by stale kernel mappings. It would be better to cause a CPU executing in usermode to wait until it is entering kernel mode to do the flush, first to avoid interrupting usemode tasks and second to handle multiple flush requests with a single flush in the case of a long-running user task. This commit therefore reserves a bit at the bottom of the ->dynticks counter, which is checked upon exit from extended quiescent states. If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is invoked, which, if not supplied, is an empty single-pass do-while loop. If this bottom bit is set on -entry- to an extended quiescent state, then a WARN_ON_ONCE() triggers. This bottom bit may be set using a new rcu_eqs_special_set() function, which returns true if the bit was set, or false if the CPU turned out to not be in an extended quiescent state. Please note that this function refuses to set the bit for a non-nohz_full CPU when that CPU is executing in usermode because usermode execution is tracked by RCU as a dyntick-idle extended quiescent state only for nohz_full CPUs. Reported-by: Andy Lutomirski Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcutiny.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/rcutiny.h') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index b452953e21c8..6c9d941e3962 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) return 0; } +static inline bool rcu_eqs_special_set(int cpu) +{ + return false; /* Never flag non-existent other CPUs! */ +} + static inline unsigned long get_state_synchronize_rcu(void) { return 0; -- cgit v1.2.3 From 900b1028ec388e50c98200641ae4274794c807cf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Feb 2017 14:32:54 -0800 Subject: srcu: Allow SRCU to access rcu_scheduler_active This is primarily a code-movement commit in preparation for allowing SRCU to handle early-boot SRCU grace periods. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 6 +++--- kernel/rcu/tiny_plugin.h | 9 +++++---- kernel/rcu/tree.c | 2 +- kernel/rcu/tree_exp.h | 12 ----------- kernel/rcu/update.c | 52 +++++++++++++++++++++++++++++++----------------- 5 files changed, 43 insertions(+), 38 deletions(-) (limited to 'include/linux/rcutiny.h') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6c9d941e3962..5219be250f00 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -217,14 +217,14 @@ static inline void exit_rcu(void) { } -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) extern int rcu_scheduler_active __read_mostly; void rcu_scheduler_starting(void); -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ static inline void rcu_scheduler_starting(void) { } -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index df3a60e19f07..371034e77f87 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -52,7 +52,7 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { RCU_TRACE(.name = "rcu_bh") }; -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) #include int rcu_scheduler_active __read_mostly; @@ -65,15 +65,16 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. * The reason for this is that Tiny RCU does not need kthreads, so does * not have to care about the fact that the scheduler is half-initialized - * at a certain phase of the boot process. + * at a certain phase of the boot process. Unless SRCU is in the mix. */ void __init rcu_scheduler_starting(void) { WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU) + ? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING; } -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ #ifdef CONFIG_RCU_TRACE diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8cc9d40b41ea..2380d1e3dfb8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3974,7 +3974,7 @@ early_initcall(rcu_spawn_gp_kthread); * task is booting the system, and such primitives are no-ops). After this * function is called, any synchronous grace-period primitives are run as * expedited, with the requesting task driving the grace period forward. - * A later core_initcall() rcu_exp_runtime_mode() will switch to full + * A later core_initcall() rcu_set_runtime_mode() will switch to full * runtime RCU functionality. */ void rcu_scheduler_starting(void) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index a1f52bbe9db6..51ca287828a2 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -737,15 +737,3 @@ void synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ - -/* - * Switch to run-time mode once Tree RCU has fully initialized. - */ -static int __init rcu_exp_runtime_mode(void) -{ - rcu_test_sync_prims(); - rcu_scheduler_active = RCU_SCHEDULER_RUNNING; - rcu_test_sync_prims(); - return 0; -} -core_initcall(rcu_exp_runtime_mode); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 55c8530316c7..c5df0d756900 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); * non-expedited counterparts? Intended for use within RCU. Note * that if the user specifies both rcu_expedited and rcu_normal, then * rcu_normal wins. (Except during the time period during boot from - * when the first task is spawned until the rcu_exp_runtime_mode() + * when the first task is spawned until the rcu_set_runtime_mode() * core_initcall() is invoked, at which point everything is expedited.) */ bool rcu_gp_is_normal(void) @@ -190,6 +190,39 @@ void rcu_end_inkernel_boot(void) #endif /* #ifndef CONFIG_TINY_RCU */ +/* + * Test each non-SRCU synchronous grace-period wait API. This is + * useful just after a change in mode for these primitives, and + * during early boot. + */ +void rcu_test_sync_prims(void) +{ + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + synchronize_rcu(); + synchronize_rcu_bh(); + synchronize_sched(); + synchronize_rcu_expedited(); + synchronize_rcu_bh_expedited(); + synchronize_sched_expedited(); +} + +#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) + +/* + * Switch to run-time mode once RCU has fully initialized. + */ +static int __init rcu_set_runtime_mode(void) +{ + rcu_test_sync_prims(); + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; + rcu_test_sync_prims(); + return 0; +} +core_initcall(rcu_set_runtime_mode); + +#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ + #ifdef CONFIG_PREEMPT_RCU /* @@ -817,23 +850,6 @@ static void rcu_spawn_tasks_kthread(void) #endif /* #ifdef CONFIG_TASKS_RCU */ -/* - * Test each non-SRCU synchronous grace-period wait API. This is - * useful just after a change in mode for these primitives, and - * during early boot. - */ -void rcu_test_sync_prims(void) -{ - if (!IS_ENABLED(CONFIG_PROVE_RCU)) - return; - synchronize_rcu(); - synchronize_rcu_bh(); - synchronize_sched(); - synchronize_rcu_expedited(); - synchronize_rcu_bh_expedited(); - synchronize_sched_expedited(); -} - #ifdef CONFIG_PROVE_RCU /* -- cgit v1.2.3 From bcbfdd01dce5556a952fae84ef16fd0f12525e7b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Apr 2017 15:50:41 -0700 Subject: rcu: Make non-preemptive schedule be Tasks RCU quiescent state Currently, a call to schedule() acts as a Tasks RCU quiescent state only if a context switch actually takes place. However, just the call to schedule() guarantees that the calling task has moved off of whatever tracing trampoline that it might have been one previously. This commit therefore plumbs schedule()'s "preempt" parameter into rcu_note_context_switch(), which then records the Tasks RCU quiescent state, but only if this call to schedule() was -not- due to a preemption. To avoid adding overhead to the common-case context-switch path, this commit hides the rcu_note_context_switch() check under an existing non-common-case check. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 ++++++++--- include/linux/rcutiny.h | 13 +++++++++---- include/linux/rcutree.h | 5 +++-- kernel/rcu/tree.c | 22 +++++++++++++++++++++- kernel/rcu/update.c | 1 + kernel/sched/core.c | 2 +- 6 files changed, 43 insertions(+), 11 deletions(-) (limited to 'include/linux/rcutiny.h') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e6146d0074f8..f531b29207da 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -363,15 +363,20 @@ static inline void rcu_init_nohz(void) #ifdef CONFIG_TASKS_RCU #define TASKS_RCU(x) x extern struct srcu_struct tasks_rcu_exit_srcu; -#define rcu_note_voluntary_context_switch(t) \ +#define rcu_note_voluntary_context_switch_lite(t) \ do { \ - rcu_all_qs(); \ if (READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) +#define rcu_note_voluntary_context_switch(t) \ + do { \ + rcu_all_qs(); \ + rcu_note_voluntary_context_switch_lite(t); \ + } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -#define rcu_note_voluntary_context_switch(t) rcu_all_qs() +#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) +#define rcu_note_voluntary_context_switch(t) rcu_all_qs() #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5219be250f00..74d9c3a1feee 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -92,10 +92,11 @@ static inline void kfree_call_rcu(struct rcu_head *head, call_rcu(head, func); } -static inline void rcu_note_context_switch(void) -{ - rcu_sched_qs(); -} +#define rcu_note_context_switch(preempt) \ + do { \ + rcu_sched_qs(); \ + rcu_note_voluntary_context_switch_lite(current); \ + } while (0) /* * Take advantage of the fact that there is only one CPU, which @@ -242,6 +243,10 @@ static inline bool rcu_is_watching(void) #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ +static inline void rcu_request_urgent_qs_task(struct task_struct *t) +{ +} + static inline void rcu_all_qs(void) { barrier(); /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 63a4e4cf40a5..0bacb6b2af69 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,7 +30,7 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -void rcu_note_context_switch(void); +void rcu_note_context_switch(bool preempt); int rcu_needs_cpu(u64 basem, u64 *nextevt); void rcu_cpu_stall_reset(void); @@ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void); */ static inline void rcu_virt_note_context_switch(int cpu) { - rcu_note_context_switch(); + rcu_note_context_switch(false); } void synchronize_rcu_bh(void); @@ -108,6 +108,7 @@ void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; bool rcu_is_watching(void); +void rcu_request_urgent_qs_task(struct task_struct *t); void rcu_all_qs(void); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 3c23435d2083..891d97109e09 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -458,7 +458,7 @@ static void rcu_momentary_dyntick_idle(void) * and requires special handling for preemptible RCU. * The caller must have disabled interrupts. */ -void rcu_note_context_switch(void) +void rcu_note_context_switch(bool preempt) { barrier(); /* Avoid RCU read-side critical sections leaking down. */ trace_rcu_utilization(TPS("Start context switch")); @@ -471,6 +471,8 @@ void rcu_note_context_switch(void) if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) rcu_momentary_dyntick_idle(); this_cpu_inc(rcu_dynticks.rcu_qs_ctr); + if (!preempt) + rcu_note_voluntary_context_switch_lite(current); out: trace_rcu_utilization(TPS("End context switch")); barrier(); /* Avoid RCU read-side critical sections leaking up. */ @@ -1149,6 +1151,24 @@ bool notrace rcu_is_watching(void) } EXPORT_SYMBOL_GPL(rcu_is_watching); +/* + * If a holdout task is actually running, request an urgent quiescent + * state from its CPU. This is unsynchronized, so migrations can cause + * the request to go to the wrong CPU. Which is OK, all that will happen + * is that the CPU's next context switch will be a bit slower and next + * time around this task will generate another request. + */ +void rcu_request_urgent_qs_task(struct task_struct *t) +{ + int cpu; + + barrier(); + cpu = task_cpu(t); + if (!task_curr(t)) + return; /* This task is not running on that CPU. */ + smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true); +} + #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c5df0d756900..273e869ca21d 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -665,6 +665,7 @@ static void check_holdout_task(struct task_struct *t, put_task_struct(t); return; } + rcu_request_urgent_qs_task(t); if (!needreport) return; if (*firstreport) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3b31fc05a0f1..2adf7b6c04e7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3378,7 +3378,7 @@ static void __sched notrace __schedule(bool preempt) hrtick_clear(rq); local_irq_disable(); - rcu_note_context_switch(); + rcu_note_context_switch(preempt); /* * Make sure that signal_pending_state()->signal_pending() below -- cgit v1.2.3