diff options
-rw-r--r-- | Documentation/RCU/listRCU.rst | 10
-rw-r--r-- | Documentation/RCU/whatisRCU.rst | 3
-rw-r--r-- | MAINTAINERS | 8
-rw-r--r-- | kernel/rcu/rcu.h | 14
-rw-r--r-- | kernel/rcu/rcuscale.c | 2
-rw-r--r-- | kernel/rcu/srcutree.c | 2
-rw-r--r-- | kernel/rcu/tree.c | 50
-rw-r--r-- | kernel/rcu/tree.h | 2
-rw-r--r-- | kernel/rcu/tree_exp.h | 2
-rw-r--r-- | kernel/rcu/tree_nocb.h | 10
-rw-r--r-- | kernel/rcu/tree_plugin.h | 2
-rw-r--r-- | kernel/rcu/tree_stall.h | 4
-rw-r--r-- | rust/kernel/sync/rcu.rs | 5
13 files changed, 54 insertions, 60 deletions
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst index ed5c9d8c9afe..d8bb98623c12 100644 --- a/Documentation/RCU/listRCU.rst +++ b/Documentation/RCU/listRCU.rst @@ -334,7 +334,7 @@ If the system-call audit module were to ever need to reject stale data, one way to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the ``audit_entry`` structure, and modify audit_filter_task() as follows:: - static enum audit_state audit_filter_task(struct task_struct *tsk) + static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key) { struct audit_entry *e; enum audit_state state; @@ -346,16 +346,18 @@ to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to if (e->deleted) { spin_unlock(&e->lock); rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; + return NULL; } rcu_read_unlock(); if (state == AUDIT_STATE_RECORD) *key = kstrdup(e->rule.filterkey, GFP_ATOMIC); - return state; + /* As long as e->lock is held, e is valid and + * its value is not stale */ + return e; } } rcu_read_unlock(); - return AUDIT_BUILD_CONTEXT; + return NULL; } The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 53faeed7c190..be2eb6be16ec 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -15,6 +15,9 @@ to start learning about RCU: | 2014 Big API Table https://lwn.net/Articles/609973/ | 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/ | 2019 Big API Table https://lwn.net/Articles/777165/ +| 7. 
The RCU API, 2024 Edition https://lwn.net/Articles/988638/ +| 2024 Background Information https://lwn.net/Articles/988641/ +| 2024 Big API Table https://lwn.net/Articles/988666/ For those preferring video: diff --git a/MAINTAINERS b/MAINTAINERS index 96b827049501..901a54a633f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1682,7 +1682,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org> M: Arve Hjønnevåg <arve@android.com> M: Todd Kjos <tkjos@android.com> M: Martijn Coenen <maco@android.com> -M: Joel Fernandes <joel@joelfernandes.org> +M: Joel Fernandes <joelagnelf@nvidia.com> M: Christian Brauner <christian@brauner.io> M: Carlos Llamas <cmllamas@google.com> M: Suren Baghdasaryan <surenb@google.com> @@ -13705,7 +13705,7 @@ M: Luc Maranget <luc.maranget@inria.fr> M: "Paul E. McKenney" <paulmck@kernel.org> R: Akira Yokosawa <akiyks@gmail.com> R: Daniel Lustig <dlustig@nvidia.com> -R: Joel Fernandes <joel@joelfernandes.org> +R: Joel Fernandes <joelagnelf@nvidia.com> L: linux-kernel@vger.kernel.org L: linux-arch@vger.kernel.org L: lkmm@lists.linux.dev @@ -20291,14 +20291,14 @@ READ-COPY UPDATE (RCU) M: "Paul E. 
McKenney" <paulmck@kernel.org> M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h) M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h) -M: Joel Fernandes <joel@joelfernandes.org> +M: Joel Fernandes <joelagnelf@nvidia.com> M: Josh Triplett <josh@joshtriplett.org> M: Boqun Feng <boqun.feng@gmail.com> M: Uladzislau Rezki <urezki@gmail.com> R: Steven Rostedt <rostedt@goodmis.org> R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> R: Lai Jiangshan <jiangshanlai@gmail.com> -R: Zqiang <qiang.zhang1211@gmail.com> +R: Zqiang <qiang.zhang@linux.dev> L: rcu@vger.kernel.org S: Supported W: http://www.rdrop.com/users/paulmck/RCU/ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 516b26024a37..9cf01832a6c3 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -57,6 +57,9 @@ /* Low-order bit definition for polled grace-period APIs. */ #define RCU_GET_STATE_COMPLETED 0x1 +/* A complete grace period count */ +#define RCU_SEQ_GP (RCU_SEQ_STATE_MASK + 1) + extern int sysctl_sched_rt_runtime; /* @@ -157,12 +160,21 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s) * Given a snapshot from rcu_seq_snap(), determine whether or not a * full update-side operation has occurred, but do not allow the * (ULONG_MAX / 2) safety-factor/guard-band. + * + * The token returned by get_state_synchronize_rcu_full() is based on + * rcu_state.gp_seq but it is tested in poll_state_synchronize_rcu_full() + * against the root rnp->gp_seq. Since rcu_seq_start() is first called + * on rcu_state.gp_seq and only later reflected on the root rnp->gp_seq, + * it is possible that rcu_seq_snap(rcu_state.gp_seq) returns 2 full grace + * periods ahead of the root rnp->gp_seq. To prevent false-positives with the + * full polling API that a wrap around instantly completed the GP, when nothing + * like that happened, adjust for the 2 GPs in the ULONG_CMP_LT(). 
*/ static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s) { unsigned long cur_s = READ_ONCE(*sp); - return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1)); + return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_GP)); } /* diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 0f3059b1b80d..b521d0455992 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -762,7 +762,7 @@ kfree_scale_thread(void *arg) } for (i = 0; i < kfree_alloc_num; i++) { - alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL); + alloc_ptr = kcalloc(kfree_mult, sizeof(struct kfree_obj), GFP_KERNEL); if (!alloc_ptr) return -ENOMEM; diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 9a59b071501b..48047260697e 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1589,7 +1589,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) { if (cookie != SRCU_GET_STATE_COMPLETED && - !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie)) + !rcu_seq_done_exact(&ssp->srcu_sup->srcu_gp_seq, cookie)) return false; // Ensure that the end of the SRCU grace period happens before // any subsequent code that the caller might execute. 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 4f36a52d3783..e8a4b720d7d2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -831,6 +831,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp) return 0; } +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif + /* * Returns positive if the specified CPU has passed through a quiescent state * by virtue of being in or having passed through an dynticks idle state since @@ -966,9 +970,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp) rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu); rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu); rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); - rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu); - rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu); - rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu); + rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu); + rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu); + rsrp->nr_csw = nr_context_switches_cpu(cpu); rsrp->jiffies = jiffies; rsrp->gp_seq = rdp->gp_seq; } @@ -1090,38 +1094,6 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } -static void swake_up_one_online_ipi(void *arg) -{ - struct swait_queue_head *wqh = arg; - - swake_up_one(wqh); -} - -static void swake_up_one_online(struct swait_queue_head *wqh) -{ - int cpu = get_cpu(); - - /* - * If called from rcutree_report_cpu_starting(), wake up - * is dangerous that late in the CPU-down hotplug process. The - * scheduler might queue an ignored hrtimer. Defer the wake up - * to an online CPU instead. - */ - if (unlikely(cpu_is_offline(cpu))) { - int target; - - target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), - cpu_online_mask); - - smp_call_function_single(target, swake_up_one_online_ipi, - wqh, 0); - put_cpu(); - } else { - put_cpu(); - swake_up_one(wqh); - } -} - /* * Awaken the grace-period kthread. 
Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1146,7 +1118,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one_online(&rcu_state.gp_wq); + swake_up_one(&rcu_state.gp_wq); } /* @@ -1828,6 +1800,7 @@ static noinline_for_stack bool rcu_gp_init(void) struct rcu_data *rdp; struct rcu_node *rnp = rcu_get_root(); bool start_new_poll; + unsigned long old_gp_seq; WRITE_ONCE(rcu_state.gp_activity, jiffies); raw_spin_lock_irq_rcu_node(rnp); @@ -1855,7 +1828,12 @@ static noinline_for_stack bool rcu_gp_init(void) */ start_new_poll = rcu_sr_normal_gp_init(); /* Record GP times before starting GP, hence rcu_seq_start(). */ + old_gp_seq = rcu_state.gp_seq; rcu_seq_start(&rcu_state.gp_seq); + /* Ensure that rcu_seq_done_exact() guardband doesn't give false positives. */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && + rcu_seq_done_exact(&old_gp_seq, rcu_seq_snap(&rcu_state.gp_seq))); + ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq); trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start")); rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 63bea388c243..3830c19cf2f6 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -168,7 +168,7 @@ struct rcu_snap_record { u64 cputime_irq; /* Accumulated cputime of hard irqs */ u64 cputime_softirq;/* Accumulated cputime of soft irqs */ u64 cputime_system; /* Accumulated cputime of kernel tasks */ - unsigned long nr_hardirqs; /* Accumulated number of hard irqs */ + u64 nr_hardirqs; /* Accumulated number of hard irqs */ unsigned int nr_softirqs; /* Accumulated number of soft irqs */ unsigned long long nr_csw; /* Accumulated number of task switches */ unsigned long jiffies; /* Track jiffies value */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8d4895c854c5..c36c7d5575ca 
100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -200,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, if (rnp->parent == NULL) { raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) - swake_up_one_online(&rcu_state.expedited_wq); + swake_up_one(&rcu_state.expedited_wq); break; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index fa269d34167a..1596812f7f12 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -216,7 +216,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp, raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); if (needwake) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake")); - swake_up_one_online(&rdp_gp->nocb_gp_wq); + swake_up_one(&rdp_gp->nocb_gp_wq); } return needwake; @@ -554,19 +554,13 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, TPS("WakeLazy")); - } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) { + } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ rcu_nocb_unlock(rdp); wake_nocb_gp(rdp, false); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeEmpty")); } else { - /* - * Don't do the wake-up upfront on fragile paths. - * Also offline CPUs can't call swake_up_one_online() from - * (soft-)IRQs. 
Rely on the final deferred wake-up from - * rcutree_report_cpu_dead() - */ rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, TPS("WakeEmptyIsDeferred")); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 3c0bbbbb686f..0b0f56f6abc8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -29,7 +29,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) || lockdep_is_held(&rdp->nocb_lock) || lockdep_is_held(&rcu_state.nocb_mutex) || - (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) && + ((!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) || softirq_count()) && rdp == this_cpu_ptr(&rcu_data)) || rcu_current_is_nocb_kthread(rdp)), "Unsafe read of RCU_NOCB offloaded state" diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 925fcdad5dea..56b21219442b 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu) rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu); pr_err("\t hardirqs softirqs csw/system\n"); - pr_err("\t number: %8ld %10d %12lld\n", - kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs, + pr_err("\t number: %8lld %10d %12lld\n", + kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs, kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs, nr_context_switches_cpu(cpu) - rsrp->nr_csw); pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n", diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs index b51d9150ffe2..a32bef6e490b 100644 --- a/rust/kernel/sync/rcu.rs +++ b/rust/kernel/sync/rcu.rs @@ -17,6 +17,7 @@ pub struct Guard(NotThreadSafe); impl Guard { /// Acquires the RCU read side lock and returns a guard. + #[inline] pub fn new() -> Self { // SAFETY: An FFI call with no additional requirements. unsafe { bindings::rcu_read_lock() }; @@ -25,16 +26,19 @@ impl Guard { } /// Explicitly releases the RCU read side lock. 
+ #[inline] pub fn unlock(self) {} } impl Default for Guard { + #[inline] fn default() -> Self { Self::new() } } impl Drop for Guard { + #[inline] fn drop(&mut self) { // SAFETY: By the type invariants, the RCU read side is locked, so it is ok to unlock it. unsafe { bindings::rcu_read_unlock() }; @@ -42,6 +46,7 @@ impl Drop for Guard { } /// Acquires the RCU read side lock. +#[inline] pub fn read_lock() -> Guard { Guard::new() } |