summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/listRCU.rst10
-rw-r--r--Documentation/RCU/whatisRCU.rst3
-rw-r--r--MAINTAINERS8
-rw-r--r--kernel/rcu/rcu.h14
-rw-r--r--kernel/rcu/rcuscale.c2
-rw-r--r--kernel/rcu/srcutree.c2
-rw-r--r--kernel/rcu/tree.c50
-rw-r--r--kernel/rcu/tree.h2
-rw-r--r--kernel/rcu/tree_exp.h2
-rw-r--r--kernel/rcu/tree_nocb.h10
-rw-r--r--kernel/rcu/tree_plugin.h2
-rw-r--r--kernel/rcu/tree_stall.h4
-rw-r--r--rust/kernel/sync/rcu.rs5
13 files changed, 54 insertions, 60 deletions
diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst
index ed5c9d8c9afe..d8bb98623c12 100644
--- a/Documentation/RCU/listRCU.rst
+++ b/Documentation/RCU/listRCU.rst
@@ -334,7 +334,7 @@ If the system-call audit module were to ever need to reject stale data, one way
to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the
``audit_entry`` structure, and modify audit_filter_task() as follows::
- static enum audit_state audit_filter_task(struct task_struct *tsk)
+ static struct audit_entry *audit_filter_task(struct task_struct *tsk, char **key)
{
struct audit_entry *e;
enum audit_state state;
@@ -346,16 +346,18 @@ to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to
if (e->deleted) {
spin_unlock(&e->lock);
rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
+ return NULL;
}
rcu_read_unlock();
if (state == AUDIT_STATE_RECORD)
*key = kstrdup(e->rule.filterkey, GFP_ATOMIC);
- return state;
+ /* As long as e->lock is held, e is valid and
+ * its value is not stale */
+ return e;
}
}
rcu_read_unlock();
- return AUDIT_BUILD_CONTEXT;
+ return NULL;
}
The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the
diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst
index 53faeed7c190..be2eb6be16ec 100644
--- a/Documentation/RCU/whatisRCU.rst
+++ b/Documentation/RCU/whatisRCU.rst
@@ -15,6 +15,9 @@ to start learning about RCU:
| 2014 Big API Table https://lwn.net/Articles/609973/
| 6. The RCU API, 2019 Edition https://lwn.net/Articles/777036/
| 2019 Big API Table https://lwn.net/Articles/777165/
+| 7. The RCU API, 2024 Edition https://lwn.net/Articles/988638/
+| 2024 Background Information https://lwn.net/Articles/988641/
+| 2024 Big API Table https://lwn.net/Articles/988666/
For those preferring video:
diff --git a/MAINTAINERS b/MAINTAINERS
index 96b827049501..901a54a633f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1682,7 +1682,7 @@ M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Arve Hjønnevåg <arve@android.com>
M: Todd Kjos <tkjos@android.com>
M: Martijn Coenen <maco@android.com>
-M: Joel Fernandes <joel@joelfernandes.org>
+M: Joel Fernandes <joelagnelf@nvidia.com>
M: Christian Brauner <christian@brauner.io>
M: Carlos Llamas <cmllamas@google.com>
M: Suren Baghdasaryan <surenb@google.com>
@@ -13705,7 +13705,7 @@ M: Luc Maranget <luc.maranget@inria.fr>
M: "Paul E. McKenney" <paulmck@kernel.org>
R: Akira Yokosawa <akiyks@gmail.com>
R: Daniel Lustig <dlustig@nvidia.com>
-R: Joel Fernandes <joel@joelfernandes.org>
+R: Joel Fernandes <joelagnelf@nvidia.com>
L: linux-kernel@vger.kernel.org
L: linux-arch@vger.kernel.org
L: lkmm@lists.linux.dev
@@ -20291,14 +20291,14 @@ READ-COPY UPDATE (RCU)
M: "Paul E. McKenney" <paulmck@kernel.org>
M: Frederic Weisbecker <frederic@kernel.org> (kernel/rcu/tree_nocb.h)
M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h)
-M: Joel Fernandes <joel@joelfernandes.org>
+M: Joel Fernandes <joelagnelf@nvidia.com>
M: Josh Triplett <josh@joshtriplett.org>
M: Boqun Feng <boqun.feng@gmail.com>
M: Uladzislau Rezki <urezki@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>
-R: Zqiang <qiang.zhang1211@gmail.com>
+R: Zqiang <qiang.zhang@linux.dev>
L: rcu@vger.kernel.org
S: Supported
W: http://www.rdrop.com/users/paulmck/RCU/
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 516b26024a37..9cf01832a6c3 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -57,6 +57,9 @@
/* Low-order bit definition for polled grace-period APIs. */
#define RCU_GET_STATE_COMPLETED 0x1
+/* A complete grace period count */
+#define RCU_SEQ_GP (RCU_SEQ_STATE_MASK + 1)
+
extern int sysctl_sched_rt_runtime;
/*
@@ -157,12 +160,21 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
* Given a snapshot from rcu_seq_snap(), determine whether or not a
* full update-side operation has occurred, but do not allow the
* (ULONG_MAX / 2) safety-factor/guard-band.
+ *
+ * The token returned by get_state_synchronize_rcu_full() is based on
+ * rcu_state.gp_seq but it is tested in poll_state_synchronize_rcu_full()
+ * against the root rnp->gp_seq. Since rcu_seq_start() is first called
+ * on rcu_state.gp_seq and only later reflected on the root rnp->gp_seq,
+ * it is possible that rcu_seq_snap(rcu_state.gp_seq) returns 2 full grace
+ * periods ahead of the root rnp->gp_seq. To prevent false-positives with the
+ * full polling API that a wrap around instantly completed the GP, when nothing
+ * like that happened, adjust for the 2 GPs in the ULONG_CMP_LT().
*/
static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
{
unsigned long cur_s = READ_ONCE(*sp);
- return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1));
+ return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_GP));
}
/*
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 0f3059b1b80d..b521d0455992 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -762,7 +762,7 @@ kfree_scale_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kcalloc(kfree_mult, sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9a59b071501b..48047260697e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1589,7 +1589,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
{
if (cookie != SRCU_GET_STATE_COMPLETED &&
- !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
+ !rcu_seq_done_exact(&ssp->srcu_sup->srcu_gp_seq, cookie))
return false;
// Ensure that the end of the SRCU grace period happens before
// any subsequent code that the caller might execute.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4f36a52d3783..e8a4b720d7d2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -831,6 +831,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp)
return 0;
}
+#ifndef arch_irq_stat_cpu
+#define arch_irq_stat_cpu(cpu) 0
+#endif
+
/*
* Returns positive if the specified CPU has passed through a quiescent state
* by virtue of being in or having passed through an dynticks idle state since
@@ -966,9 +970,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp)
rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
- rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
- rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
- rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
+ rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu);
+ rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu);
+ rsrp->nr_csw = nr_context_switches_cpu(cpu);
rsrp->jiffies = jiffies;
rsrp->gp_seq = rdp->gp_seq;
}
@@ -1090,38 +1094,6 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
return needmore;
}
-static void swake_up_one_online_ipi(void *arg)
-{
- struct swait_queue_head *wqh = arg;
-
- swake_up_one(wqh);
-}
-
-static void swake_up_one_online(struct swait_queue_head *wqh)
-{
- int cpu = get_cpu();
-
- /*
- * If called from rcutree_report_cpu_starting(), wake up
- * is dangerous that late in the CPU-down hotplug process. The
- * scheduler might queue an ignored hrtimer. Defer the wake up
- * to an online CPU instead.
- */
- if (unlikely(cpu_is_offline(cpu))) {
- int target;
-
- target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU),
- cpu_online_mask);
-
- smp_call_function_single(target, swake_up_one_online_ipi,
- wqh, 0);
- put_cpu();
- } else {
- put_cpu();
- swake_up_one(wqh);
- }
-}
-
/*
* Awaken the grace-period kthread. Don't do a self-awaken (unless in an
* interrupt or softirq handler, in which case we just might immediately
@@ -1146,7 +1118,7 @@ static void rcu_gp_kthread_wake(void)
return;
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
- swake_up_one_online(&rcu_state.gp_wq);
+ swake_up_one(&rcu_state.gp_wq);
}
/*
@@ -1828,6 +1800,7 @@ static noinline_for_stack bool rcu_gp_init(void)
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root();
bool start_new_poll;
+ unsigned long old_gp_seq;
WRITE_ONCE(rcu_state.gp_activity, jiffies);
raw_spin_lock_irq_rcu_node(rnp);
@@ -1855,7 +1828,12 @@ static noinline_for_stack bool rcu_gp_init(void)
*/
start_new_poll = rcu_sr_normal_gp_init();
/* Record GP times before starting GP, hence rcu_seq_start(). */
+ old_gp_seq = rcu_state.gp_seq;
rcu_seq_start(&rcu_state.gp_seq);
+ /* Ensure that rcu_seq_done_exact() guardband doesn't give false positives. */
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
+ rcu_seq_done_exact(&old_gp_seq, rcu_seq_snap(&rcu_state.gp_seq)));
+
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 63bea388c243..3830c19cf2f6 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -168,7 +168,7 @@ struct rcu_snap_record {
u64 cputime_irq; /* Accumulated cputime of hard irqs */
u64 cputime_softirq;/* Accumulated cputime of soft irqs */
u64 cputime_system; /* Accumulated cputime of kernel tasks */
- unsigned long nr_hardirqs; /* Accumulated number of hard irqs */
+ u64 nr_hardirqs; /* Accumulated number of hard irqs */
unsigned int nr_softirqs; /* Accumulated number of soft irqs */
unsigned long long nr_csw; /* Accumulated number of task switches */
unsigned long jiffies; /* Track jiffies value */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 8d4895c854c5..c36c7d5575ca 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -200,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake)
- swake_up_one_online(&rcu_state.expedited_wq);
+ swake_up_one(&rcu_state.expedited_wq);
break;
}
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fa269d34167a..1596812f7f12 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -216,7 +216,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
if (needwake) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
- swake_up_one_online(&rdp_gp->nocb_gp_wq);
+ swake_up_one(&rdp_gp->nocb_gp_wq);
}
return needwake;
@@ -554,19 +554,13 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
- } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) {
+ } else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- /*
- * Don't do the wake-up upfront on fragile paths.
- * Also offline CPUs can't call swake_up_one_online() from
- * (soft-)IRQs. Rely on the final deferred wake-up from
- * rcutree_report_cpu_dead()
- */
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3c0bbbbb686f..0b0f56f6abc8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -29,7 +29,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
lockdep_is_held(&rdp->nocb_lock) ||
lockdep_is_held(&rcu_state.nocb_mutex) ||
- (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
+ ((!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) || softirq_count()) &&
rdp == this_cpu_ptr(&rcu_data)) ||
rcu_current_is_nocb_kthread(rdp)),
"Unsafe read of RCU_NOCB offloaded state"
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 925fcdad5dea..56b21219442b 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu)
rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
pr_err("\t hardirqs softirqs csw/system\n");
- pr_err("\t number: %8ld %10d %12lld\n",
- kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
+ pr_err("\t number: %8lld %10d %12lld\n",
+ kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs,
kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
nr_context_switches_cpu(cpu) - rsrp->nr_csw);
pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",
diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs
index b51d9150ffe2..a32bef6e490b 100644
--- a/rust/kernel/sync/rcu.rs
+++ b/rust/kernel/sync/rcu.rs
@@ -17,6 +17,7 @@ pub struct Guard(NotThreadSafe);
impl Guard {
/// Acquires the RCU read side lock and returns a guard.
+ #[inline]
pub fn new() -> Self {
// SAFETY: An FFI call with no additional requirements.
unsafe { bindings::rcu_read_lock() };
@@ -25,16 +26,19 @@ impl Guard {
}
/// Explicitly releases the RCU read side lock.
+ #[inline]
pub fn unlock(self) {}
}
impl Default for Guard {
+ #[inline]
fn default() -> Self {
Self::new()
}
}
impl Drop for Guard {
+ #[inline]
fn drop(&mut self) {
// SAFETY: By the type invariants, the RCU read side is locked, so it is ok to unlock it.
unsafe { bindings::rcu_read_unlock() };
@@ -42,6 +46,7 @@ impl Drop for Guard {
}
/// Acquires the RCU read side lock.
+#[inline]
pub fn read_lock() -> Guard {
Guard::new()
}