git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
rcu: Document GP init vs hotplug-scan ordering requirements
authorJoel Fernandes <joelagnelf@nvidia.com>
Tue, 15 Jul 2025 20:01:53 +0000 (16:01 -0400)
committerNeeraj Upadhyay <neeraj.iitr10@gmail.com>
Tue, 22 Jul 2025 11:39:35 +0000 (17:09 +0530)
Add detailed comments explaining the critical ordering constraints
during RCU grace period initialization, based on discussions with
Frederic.

Reviewed-by: "Paul E. McKenney" <paulmck@kernel.org>
Co-developed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Neeraj Upadhyay (AMD) <neeraj.upadhyay@kernel.org>
Documentation/RCU/Design/Requirements/Requirements.rst
kernel/rcu/tree.c

index 6125e7068d2ce9dc7fc7bb3d7758d8f5b1eef77a..771a1ce4c84d476774ba1fb4b593761f241dc0c0 100644 (file)
@@ -1970,6 +1970,47 @@ corresponding CPU's leaf node lock is held. This avoids race conditions
 between RCU's hotplug notifier hooks, the grace period initialization
 code, and the FQS loop, all of which refer to or modify this bookkeeping.
 
+Note that grace period initialization (rcu_gp_init()) must carefully sequence
+CPU hotplug scanning with grace period state changes. For example, the
+following race could occur in rcu_gp_init() if rcu_seq_start() were to happen
+after the CPU hotplug scan.
+
+.. code-block:: none
+
+   CPU0 (rcu_gp_init)                   CPU1                          CPU2
+   ---------------------                ----                          ----
+   // Hotplug scan first (WRONG ORDER)
+   rcu_for_each_leaf_node(rnp) {
+       rnp->qsmaskinit = rnp->qsmaskinitnext;
+   }
+                                        rcutree_report_cpu_starting()
+                                            rnp->qsmaskinitnext |= mask;
+                                        rcu_read_lock()
+                                        r0 = *X;
+                                                                      r1 = *X;
+                                                                      X = NULL;
+                                                                      cookie = get_state_synchronize_rcu();
+                                                                      // cookie = 8 (future GP)
+   rcu_seq_start(&rcu_state.gp_seq);
+   // gp_seq = 5
+
+   // CPU1 now invisible to this GP!
+   rcu_for_each_node_breadth_first() {
+       rnp->qsmask = rnp->qsmaskinit;
+       // CPU1 not included!
+   }
+
+   // GP completes without CPU1
+   rcu_seq_end(&rcu_state.gp_seq);
+   // gp_seq = 8
+                                                                      poll_state_synchronize_rcu(cookie);
+                                                                      // Returns true!
+                                                                      kfree(r1);
+                                        r2 = *r0; // USE-AFTER-FREE!
+
+By incrementing gp_seq first, CPU1's RCU read-side critical section
+is guaranteed not to be missed by CPU2.
+
 Scheduler and RCU
 ~~~~~~~~~~~~~~~~~
 
index 8c22db75997875b7ecb6f74fc533f12240c7831b..384af6b5cf9c9dd66903f82252cc26a3d26f56bb 100644 (file)
@@ -1838,6 +1838,14 @@ static noinline_for_stack bool rcu_gp_init(void)
        start_new_poll = rcu_sr_normal_gp_init();
        /* Record GP times before starting GP, hence rcu_seq_start(). */
        old_gp_seq = rcu_state.gp_seq;
+       /*
+        * Critical ordering: rcu_seq_start() must happen BEFORE the CPU hotplug
+        * scan below. Otherwise we risk a race where a newly onlining CPU could
+        * be missed by the current grace period, potentially leading to
+        * use-after-free errors. For a detailed explanation of this race, see
+        * Documentation/RCU/Design/Requirements/Requirements.rst in the
+        * "Hotplug CPU" section.
+        */
        rcu_seq_start(&rcu_state.gp_seq);
        /* Ensure that rcu_seq_done_exact() guardband doesn't give false positives. */
        WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&