rcu: Latch normal synchronize_rcu() path on flood

author Uladzislau Rezki (Sony) <urezki@gmail.com>

Wed, 11 Mar 2026 18:58:11 +0000 (19:58 +0100)

committer Uladzislau Rezki (Sony) <urezki@gmail.com>

Sun, 24 May 2026 07:40:08 +0000 (09:40 +0200)
author Uladzislau Rezki (Sony) <urezki@gmail.com>
Wed, 11 Mar 2026 18:58:11 +0000 (19:58 +0100)
committer Uladzislau Rezki (Sony) <urezki@gmail.com>
Sun, 24 May 2026 07:40:08 +0000 (09:40 +0200)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index 4d0f545fb3ec5a1750d9112a851deb8fd976d32d..d5db2e85d55196cf85f8dc2a0da6be174309cb8a 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5862,13 +5862,13 @@ Kernel parameters
                         use a call_rcu[_hurry]() path. Please note, this is for a
                         normal grace period.
  
-                       How to enable it:
+                       How to disable it:
  
-                       echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
-                       or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
+                       echo 0 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
+                       or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=0"
  
-                       Default is 1 if num_possible_cpus() <= 16 and it is not explicitly
-                       disabled by the boot parameter passing 0.
+                       Default is 1 (enabled) unless it is explicitly disabled by passing
+                       0 as the boot parameter.
  
         rcuscale.gp_async= [KNL]
                         Measure performance of asynchronous
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c

index 09f0cef5014c7c9028219797f919f51f05427f16..afb9e7db8f78064d5c4de1159f1d7cd4cd1ae35a 100644 (file)
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1632,17 +1632,21 @@ static void rcu_sr_put_wait_head(struct llist_node *node)
         atomic_set_release(&sr_wn->inuse, 0);
  }
  
-/* Enable rcu_normal_wake_from_gp automatically on small systems. */
-#define WAKE_FROM_GP_CPU_THRESHOLD 16
-
-static int rcu_normal_wake_from_gp = -1;
+static int rcu_normal_wake_from_gp = 1;
  module_param(rcu_normal_wake_from_gp, int, 0644);
  static struct workqueue_struct *sync_wq;
  
+#define RCU_SR_NORMAL_LATCH_THR 64
+
+/* Number of in-flight synchronize_rcu() calls queued on srs_next. */
+static atomic_long_t rcu_sr_normal_count;
+static int rcu_sr_normal_latched; /* 0/1 */
+
  static void rcu_sr_normal_complete(struct llist_node *node)
  {
         struct rcu_synchronize *rs = container_of(
                 (struct rcu_head *) node, struct rcu_synchronize, head);
+       long nr;
  
         WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
                 !poll_state_synchronize_rcu_full(&rs->oldstate),
@@ -1650,6 +1654,15 @@ static void rcu_sr_normal_complete(struct llist_node *node)
  
         /* Finally. */
         complete(&rs->completion);
+       nr = atomic_long_dec_return(&rcu_sr_normal_count);
+       WARN_ON_ONCE(nr < 0);
+
+       /*
+        * Unlatch: once every in-flight request has completed,
+        * switch back to the normal path if the latch was set.
+        */
+       if (nr == 0)
+               (void)cmpxchg_relaxed(&rcu_sr_normal_latched, 1, 0);
  }
  
  static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
@@ -1795,6 +1808,24 @@ static bool rcu_sr_normal_gp_init(void)
  
  static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
  {
+       /*
+        * Increment before publish to avoid a complete
+        * vs enqueue race on latch.
+        */
+       long nr = atomic_long_inc_return(&rcu_sr_normal_count);
+
+       /*
+        * Latch when threshold is reached. Checking for an exact match
+        * restricts cmpxchg() to a single context.
+        *
+        * This latch is intentionally relaxed and best-effort. Concurrent
+        * set/clear can race and temporarily lose the latch, which is OK
+        * because it only selects between the fast and fallback paths.
+        */
+       if (nr == RCU_SR_NORMAL_LATCH_THR)
+               (void)cmpxchg_relaxed(&rcu_sr_normal_latched, 0, 1);
+
+       /* Publish for the GP kthread/worker. */
         llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
  }
  
@@ -3278,14 +3309,15 @@ static void synchronize_rcu_normal(void)
  {
         struct rcu_synchronize rs;
  
+       init_rcu_head_on_stack(&rs.head);
         trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
  
-       if (READ_ONCE(rcu_normal_wake_from_gp) < 1) {
+       if (READ_ONCE(rcu_normal_wake_from_gp) < 1 ||
+                       READ_ONCE(rcu_sr_normal_latched)) {
                 wait_rcu_gp(call_rcu_hurry);
                 goto trace_complete_out;
         }
  
-       init_rcu_head_on_stack(&rs.head);
         init_completion(&rs.completion);
  
         /*
@@ -3302,10 +3334,10 @@ static void synchronize_rcu_normal(void)
  
         /* Now we can wait. */
         wait_for_completion(&rs.completion);
-       destroy_rcu_head_on_stack(&rs.head);
  
  trace_complete_out:
         trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("complete"));
+       destroy_rcu_head_on_stack(&rs.head);
  }
  
  /**
@@ -4904,12 +4936,6 @@ void __init rcu_init(void)
         sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
         WARN_ON(!sync_wq);
  
-       /* Respect if explicitly disabled via a boot parameter. */
-       if (rcu_normal_wake_from_gp < 0) {
-               if (num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
-                       rcu_normal_wake_from_gp = 1;
-       }
-
         /* Fill in default value for rcutree.qovld boot parameter. */
         /* -After- the rcu_node ->lock fields are initialized! */
         if (qovld < 0)
author	Uladzislau Rezki (Sony) <urezki@gmail.com>
	Wed, 11 Mar 2026 18:58:11 +0000 (19:58 +0100)
committer	Uladzislau Rezki (Sony) <urezki@gmail.com>
	Sun, 24 May 2026 07:40:08 +0000 (09:40 +0200)
Documentation/admin-guide/kernel-parameters.txt		patch | blob | blame | history
kernel/rcu/tree.c		patch | blob | blame | history