rcutorture: Perform more frequent testing of ->gpwrap

author Joel Fernandes <joelagnelf@nvidia.com>

Sun, 16 Feb 2025 11:52:20 +0000 (06:52 -0500)

committer Joel Fernandes <joelagnelf@nvidia.com>

Fri, 16 May 2025 15:13:27 +0000 (11:13 -0400)
author Joel Fernandes <joelagnelf@nvidia.com>
Sun, 16 Feb 2025 11:52:20 +0000 (06:52 -0500)
committer Joel Fernandes <joelagnelf@nvidia.com>
Fri, 16 May 2025 15:13:27 +0000 (11:13 -0400)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index 76e538c77e31615c9cd6a650bb8a72ead14b8008..50d90b2542d1d691e8e8969454f639a7a1e9a2e4 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5657,6 +5657,31 @@
                         are zero, rcutorture acts as if is interpreted
                         they are all non-zero.
  
+       rcutorture.gpwrap_lag= [KNL]
+                       Enable grace-period wrap lag testing. Setting
+                       to false prevents the gpwrap lag test from
+                       running. Default is true.
+
+       rcutorture.gpwrap_lag_gps= [KNL]
+                       Set the value for grace-period wrap lag during
+                       active lag testing periods. This controls how many
+                       grace periods differences we tolerate between
+                       rdp and rnp's gp_seq before setting overflow flag.
+                       The default is always set to 8.
+
+       rcutorture.gpwrap_lag_cycle_mins= [KNL]
+                       Set the total cycle duration for gpwrap lag
+                       testing in minutes. This is the total time for
+                       one complete cycle of active and inactive
+                       testing periods. Default is 30 minutes.
+
+       rcutorture.gpwrap_lag_active_mins= [KNL]
+                       Set the duration for which gpwrap lag is active
+                       within each cycle, in minutes. During this time,
+                       the grace-period wrap lag will be set to the
+                       value specified by gpwrap_lag_gps. Default is
+                       5 minutes.
+
         rcutorture.irqreader= [KNL]
                         Run RCU readers from irq handlers, or, more
                         accurately, from a timer handler.  Not all RCU
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h

index eed2951a4962d5cdf45c200a4900aba0d04d20dd..516b26024a37154fa0e30b32943bcf8f1bc6c206 100644 (file)
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -572,6 +572,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
                                unsigned long c_old,
                                unsigned long c);
  void rcu_gp_set_torture_wait(int duration);
+void rcu_set_gpwrap_lag(unsigned long lag);
+int rcu_get_gpwrap_count(int cpu);
  #else
  static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
  {
@@ -589,6 +591,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
         do { } while (0)
  #endif
  static inline void rcu_gp_set_torture_wait(int duration) { }
+static inline void rcu_set_gpwrap_lag(unsigned long lag) { }
+static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
  #endif
  unsigned long long rcutorture_gather_gp_seqs(void);
  void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c

index 393bb5790e0f987c60798f4cf46b593ee27aa59b..b5db1ac32a3dc954dc597827a4e871594e802c96 100644 (file)
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -115,6 +115,10 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
  torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
  torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
  torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(bool, gpwrap_lag, true, "Enable grace-period wrap lag testing");
+torture_param(int, gpwrap_lag_gps, 8, "Value to set for set_gpwrap_lag during an active testing period.");
+torture_param(int, gpwrap_lag_cycle_mins, 30, "Total cycle duration for gpwrap lag testing (in minutes)");
+torture_param(int, gpwrap_lag_active_mins, 5, "Duration for which gpwrap lag is active within each cycle (in minutes)");
  torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
  torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
  torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
@@ -413,6 +417,8 @@ struct rcu_torture_ops {
         bool (*reader_blocked)(void);
         unsigned long long (*gather_gp_seqs)(void);
         void (*format_gp_seqs)(unsigned long long seqs, char *cp, size_t len);
+       void (*set_gpwrap_lag)(unsigned long lag);
+       int (*get_gpwrap_count)(int cpu);
         long cbflood_max;
         int irq_capable;
         int can_boost;
@@ -619,6 +625,8 @@ static struct rcu_torture_ops rcu_ops = {
                                   : NULL,
         .gather_gp_seqs         = rcutorture_gather_gp_seqs,
         .format_gp_seqs         = rcutorture_format_gp_seqs,
+       .set_gpwrap_lag         = rcu_set_gpwrap_lag,
+       .get_gpwrap_count       = rcu_get_gpwrap_count,
         .irq_capable            = 1,
         .can_boost              = IS_ENABLED(CONFIG_RCU_BOOST),
         .extendables            = RCUTORTURE_MAX_EXTEND,
@@ -2432,6 +2440,7 @@ rcu_torture_stats_print(void)
         int i;
         long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
         long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+       long n_gpwraps = 0;
         struct rcu_torture *rtcp;
         static unsigned long rtcv_snap = ULONG_MAX;
         static bool splatted;
@@ -2442,6 +2451,8 @@ rcu_torture_stats_print(void)
                         pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
                         batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
                 }
+               if (cur_ops->get_gpwrap_count)
+                       n_gpwraps += cur_ops->get_gpwrap_count(cpu);
         }
         for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
                 if (pipesummary[i] != 0)
@@ -2473,8 +2484,9 @@ rcu_torture_stats_print(void)
                 data_race(n_barrier_attempts),
                 data_race(n_rcu_torture_barrier_error));
         pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
-       pr_cont("nocb-toggles: %ld:%ld\n",
+       pr_cont("nocb-toggles: %ld:%ld ",
                 atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
+       pr_cont("gpwraps: %ld\n", n_gpwraps);
  
         pr_alert("%s%s ", torture_type, TORTURE_FLAG);
         if (atomic_read(&n_rcu_torture_mberror) ||
@@ -3645,6 +3657,57 @@ static int rcu_torture_preempt(void *unused)
  
  static enum cpuhp_state rcutor_hp;
  
+static struct hrtimer gpwrap_lag_timer;
+static bool gpwrap_lag_active;
+
+/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
+static enum hrtimer_restart rcu_gpwrap_lag_timer(struct hrtimer *timer)
+{
+       ktime_t next_delay;
+
+       if (gpwrap_lag_active) {
+               pr_alert("rcu-torture: Disabling gpwrap lag (value=0)\n");
+               cur_ops->set_gpwrap_lag(0);
+               gpwrap_lag_active = false;
+               next_delay = ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0);
+       } else {
+               pr_alert("rcu-torture: Enabling gpwrap lag (value=%d)\n", gpwrap_lag_gps);
+               cur_ops->set_gpwrap_lag(gpwrap_lag_gps);
+               gpwrap_lag_active = true;
+               next_delay = ktime_set(gpwrap_lag_active_mins * 60, 0);
+       }
+
+       if (torture_must_stop_irq())
+               return HRTIMER_NORESTART;
+
+       hrtimer_forward_now(timer, next_delay);
+       return HRTIMER_RESTART;
+}
+
+static int rcu_gpwrap_lag_init(void)
+{
+       if (!gpwrap_lag)
+               return 0;
+
+       if (gpwrap_lag_cycle_mins <= 0 || gpwrap_lag_active_mins <= 0) {
+               pr_alert("rcu-torture: lag timing parameters must be positive\n");
+               return -EINVAL;
+       }
+
+       hrtimer_setup(&gpwrap_lag_timer, rcu_gpwrap_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       gpwrap_lag_active = false;
+       hrtimer_start(&gpwrap_lag_timer,
+                     ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0), HRTIMER_MODE_REL);
+
+       return 0;
+}
+
+static void rcu_gpwrap_lag_cleanup(void)
+{
+       hrtimer_cancel(&gpwrap_lag_timer);
+       cur_ops->set_gpwrap_lag(0);
+       gpwrap_lag_active = false;
+}
  static void
  rcu_torture_cleanup(void)
  {
@@ -3814,6 +3877,9 @@ rcu_torture_cleanup(void)
         torture_cleanup_end();
         if (cur_ops->gp_slow_unregister)
                 cur_ops->gp_slow_unregister(NULL);
+
+       if (gpwrap_lag && cur_ops->set_gpwrap_lag)
+               rcu_gpwrap_lag_cleanup();
  }
  
  static void rcu_torture_leak_cb(struct rcu_head *rhp)
@@ -4310,9 +4376,17 @@ rcu_torture_init(void)
         }
         if (object_debug)
                 rcu_test_debug_objects();
-       torture_init_end();
+
         if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
                 cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
+
+       if (gpwrap_lag && cur_ops->set_gpwrap_lag) {
+               firsterr = rcu_gpwrap_lag_init();
+               if (torture_init_error(firsterr))
+                       goto unwind;
+       }
+
+       torture_init_end();
         return 0;
  
  unwind:
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c

index 659f83e7104869a8e03c2f52086d6609cbc94b72..4f36a52d37839e3ef303d23c09689c8cb3c3def1 100644 (file)
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
         .gpwrap = true,
  };
+
+int rcu_get_gpwrap_count(int cpu)
+{
+       struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+       return READ_ONCE(rdp->gpwrap_count);
+}
+EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
+
  static struct rcu_state rcu_state = {
         .level = { &rcu_state.node[0] },
         .gp_state = RCU_GP_IDLE,
@@ -757,6 +766,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
         smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
  }
  
+static unsigned long seq_gpwrap_lag = ULONG_MAX / 4;
+
+/**
+ * rcu_set_gpwrap_lag - Set RCU GP sequence overflow lag value.
+ * @lag_gps: Set overflow lag to this many grace period worth of counters
+ * which is used by rcutorture to quickly force a gpwrap situation.
+ * @lag_gps = 0 means we reset it back to the boot-time value.
+ */
+void rcu_set_gpwrap_lag(unsigned long lag_gps)
+{
+       unsigned long lag_seq_count;
+
+       lag_seq_count = (lag_gps == 0)
+                       ? ULONG_MAX / 4
+                       : lag_gps << RCU_SEQ_CTR_SHIFT;
+       WRITE_ONCE(seq_gpwrap_lag, lag_seq_count);
+}
+EXPORT_SYMBOL_GPL(rcu_set_gpwrap_lag);
+
  /*
   * When trying to report a quiescent state on behalf of some other CPU,
   * it is our responsibility to check for and handle potential overflow
@@ -767,9 +795,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
  static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
  {
         raw_lockdep_assert_held_rcu_node(rnp);
-       if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
-                        rnp->gp_seq))
+       if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_gpwrap_lag,
+                        rnp->gp_seq)) {
                 WRITE_ONCE(rdp->gpwrap, true);
+               WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
+       }
         if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
                 rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
  }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h

index a9a811d9d7a372abc80dc64e7cf3808e1e8f14fd..63bea388c24332067e94ebd3441e7645d5a53ace 100644 (file)
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -183,6 +183,7 @@ struct rcu_data {
         bool            core_needs_qs;  /* Core waits for quiescent state. */
         bool            beenonline;     /* CPU online at least once. */
         bool            gpwrap;         /* Possible ->gp_seq wrap. */
+       unsigned int    gpwrap_count;   /* Count of GP sequence wrap. */
         bool            cpu_started;    /* RCU watching this onlining CPU. */
         struct rcu_node *mynode;        /* This CPU's leaf of hierarchy */
         unsigned long grpmask;          /* Mask to apply to leaf qsmask. */
author	Joel Fernandes <joelagnelf@nvidia.com>
	Sun, 16 Feb 2025 11:52:20 +0000 (06:52 -0500)
committer	Joel Fernandes <joelagnelf@nvidia.com>
	Fri, 16 May 2025 15:13:27 +0000 (11:13 -0400)
Documentation/admin-guide/kernel-parameters.txt		patch \| blob \| blame \| history
kernel/rcu/rcu.h		patch \| blob \| blame \| history
kernel/rcu/rcutorture.c		patch \| blob \| blame \| history
kernel/rcu/tree.c		patch \| blob \| blame \| history
kernel/rcu/tree.h		patch \| blob \| blame \| history