]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
rcu: Introduce CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
authorUladzislau Rezki <uladzislau.rezki@sony.com>
Wed, 16 Feb 2022 13:52:09 +0000 (14:52 +0100)
committerPaul E. McKenney <paulmck@kernel.org>
Wed, 11 May 2022 18:38:50 +0000 (11:38 -0700)
Currently both expedited and regular grace period stall warnings use
a single timeout value with units of seconds.  However, recent
Android use cases require a sub-100-millisecond expedited RCU CPU
stall warning.  Given that expedited RCU grace periods normally complete
in far less than a single millisecond, especially for small systems,
this is not unreasonable.

Therefore introduce the CONFIG_RCU_EXP_CPU_STALL_TIMEOUT kernel
configuration option, which defaults to 20 msec on Android and otherwise
remains the same as that of the non-expedited stall warnings.  It can also
be changed at run time via: /sys/.../parameters/rcu_exp_cpu_stall_timeout.

[ paulmck: Default of zero to use CONFIG_RCU_STALL_TIMEOUT. ]

Signed-off-by: Uladzislau Rezki <uladzislau.rezki@sony.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Documentation/RCU/stallwarn.rst
Documentation/admin-guide/kernel-parameters.txt
kernel/rcu/Kconfig.debug
kernel/rcu/rcu.h
kernel/rcu/tree_exp.h
kernel/rcu/tree_stall.h
kernel/rcu/update.c

index 78404625bad26bbc7bfadf6374be7136bb5bbc25..794837eb519b94949dff617259a72ab2c1f2d2b6 100644 (file)
@@ -162,6 +162,26 @@ CONFIG_RCU_CPU_STALL_TIMEOUT
        Stall-warning messages may be enabled and disabled completely via
        /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
 
+CONFIG_RCU_EXP_CPU_STALL_TIMEOUT
+--------------------------------
+
+       Same as the CONFIG_RCU_CPU_STALL_TIMEOUT parameter but only for
+       the expedited grace period. This parameter defines the period
+       of time that RCU will wait from the beginning of an expedited
+       grace period until it issues an RCU CPU stall warning. This time
+       period is normally 20 milliseconds on Android devices.  A zero
+       value causes the CONFIG_RCU_CPU_STALL_TIMEOUT value to be used,
+       after conversion to milliseconds.
+
+       This configuration parameter may be changed at runtime via the
+       /sys/module/rcupdate/parameters/rcu_exp_cpu_stall_timeout, however
+       this parameter is checked only at the beginning of a cycle. If you
+       are in a current stall cycle, setting it to a new value will change
+       the timeout for the -next- stall.
+
+       Stall-warning messages may be enabled and disabled completely via
+       /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
 RCU_STALL_DELAY_DELTA
 ---------------------
 
index 3f1cc5e317ed4a5ad001082c9c589b6008f68db9..5e21a3fb57c46d18c553c596e9c2f7fd8e7ae670 100644 (file)
 
        rcupdate.rcu_cpu_stall_timeout= [KNL]
                        Set timeout for RCU CPU stall warning messages.
+                       The value is in seconds and the maximum allowed
+                       value is 300 seconds.
+
+       rcupdate.rcu_exp_cpu_stall_timeout= [KNL]
+                       Set timeout for expedited RCU CPU stall warning
+                       messages.  The value is in milliseconds
+                       and the maximum allowed value is 21000
+                       milliseconds. Please note that this value is
+                       adjusted to an arch timer tick resolution.
+                       Setting this to zero causes the value from
+                       rcupdate.rcu_cpu_stall_timeout to be used (after
+                       conversion from seconds to milliseconds).
 
        rcupdate.rcu_expedited= [KNL]
                        Use expedited grace-period primitives, for
index 4fd64999300fc229bea06793861adae6042a697b..0b397b5bf8469a4880a6f7ad61b02c3e948fd1e8 100644 (file)
@@ -91,6 +91,20 @@ config RCU_CPU_STALL_TIMEOUT
          RCU grace period persists, additional CPU stall warnings are
          printed at more widely spaced intervals.
 
+config RCU_EXP_CPU_STALL_TIMEOUT
+       int "Expedited RCU CPU stall timeout in milliseconds"
+       depends on RCU_STALL_COMMON
+       range 0 21000
+       default 20 if ANDROID
+       default 0 if !ANDROID
+       help
+         If a given expedited RCU grace period extends more than the
+         specified number of milliseconds, a CPU stall warning is printed.
+         If the RCU grace period persists, additional CPU stall warnings
+         are printed at more widely spaced intervals.  A value of zero
+         says to use the RCU_CPU_STALL_TIMEOUT value converted from
+         seconds to milliseconds.
+
 config RCU_TRACE
        bool "Enable tracing for RCU"
        depends on DEBUG_KERNEL
index 24b5f2c2de87b4c37b0d3c52c3189f26d669b455..20f0300f6cb1f7361d136107294fc02bd4ca4e2e 100644 (file)
@@ -210,7 +210,9 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
 extern int rcu_cpu_stall_ftrace_dump;
 extern int rcu_cpu_stall_suppress;
 extern int rcu_cpu_stall_timeout;
+extern int rcu_exp_cpu_stall_timeout;
 int rcu_jiffies_till_stall_check(void);
+int rcu_exp_jiffies_till_stall_check(void);
 
 static inline bool rcu_stall_is_suppressed(void)
 {
index 60197ea24ceb9c73a17382b3c031be7fff582557..b1f52b59fa4b44e246743c7ff21a5ad6415da1f2 100644 (file)
@@ -496,7 +496,7 @@ static void synchronize_rcu_expedited_wait(void)
        struct rcu_node *rnp_root = rcu_get_root();
 
        trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
-       jiffies_stall = rcu_jiffies_till_stall_check();
+       jiffies_stall = rcu_exp_jiffies_till_stall_check();
        jiffies_start = jiffies;
        if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
                if (synchronize_rcu_expedited_wait_once(1))
@@ -571,7 +571,7 @@ static void synchronize_rcu_expedited_wait(void)
                                dump_cpu_task(cpu);
                        }
                }
-               jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
+               jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
        }
 }
 
index 0c5d8516516af5780c266c87b4705b84739eeb16..009d3f9305cf7b3bd4b8930647042271608363bc 100644 (file)
@@ -25,6 +25,34 @@ int sysctl_max_rcu_stall_to_panic __read_mostly;
 #define RCU_STALL_MIGHT_DIV            8
 #define RCU_STALL_MIGHT_MIN            (2 * HZ)
 
+int rcu_exp_jiffies_till_stall_check(void)
+{
+       int cpu_stall_timeout = READ_ONCE(rcu_exp_cpu_stall_timeout);
+       int exp_stall_delay_delta = 0;
+       int till_stall_check;
+
+       // Zero says to use the regular stall timeout, converted from jiffies to ms.
+       if (!cpu_stall_timeout)
+               cpu_stall_timeout = jiffies_to_msecs(rcu_jiffies_till_stall_check());
+
+       // Convert the millisecond timeout to jiffies and clamp it to the
+       // range [2, 21 * HZ]; the upper bound matches the 21000 ms Kconfig
+       // limit for CONFIG_RCU_EXP_CPU_STALL_TIMEOUT.  The minimum clamped
+       // value is "2UL", because at least one full tick has to be guaranteed.
+       till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
+
+       if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
+               WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
+
+#ifdef CONFIG_PROVE_RCU
+       /* Under CONFIG_PROVE_RCU, pad the timeout by ~25% plus one jiffy. */
+       exp_stall_delay_delta = ((till_stall_check * 25) / 100) + 1;
+#endif
+
+       return till_stall_check + exp_stall_delay_delta;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_jiffies_till_stall_check);
+
 /* Limit-check stall timeouts specified at boottime and runtime. */
 int rcu_jiffies_till_stall_check(void)
 {
index 180ff9c41fa87e228e5df9f435b7d4f0a1dfb3e6..fc7fef57560646d5a8e64e757434e4d4ad988840 100644 (file)
@@ -506,6 +506,8 @@ EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
 module_param(rcu_cpu_stall_suppress, int, 0644);
 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
 module_param(rcu_cpu_stall_timeout, int, 0644);
+int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
+module_param(rcu_exp_cpu_stall_timeout, int, 0644);
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
 
 // Suppress boot-time RCU CPU stall warnings and rcutorture writer stall