]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tick: Detect and fix jiffies update stall
author Frederic Weisbecker <frederic@kernel.org>
Sun, 13 Aug 2023 03:16:18 +0000 (03:16 +0000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Aug 2023 16:22:04 +0000 (18:22 +0200)
[ Upstream commit a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd ]

tick: Detect and fix jiffies update stall

In some rare cases, the timekeeper CPU may delay its jiffies
update duty for a while. Known causes include:

* The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ
  or MULTI_STOP_RUN state. Disabled interrupts prevent timekeeping
  updates while waiting for the target CPU to complete its
  stop_machine() callback.

* The timekeeper vcpu has VMEXIT'ed for a long while due to some overload
  on the host.

Detect and fix these situations with emergency timekeeping catchups.

Original-patch-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
kernel/time/tick-sched.c
kernel/time/tick-sched.h

index f42d0776bc847d158db8c93c5707fed8390dff48..7701c720dc1f1ae9a3364e022e6e2dc25a0c6cb5 100644 (file)
@@ -180,6 +180,8 @@ static ktime_t tick_init_jiffy_update(void)
        return period;
 }
 
+#define MAX_STALLED_JIFFIES 5
+
 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
        int cpu = smp_processor_id();
@@ -207,6 +209,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
        if (tick_do_timer_cpu == cpu)
                tick_do_update_jiffies64(now);
 
+       /*
+        * If jiffies update stalled for too long (timekeeper in stop_machine()
+        * or VMEXIT'ed for several msecs), force an update.
+        */
+       if (ts->last_tick_jiffies != jiffies) {
+               ts->stalled_jiffies = 0;
+               ts->last_tick_jiffies = READ_ONCE(jiffies);
+       } else {
+               if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
+                       tick_do_update_jiffies64(now);
+                       ts->stalled_jiffies = 0;
+                       ts->last_tick_jiffies = READ_ONCE(jiffies);
+               }
+       }
+
        if (ts->inidle)
                ts->got_idle_tick = 1;
 }
index d952ae393423632e997cc996fad2eaa932c5fc3a..504649513399ba3feed83a1f55f87ef1cb255aa5 100644 (file)
@@ -49,6 +49,8 @@ enum tick_nohz_mode {
  * @timer_expires_base:        Base time clock monotonic for @timer_expires
  * @next_timer:                Expiry time of next expiring timer for debugging purpose only
  * @tick_dep_mask:     Tick dependency mask - is set, if someone needs the tick
+ * @last_tick_jiffies: Value of jiffies seen on last tick
+ * @stalled_jiffies:   Number of stalled jiffies detected across ticks
  */
 struct tick_sched {
        struct hrtimer                  sched_timer;
@@ -77,6 +79,8 @@ struct tick_sched {
        u64                             next_timer;
        ktime_t                         idle_expires;
        atomic_t                        tick_dep_mask;
+       unsigned long                   last_tick_jiffies;
+       unsigned int                    stalled_jiffies;
 };
 
 extern struct tick_sched *tick_get_tick_sched(int cpu);