git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 27 Aug 2023 08:03:58 +0000 (10:03 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 27 Aug 2023 08:03:58 +0000 (10:03 +0200)
added patches:
tick-detect-and-fix-jiffies-update-stall.patch
timers-nohz-switch-to-oneshot_stopped-in-the-low-res-handler-when-the-tick-is-stopped.patch
torture-fix-hang-during-kthread-shutdown-phase.patch

queue-5.10/series
queue-5.10/tick-detect-and-fix-jiffies-update-stall.patch [new file with mode: 0644]
queue-5.10/timers-nohz-switch-to-oneshot_stopped-in-the-low-res-handler-when-the-tick-is-stopped.patch [new file with mode: 0644]
queue-5.10/torture-fix-hang-during-kthread-shutdown-phase.patch [new file with mode: 0644]

index b252d585bf96293fa6f623583d348e926dc8592e..84bf2e95ae9258d2344c3fe90a12c714a1f1992e 100644 (file)
@@ -60,3 +60,6 @@ pci-acpiphp-use-pci_assign_unassigned_bridge_resources-only-for-non-root-bus.pat
 drm-vmwgfx-fix-shader-stage-validation.patch
 drm-display-dp-fix-the-dp-dsc-receiver-cap-size.patch
 x86-fpu-set-x86_feature_osxsave-feature-after-enabling-osxsave-in-cr4.patch
+torture-fix-hang-during-kthread-shutdown-phase.patch
+tick-detect-and-fix-jiffies-update-stall.patch
+timers-nohz-switch-to-oneshot_stopped-in-the-low-res-handler-when-the-tick-is-stopped.patch
diff --git a/queue-5.10/tick-detect-and-fix-jiffies-update-stall.patch b/queue-5.10/tick-detect-and-fix-jiffies-update-stall.patch
new file mode 100644 (file)
index 0000000..d01195d
--- /dev/null
@@ -0,0 +1,85 @@
+From a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Wed, 2 Feb 2022 01:01:07 +0100
+Subject: tick: Detect and fix jiffies update stall
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd upstream.
+
+In some rare cases, the timekeeper CPU may be delaying its jiffies
+update duty for a while. Known causes include:
+
+* The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ
+  or MULTI_STOP_RUN state. Disabled interrupts prevent timekeeping
+  updates while waiting for the target CPU to complete its
+  stop_machine() callback.
+
+* The timekeeper vcpu has VMEXIT'ed for a long while due to some overload
+  on the host.
+
+Detect and fix these situations with emergency timekeeping catchups.
+
+Original-patch-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/tick-sched.c |   17 +++++++++++++++++
+ kernel/time/tick-sched.h |    4 ++++
+ 2 files changed, 21 insertions(+)
+
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -148,6 +148,8 @@ static ktime_t tick_init_jiffy_update(vo
+       return period;
+ }
++#define MAX_STALLED_JIFFIES 5
++
+ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
+ {
+       int cpu = smp_processor_id();
+@@ -175,6 +177,21 @@ static void tick_sched_do_timer(struct t
+       if (tick_do_timer_cpu == cpu)
+               tick_do_update_jiffies64(now);
++      /*
++       * If jiffies update stalled for too long (timekeeper in stop_machine()
++       * or VMEXIT'ed for several msecs), force an update.
++       */
++      if (ts->last_tick_jiffies != jiffies) {
++              ts->stalled_jiffies = 0;
++              ts->last_tick_jiffies = READ_ONCE(jiffies);
++      } else {
++              if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
++                      tick_do_update_jiffies64(now);
++                      ts->stalled_jiffies = 0;
++                      ts->last_tick_jiffies = READ_ONCE(jiffies);
++              }
++      }
++
+       if (ts->inidle)
+               ts->got_idle_tick = 1;
+ }
+--- a/kernel/time/tick-sched.h
++++ b/kernel/time/tick-sched.h
+@@ -49,6 +49,8 @@ enum tick_nohz_mode {
+  * @timer_expires_base:       Base time clock monotonic for @timer_expires
+  * @next_timer:               Expiry time of next expiring timer for debugging purpose only
+  * @tick_dep_mask:    Tick dependency mask - is set, if someone needs the tick
++ * @last_tick_jiffies:        Value of jiffies seen on last tick
++ * @stalled_jiffies:  Number of stalled jiffies detected across ticks
+  */
+ struct tick_sched {
+       struct hrtimer                  sched_timer;
+@@ -77,6 +79,8 @@ struct tick_sched {
+       u64                             next_timer;
+       ktime_t                         idle_expires;
+       atomic_t                        tick_dep_mask;
++      unsigned long                   last_tick_jiffies;
++      unsigned int                    stalled_jiffies;
+ };
+ extern struct tick_sched *tick_get_tick_sched(int cpu);
diff --git a/queue-5.10/timers-nohz-switch-to-oneshot_stopped-in-the-low-res-handler-when-the-tick-is-stopped.patch b/queue-5.10/timers-nohz-switch-to-oneshot_stopped-in-the-low-res-handler-when-the-tick-is-stopped.patch
new file mode 100644 (file)
index 0000000..a7ef482
--- /dev/null
@@ -0,0 +1,58 @@
+From 62c1256d544747b38e77ca9b5bfe3a26f9592576 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Sat, 23 Apr 2022 00:14:46 +1000
+Subject: timers/nohz: Switch to ONESHOT_STOPPED in the low-res handler when the tick is stopped
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit 62c1256d544747b38e77ca9b5bfe3a26f9592576 upstream.
+
+When tick_nohz_stop_tick() stops the tick and high resolution timers are
+disabled, then the clock event device is not put into ONESHOT_STOPPED
+mode. This can lead to spurious timer interrupts with some clock event
+device drivers that don't shut down entirely after firing.
+
+Eliminate these by putting the device into ONESHOT_STOPPED mode at points
+where it is not being reprogrammed. When there are no timers active, then
+tick_program_event() with KTIME_MAX can be used to stop the device. When
+there is a timer active, the device can be stopped at the next tick (any
+new timer added by timers will reprogram the tick).
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20220422141446.915024-1-npiggin@gmail.com
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/tick-sched.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -884,6 +884,8 @@ static void tick_nohz_stop_tick(struct t
+       if (unlikely(expires == KTIME_MAX)) {
+               if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+                       hrtimer_cancel(&ts->sched_timer);
++              else
++                      tick_program_event(KTIME_MAX, 1);
+               return;
+       }
+@@ -1274,9 +1276,15 @@ static void tick_nohz_handler(struct clo
+       tick_sched_do_timer(ts, now);
+       tick_sched_handle(ts, regs);
+-      /* No need to reprogram if we are running tickless  */
+-      if (unlikely(ts->tick_stopped))
++      if (unlikely(ts->tick_stopped)) {
++              /*
++               * The clockevent device is not reprogrammed, so change the
++               * clock event device to ONESHOT_STOPPED to avoid spurious
++               * interrupts on devices which might not be truly one shot.
++               */
++              tick_program_event(KTIME_MAX, 1);
+               return;
++      }
+       hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
+       tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
diff --git a/queue-5.10/torture-fix-hang-during-kthread-shutdown-phase.patch b/queue-5.10/torture-fix-hang-during-kthread-shutdown-phase.patch
new file mode 100644 (file)
index 0000000..30c102b
--- /dev/null
@@ -0,0 +1,54 @@
+From d52d3a2bf408ff86f3a79560b5cce80efb340239 Mon Sep 17 00:00:00 2001
+From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
+Date: Sun, 1 Jan 2023 06:15:55 +0000
+Subject: torture: Fix hang during kthread shutdown phase
+
+From: Joel Fernandes (Google) <joel@joelfernandes.org>
+
+commit d52d3a2bf408ff86f3a79560b5cce80efb340239 upstream.
+
+During rcutorture shutdown, the rcu_torture_cleanup() function calls
+torture_cleanup_begin(), which sets the fullstop global variable to
+FULLSTOP_RMMOD. This causes the rcutorture threads for readers and
+fakewriters to exit all of their "while" loops and start shutting down.
+
+They then call torture_kthread_stopping(), which in turn waits for
+kthread_stop() to be called.  However, rcu_torture_cleanup() has
+not yet called kthread_stop() on those threads, and before it gets a
+chance to do so, multiple instances of torture_kthread_stopping() invoke
+schedule_timeout_uninterruptible(1) in a tight loop.  Tracing confirms that
+TIMER_SOFTIRQ can then continuously execute timer callbacks.  If that
+TIMER_SOFTIRQ preempts the task executing rcu_torture_cleanup(), that
+task might never invoke kthread_stop().
+
+This commit improves this situation by increasing the timeout passed to
+schedule_timeout_uninterruptible() from one jiffy to 1/20th of a second.
+This change prevents TIMER_SOFTIRQ from monopolizing its CPU, thus
+allowing rcu_torture_cleanup() to carry out the needed kthread_stop()
+invocations.  Testing has shown 100 runs of TREE07 passing reliably,
+as opposed to the tens-of-percent failure rates seen beforehand.
+
+Cc: Paul McKenney <paulmck@kernel.org>
+Cc: Frederic Weisbecker <fweisbec@gmail.com>
+Cc: Zhouyi Zhou <zhouzhouyi@gmail.com>
+Cc: <stable@vger.kernel.org> # 6.0.x
+Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
+Tested-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
+Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/torture.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/torture.c
++++ b/kernel/torture.c
+@@ -788,7 +788,7 @@ void torture_kthread_stopping(char *titl
+       VERBOSE_TOROUT_STRING(buf);
+       while (!kthread_should_stop()) {
+               torture_shutdown_absorb(title);
+-              schedule_timeout_uninterruptible(1);
++              schedule_timeout_uninterruptible(HZ / 20);
+       }
+ }
+ EXPORT_SYMBOL_GPL(torture_kthread_stopping);