]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 09:34:36 +0000 (11:34 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 09:34:36 +0000 (11:34 +0200)
added patches:
hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch
tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch

queue-5.4/hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch [new file with mode: 0644]

diff --git a/queue-5.4/hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch b/queue-5.4/hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch
new file mode 100644 (file)
index 0000000..b1f5e4d
--- /dev/null
@@ -0,0 +1,62 @@
+From b46b4a8a57c377b72a98c7930a9f6969d2d4784e Mon Sep 17 00:00:00 2001
+From: Vineeth Pillai <viremana@linux.microsoft.com>
+Date: Fri, 21 Aug 2020 15:28:49 +0000
+Subject: hv_utils: drain the timesync packets on onchannelcallback
+
+From: Vineeth Pillai <viremana@linux.microsoft.com>
+
+commit b46b4a8a57c377b72a98c7930a9f6969d2d4784e upstream.
+
+There could be instances where a system stall prevents the timesync
+packets from being consumed. And this might lead to more than one packet
+pending in the ring buffer. Current code empties one packet per callback
+and it might be a stale one. So drain all the packets from ring buffer
+on each callback.
+
+Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Link: https://lore.kernel.org/r/20200821152849.99517-1-viremana@linux.microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+[ The old code in the upstream commit uses HV_HYP_PAGE_SIZE, but
+  the old code in 5.4.y still uses PAGE_SIZE. Fixed this manually for 5.4.y.
+  Note: 5.4.y already has the define HV_HYP_PAGE_SIZE, so the new code
+  in the upstream commit works for 5.4.y.
+  If there are multiple messages in the host-to-guest ringbuffer of the TimeSync
+  device, 5.4.y only handles 1 message, and later the host puts new messages
+  into the ringbuffer without signaling the guest because the ringbuffer is not
+  empty, causing a "hung" ringbuffer. Backported the mainline fix for this issue.]
+Signed-off-by: Dexuan Cui <decui@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/hv_util.c |   19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/drivers/hv/hv_util.c
++++ b/drivers/hv/hv_util.c
+@@ -283,10 +283,23 @@ static void timesync_onchannelcallback(v
+       struct ictimesync_ref_data *refdata;
+       u8 *time_txf_buf = util_timesynch.recv_buffer;
+-      vmbus_recvpacket(channel, time_txf_buf,
+-                       PAGE_SIZE, &recvlen, &requestid);
++      /*
++       * Drain the ring buffer and use the last packet to update
++       * host_ts
++       */
++      while (1) {
++              int ret = vmbus_recvpacket(channel, time_txf_buf,
++                                         HV_HYP_PAGE_SIZE, &recvlen,
++                                         &requestid);
++              if (ret) {
++                      pr_warn_once("TimeSync IC pkt recv failed (Err: %d)\n",
++                                   ret);
++                      break;
++              }
++
++              if (!recvlen)
++                      break;
+-      if (recvlen > 0) {
+               icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[
+                               sizeof(struct vmbuspipe_hdr)];
index c2923f701cd438181b93922fa4cf860235ca155b..6ad46b758b6ae7dbb89ef451fbdad16148798d99 100644 (file)
@@ -94,3 +94,5 @@ intel_th-pci-add-sapphire-rapids-soc-support.patch
 intel_th-pci-add-meteor-lake-s-support.patch
 intel_th-pci-add-lunar-lake-support.patch
 nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch
+tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch
+hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch
diff --git a/queue-5.4/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch b/queue-5.4/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch
new file mode 100644 (file)
index 0000000..3967840
--- /dev/null
@@ -0,0 +1,94 @@
+From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Tue, 28 May 2024 14:20:19 +0200
+Subject: tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 07c54cc5988f19c9642fd463c2dbdac7fc52f777 upstream.
+
+After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash
+when the boot CPU is nohz_full") the kernel no longer crashes, but there is
+another problem.
+
+In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
+update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
+in smp_call_function_single().
+
+Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
+comment explains why this is safe (thanks Thomas!).
+
+Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com
+Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/time/tick-common.c |   42 ++++++++++++++----------------------------
+ 1 file changed, 14 insertions(+), 28 deletions(-)
+
+--- a/kernel/time/tick-common.c
++++ b/kernel/time/tick-common.c
+@@ -177,26 +177,6 @@ void tick_setup_periodic(struct clock_ev
+       }
+ }
+-#ifdef CONFIG_NO_HZ_FULL
+-static void giveup_do_timer(void *info)
+-{
+-      int cpu = *(unsigned int *)info;
+-
+-      WARN_ON(tick_do_timer_cpu != smp_processor_id());
+-
+-      tick_do_timer_cpu = cpu;
+-}
+-
+-static void tick_take_do_timer_from_boot(void)
+-{
+-      int cpu = smp_processor_id();
+-      int from = tick_do_timer_boot_cpu;
+-
+-      if (from >= 0 && from != cpu)
+-              smp_call_function_single(from, giveup_do_timer, &cpu, 1);
+-}
+-#endif
+-
+ /*
+  * Setup the tick device
+  */
+@@ -220,19 +200,25 @@ static void tick_setup_device(struct tic
+                       tick_next_period = ktime_get();
+ #ifdef CONFIG_NO_HZ_FULL
+                       /*
+-                       * The boot CPU may be nohz_full, in which case set
+-                       * tick_do_timer_boot_cpu so the first housekeeping
+-                       * secondary that comes up will take do_timer from
+-                       * us.
++                       * The boot CPU may be nohz_full, in which case the
++                       * first housekeeping secondary will take do_timer()
++                       * from it.
+                        */
+                       if (tick_nohz_full_cpu(cpu))
+                               tick_do_timer_boot_cpu = cpu;
+-              } else if (tick_do_timer_boot_cpu != -1 &&
+-                                              !tick_nohz_full_cpu(cpu)) {
+-                      tick_take_do_timer_from_boot();
++              } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) {
+                       tick_do_timer_boot_cpu = -1;
+-                      WARN_ON(tick_do_timer_cpu != cpu);
++                      /*
++                       * The boot CPU will stay in periodic (NOHZ disabled)
++                       * mode until clocksource_done_booting() called after
++                       * smp_init() selects a high resolution clocksource and
++                       * timekeeping_notify() kicks the NOHZ stuff alive.
++                       *
++                       * So this WRITE_ONCE can only race with the READ_ONCE
++                       * check in tick_periodic() but this race is harmless.
++                       */
++                      WRITE_ONCE(tick_do_timer_cpu, cpu);
+ #endif
+               }