From: Greg Kroah-Hartman Date: Wed, 19 Jun 2024 09:34:36 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v6.1.95~50 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a2d1897ff0e9537ab5c1f6be37b0ef4b83061816;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch --- diff --git a/queue-5.4/hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch b/queue-5.4/hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch new file mode 100644 index 00000000000..b1f5e4d0834 --- /dev/null +++ b/queue-5.4/hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch @@ -0,0 +1,62 @@ +From b46b4a8a57c377b72a98c7930a9f6969d2d4784e Mon Sep 17 00:00:00 2001 +From: Vineeth Pillai +Date: Fri, 21 Aug 2020 15:28:49 +0000 +Subject: hv_utils: drain the timesync packets on onchannelcallback + +From: Vineeth Pillai + +commit b46b4a8a57c377b72a98c7930a9f6969d2d4784e upstream. + +There could be instances where a system stall prevents the timesync +packets from being consumed. And this might lead to more than one packet +pending in the ring buffer. Current code empties one packet per callback +and it might be a stale one. So drain all the packets from ring buffer +on each callback. + +Signed-off-by: Vineeth Pillai +Reviewed-by: Michael Kelley +Link: https://lore.kernel.org/r/20200821152849.99517-1-viremana@linux.microsoft.com +Signed-off-by: Wei Liu +[ The old code in the upstream commit uses HV_HYP_PAGE_SIZE, but + the old code in 5.4.y still uses PAGE_SIZE. Fixed this manually for 5.4.y. + Note: 5.4.y already has the define HV_HYP_PAGE_SIZE, so the new code in + the upstream commit works for 5.4.y. 
+ If there are multiple messages in the host-to-guest ringbuffer of the TimeSync + device, 5.4.y only handles 1 message, and later the host puts new messages + into the ringbuffer without signaling the guest because the ringbuffer is not + empty, causing a "hung" ringbuffer. Backported the mainline fix for this issue.] +Signed-off-by: Dexuan Cui +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/hv_util.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/drivers/hv/hv_util.c ++++ b/drivers/hv/hv_util.c +@@ -283,10 +283,23 @@ static void timesync_onchannelcallback(v + struct ictimesync_ref_data *refdata; + u8 *time_txf_buf = util_timesynch.recv_buffer; + +- vmbus_recvpacket(channel, time_txf_buf, +- PAGE_SIZE, &recvlen, &requestid); ++ /* ++ * Drain the ring buffer and use the last packet to update ++ * host_ts ++ */ ++ while (1) { ++ int ret = vmbus_recvpacket(channel, time_txf_buf, ++ HV_HYP_PAGE_SIZE, &recvlen, ++ &requestid); ++ if (ret) { ++ pr_warn_once("TimeSync IC pkt recv failed (Err: %d)\n", ++ ret); ++ break; ++ } ++ ++ if (!recvlen) ++ break; + +- if (recvlen > 0) { + icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[ + sizeof(struct vmbuspipe_hdr)]; + diff --git a/queue-5.4/series b/queue-5.4/series index c2923f701cd..6ad46b758b6 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -94,3 +94,5 @@ intel_th-pci-add-sapphire-rapids-soc-support.patch intel_th-pci-add-meteor-lake-s-support.patch intel_th-pci-add-lunar-lake-support.patch nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch +tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch +hv_utils-drain-the-timesync-packets-on-onchannelcallback.patch diff --git a/queue-5.4/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch b/queue-5.4/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch new file mode 100644 index 00000000000..3967840dec1 --- /dev/null +++ 
b/queue-5.4/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch @@ -0,0 +1,94 @@ +From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 28 May 2024 14:20:19 +0200 +Subject: tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device() + +From: Oleg Nesterov + +commit 07c54cc5988f19c9642fd463c2dbdac7fc52f777 upstream. + +After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash +when the boot CPU is nohz_full") the kernel no longer crashes, but there is +another problem. + +In this case tick_setup_device() calls tick_take_do_timer_from_boot() to +update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled) +in smp_call_function_single(). + +Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new +comment explains why this is safe (thanks Thomas!). + +Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") +Signed-off-by: Oleg Nesterov +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com +Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/tick-common.c | 42 ++++++++++++++---------------------------- + 1 file changed, 14 insertions(+), 28 deletions(-) + +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -177,26 +177,6 @@ void tick_setup_periodic(struct clock_ev + } + } + +-#ifdef CONFIG_NO_HZ_FULL +-static void giveup_do_timer(void *info) +-{ +- int cpu = *(unsigned int *)info; +- +- WARN_ON(tick_do_timer_cpu != smp_processor_id()); +- +- tick_do_timer_cpu = cpu; +-} +- +-static void tick_take_do_timer_from_boot(void) +-{ +- int cpu = smp_processor_id(); +- int from = tick_do_timer_boot_cpu; +- +- if (from >= 0 && from != cpu) +- smp_call_function_single(from, giveup_do_timer, &cpu, 1); +-} +-#endif +- + /* + * Setup the tick device + */ +@@ -220,19 +200,25 
@@ static void tick_setup_device(struct tic + tick_next_period = ktime_get(); + #ifdef CONFIG_NO_HZ_FULL + /* +- * The boot CPU may be nohz_full, in which case set +- * tick_do_timer_boot_cpu so the first housekeeping +- * secondary that comes up will take do_timer from +- * us. ++ * The boot CPU may be nohz_full, in which case the ++ * first housekeeping secondary will take do_timer() ++ * from it. + */ + if (tick_nohz_full_cpu(cpu)) + tick_do_timer_boot_cpu = cpu; + +- } else if (tick_do_timer_boot_cpu != -1 && +- !tick_nohz_full_cpu(cpu)) { +- tick_take_do_timer_from_boot(); ++ } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { + tick_do_timer_boot_cpu = -1; +- WARN_ON(tick_do_timer_cpu != cpu); ++ /* ++ * The boot CPU will stay in periodic (NOHZ disabled) ++ * mode until clocksource_done_booting() called after ++ * smp_init() selects a high resolution clocksource and ++ * timekeeping_notify() kicks the NOHZ stuff alive. ++ * ++ * So this WRITE_ONCE can only race with the READ_ONCE ++ * check in tick_periodic() but this race is harmless. ++ */ ++ WRITE_ONCE(tick_do_timer_cpu, cpu); + #endif + } +