From 4e80be80a62253644b52ad68a52525232f92499f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jun 2024 11:35:17 +0200 Subject: [PATCH] 6.6-stable patches added patches: mm-huge_memory-don-t-unpoison-huge_zero_folio.patch tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch --- ...emory-don-t-unpoison-huge_zero_folio.patch | 90 ++++++++++++++++++ queue-6.6/series | 2 + ...function_single-in-tick_setup_device.patch | 94 +++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 queue-6.6/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch create mode 100644 queue-6.6/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch diff --git a/queue-6.6/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch b/queue-6.6/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch new file mode 100644 index 00000000000..98e298fd5e6 --- /dev/null +++ b/queue-6.6/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch @@ -0,0 +1,90 @@ +From fe6f86f4b40855a130a19aa589f9ba7f650423f4 Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Thu, 16 May 2024 20:26:08 +0800 +Subject: mm/huge_memory: don't unpoison huge_zero_folio + +From: Miaohe Lin + +commit fe6f86f4b40855a130a19aa589f9ba7f650423f4 upstream. + +When I did memory failure tests recently, the panic below occurred: + + kernel BUG at include/linux/mm.h:1135! 
+ invalid opcode: 0000 [#1] PREEMPT SMP NOPTI + CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14 + RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 + RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 + RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 + RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 + RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 + R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 + R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 + FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 + Call Trace: + + do_shrink_slab+0x14f/0x6a0 + shrink_slab+0xca/0x8c0 + shrink_node+0x2d0/0x7d0 + balance_pgdat+0x33a/0x720 + kswapd+0x1f3/0x410 + kthread+0xd5/0x100 + ret_from_fork+0x2f/0x50 + ret_from_fork_asm+0x1a/0x30 + + Modules linked in: mce_inject hwpoison_inject + ---[ end trace 0000000000000000 ]--- + RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0 + RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246 + RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8 + RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0 + RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492 + R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000 + R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00 + FS: 0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0 + +The root cause is that HWPoison flag will be set for huge_zero_folio +without increasing the folio refcnt. 
But then unpoison_memory() will +decrease the folio refcnt unexpectedly, as it looks like a successfully +hwpoisoned folio, leading to VM_BUG_ON_PAGE(page_ref_count(page) == 0) when +releasing huge_zero_folio. + +Skip unpoisoning huge_zero_folio in unpoison_memory() to fix this issue. +We're not prepared to unpoison huge_zero_folio yet. + +Link: https://lkml.kernel.org/r/20240516122608.22610-1-linmiaohe@huawei.com +Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page") +Signed-off-by: Miaohe Lin +Acked-by: David Hildenbrand +Reviewed-by: Yang Shi +Reviewed-by: Oscar Salvador +Reviewed-by: Anshuman Khandual +Cc: Naoya Horiguchi +Cc: Xu Yu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Miaohe Lin +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory-failure.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -2535,6 +2535,13 @@ int unpoison_memory(unsigned long pfn) + goto unlock_mutex; + } + ++ if (is_huge_zero_page(&folio->page)) { ++ unpoison_pr_info("Unpoison: huge zero page is not supported %#lx\n", ++ pfn, &unpoison_rs); ++ ret = -EOPNOTSUPP; ++ goto unlock_mutex; ++ } ++ + if (!PageHWPoison(p)) { + unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", + pfn, &unpoison_rs); diff --git a/queue-6.6/series b/queue-6.6/series index 574e03abc13..142dd5a8129 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -250,3 +250,5 @@ swiotlb-enforce-page-alignment-in-swiotlb_alloc.patch swiotlb-reinstate-page-alignment-for-mappings-page_size.patch swiotlb-extend-buffer-pre-padding-to-alloc_align_mask-if-necessary.patch nilfs2-fix-potential-kernel-bug-due-to-lack-of-writeback-flag-waiting.patch +tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch +mm-huge_memory-don-t-unpoison-huge_zero_folio.patch diff --git a/queue-6.6/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch 
b/queue-6.6/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch new file mode 100644 index 00000000000..ea72c5ffcc0 --- /dev/null +++ b/queue-6.6/tick-nohz_full-don-t-abuse-smp_call_function_single-in-tick_setup_device.patch @@ -0,0 +1,94 @@ +From 07c54cc5988f19c9642fd463c2dbdac7fc52f777 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Tue, 28 May 2024 14:20:19 +0200 +Subject: tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device() + +From: Oleg Nesterov + +commit 07c54cc5988f19c9642fd463c2dbdac7fc52f777 upstream. + +After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash +when the boot CPU is nohz_full") the kernel no longer crashes, but there is +another problem. + +In this case tick_setup_device() calls tick_take_do_timer_from_boot() to +update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled) +in smp_call_function_single(). + +Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(); the new +comment explains why this is safe (thanks Thomas!). 
+ +Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") +Signed-off-by: Oleg Nesterov +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com +Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com +Signed-off-by: Greg Kroah-Hartman +--- + kernel/time/tick-common.c | 42 ++++++++++++++---------------------------- + 1 file changed, 14 insertions(+), 28 deletions(-) + +--- a/kernel/time/tick-common.c ++++ b/kernel/time/tick-common.c +@@ -179,26 +179,6 @@ void tick_setup_periodic(struct clock_ev + } + } + +-#ifdef CONFIG_NO_HZ_FULL +-static void giveup_do_timer(void *info) +-{ +- int cpu = *(unsigned int *)info; +- +- WARN_ON(tick_do_timer_cpu != smp_processor_id()); +- +- tick_do_timer_cpu = cpu; +-} +- +-static void tick_take_do_timer_from_boot(void) +-{ +- int cpu = smp_processor_id(); +- int from = tick_do_timer_boot_cpu; +- +- if (from >= 0 && from != cpu) +- smp_call_function_single(from, giveup_do_timer, &cpu, 1); +-} +-#endif +- + /* + * Setup the tick device + */ +@@ -222,19 +202,25 @@ static void tick_setup_device(struct tic + tick_next_period = ktime_get(); + #ifdef CONFIG_NO_HZ_FULL + /* +- * The boot CPU may be nohz_full, in which case set +- * tick_do_timer_boot_cpu so the first housekeeping +- * secondary that comes up will take do_timer from +- * us. ++ * The boot CPU may be nohz_full, in which case the ++ * first housekeeping secondary will take do_timer() ++ * from it. 
+ */ + if (tick_nohz_full_cpu(cpu)) + tick_do_timer_boot_cpu = cpu; + +- } else if (tick_do_timer_boot_cpu != -1 && +- !tick_nohz_full_cpu(cpu)) { +- tick_take_do_timer_from_boot(); ++ } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { + tick_do_timer_boot_cpu = -1; +- WARN_ON(tick_do_timer_cpu != cpu); ++ /* ++ * The boot CPU will stay in periodic (NOHZ disabled) ++ * mode until clocksource_done_booting() called after ++ * smp_init() selects a high resolution clocksource and ++ * timekeeping_notify() kicks the NOHZ stuff alive. ++ * ++ * So this WRITE_ONCE can only race with the READ_ONCE ++ * check in tick_periodic() but this race is harmless. ++ */ ++ WRITE_ONCE(tick_do_timer_cpu, cpu); + #endif + } + -- 2.47.3