git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.9-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 09:35:28 +0000 (11:35 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 19 Jun 2024 09:35:28 +0000 (11:35 +0200)
added patches:
mm-huge_memory-don-t-unpoison-huge_zero_folio.patch

queue-6.9/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch [new file with mode: 0644]
queue-6.9/series

diff --git a/queue-6.9/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch b/queue-6.9/mm-huge_memory-don-t-unpoison-huge_zero_folio.patch
new file mode 100644 (file)
index 0000000..f533716
--- /dev/null
@@ -0,0 +1,90 @@
+From fe6f86f4b40855a130a19aa589f9ba7f650423f4 Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Thu, 16 May 2024 20:26:08 +0800
+Subject: mm/huge_memory: don't unpoison huge_zero_folio
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit fe6f86f4b40855a130a19aa589f9ba7f650423f4 upstream.
+
+When I ran memory failure tests recently, the following panic occurred:
+
+ kernel BUG at include/linux/mm.h:1135!
+ invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+ CPU: 9 PID: 137 Comm: kswapd1 Not tainted 6.9.0-rc4-00491-gd5ce28f156fe-dirty #14
+ RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0
+ RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246
+ RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8
+ RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0
+ RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492
+ R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00
+ FS:  0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0
+ Call Trace:
+  <TASK>
+  do_shrink_slab+0x14f/0x6a0
+  shrink_slab+0xca/0x8c0
+  shrink_node+0x2d0/0x7d0
+  balance_pgdat+0x33a/0x720
+  kswapd+0x1f3/0x410
+  kthread+0xd5/0x100
+  ret_from_fork+0x2f/0x50
+  ret_from_fork_asm+0x1a/0x30
+  </TASK>
+ Modules linked in: mce_inject hwpoison_inject
+ ---[ end trace 0000000000000000 ]---
+ RIP: 0010:shrink_huge_zero_page_scan+0x168/0x1a0
+ RSP: 0018:ffff9933c6c57bd0 EFLAGS: 00000246
+ RAX: 000000000000003e RBX: 0000000000000000 RCX: ffff88f61fc5c9c8
+ RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff88f61fc5c9c0
+ RBP: ffffcd7c446b0000 R08: ffffffff9a9405f0 R09: 0000000000005492
+ R10: 00000000000030ea R11: ffffffff9a9405f0 R12: 0000000000000000
+ R13: 0000000000000000 R14: 0000000000000000 R15: ffff88e703c4ac00
+ FS:  0000000000000000(0000) GS:ffff88f61fc40000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 000055f4da6e9878 CR3: 0000000c71048000 CR4: 00000000000006f0
+
+The root cause is that the HWPoison flag is set for huge_zero_folio
+without increasing the folio refcnt.  unpoison_memory() then decreases
+the folio refcnt unexpectedly, because the folio looks like a successfully
+hwpoisoned one, which triggers VM_BUG_ON_PAGE(page_ref_count(page) == 0)
+when huge_zero_folio is released.
+
+Skip unpoisoning huge_zero_folio in unpoison_memory() to fix this issue.
+We're not prepared to unpoison huge_zero_folio yet.
+
+Link: https://lkml.kernel.org/r/20240516122608.22610-1-linmiaohe@huawei.com
+Fixes: 478d134e9506 ("mm/huge_memory: do not overkill when splitting huge_zero_page")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: Xu Yu <xuyu@linux.alibaba.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -2550,6 +2550,13 @@ int unpoison_memory(unsigned long pfn)
+               goto unlock_mutex;
+       }
+ 
++      if (is_huge_zero_page(&folio->page)) {
++              unpoison_pr_info("Unpoison: huge zero page is not supported %#lx\n",
++                               pfn, &unpoison_rs);
++              ret = -EOPNOTSUPP;
++              goto unlock_mutex;
++      }
++
+       if (!PageHWPoison(p)) {
+               unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
+                                pfn, &unpoison_rs);
index e6cb57da20e4080e584159c85a26e404df1e8e40..f6b672e3f528bbf6eafd9b1f598c690a7537f3a3 100644 (file)
@@ -274,3 +274,4 @@ pmdomain-ti-sci-fix-duplicate-pd-referrals.patch
 btrfs-zoned-fix-use-after-free-due-to-race-with-dev-replace.patch
 wifi-iwlwifi-mvm-support-iwl_dev_tx_power_cmd_v8.patch
 wifi-iwlwifi-mvm-fix-a-crash-on-7265.patch
+mm-huge_memory-don-t-unpoison-huge_zero_folio.patch