+++ /dev/null
-From bcf33601706a65343648271e046faea9f011a14c Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Mon, 10 Jul 2017 15:47:47 -0700
-Subject: mm: hwpoison: introduce memory_failure_hugetlb()
-
-From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
-
-[ Upstream commit 761ad8d7c7b5485bb66fd5bccb58a891fe784544 ]
-
-memory_failure() is a big function and hard to maintain. Handling the
-hugetlb and non-hugetlb cases in a single function is not good, so this
-patch separates the PageHuge() branch into a new function, which saves
-many PageHuge() checks.
-
-Link: http://lkml.kernel.org/r/1496305019-5493-7-git-send-email-n-horiguchi@ah.jp.nec.com
-Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- mm/memory-failure.c | 134 +++++++++++++++++++++++++++-----------------
- 1 file changed, 82 insertions(+), 52 deletions(-)
-
-diff --git a/mm/memory-failure.c b/mm/memory-failure.c
-index ad156b42d2ad..d3986a58ca89 100644
---- a/mm/memory-failure.c
-+++ b/mm/memory-failure.c
-@@ -1010,6 +1010,76 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
- return ret;
- }
-
-+static int memory_failure_hugetlb(unsigned long pfn, int trapno, int flags)
-+{
-+ struct page_state *ps;
-+ struct page *p = pfn_to_page(pfn);
-+ struct page *head = compound_head(p);
-+ int res;
-+ unsigned long page_flags;
-+
-+ if (TestSetPageHWPoison(head)) {
-+ pr_err("Memory failure: %#lx: already hardware poisoned\n",
-+ pfn);
-+ return 0;
-+ }
-+
-+ num_poisoned_pages_inc();
-+
-+ if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
-+ /*
-+ * Check "filter hit" and "race with other subpage."
-+ */
-+ lock_page(head);
-+ if (PageHWPoison(head)) {
-+ if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
-+ || (p != head && TestSetPageHWPoison(head))) {
-+ num_poisoned_pages_dec();
-+ unlock_page(head);
-+ return 0;
-+ }
-+ }
-+ unlock_page(head);
-+ dissolve_free_huge_page(p);
-+ action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED);
-+ return 0;
-+ }
-+
-+ lock_page(head);
-+ page_flags = head->flags;
-+
-+ if (!PageHWPoison(head)) {
-+ pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
-+ num_poisoned_pages_dec();
-+ unlock_page(head);
-+ put_hwpoison_page(head);
-+ return 0;
-+ }
-+
-+ if (!hwpoison_user_mappings(p, pfn, trapno, flags, &head)) {
-+ action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
-+ res = -EBUSY;
-+ goto out;
-+ }
-+
-+ res = -EBUSY;
-+
-+ for (ps = error_states;; ps++)
-+ if ((p->flags & ps->mask) == ps->res)
-+ break;
-+
-+ page_flags |= (p->flags & (1UL << PG_dirty));
-+
-+ if (!ps->mask)
-+ for (ps = error_states;; ps++)
-+ if ((page_flags & ps->mask) == ps->res)
-+ break;
-+ res = page_action(ps, p, pfn);
-+out:
-+ unlock_page(head);
-+ return res;
-+}
-+
- /**
- * memory_failure - Handle memory failure of a page.
- * @pfn: Page Number of the corrupted page
-@@ -1047,33 +1117,22 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
- }
-
- p = pfn_to_page(pfn);
-- orig_head = hpage = compound_head(p);
--
-- /* tmporary check code, to be updated in later patches */
-- if (PageHuge(p)) {
-- if (TestSetPageHWPoison(hpage)) {
-- pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn);
-- return 0;
-- }
-- goto tmp;
-- }
-+ if (PageHuge(p))
-+ return memory_failure_hugetlb(pfn, trapno, flags);
- if (TestSetPageHWPoison(p)) {
- pr_err("Memory failure: %#lx: already hardware poisoned\n",
- pfn);
- return 0;
- }
-
--tmp:
-+ orig_head = hpage = compound_head(p);
- num_poisoned_pages_inc();
-
- /*
- * We need/can do nothing about count=0 pages.
- * 1) it's a free page, and therefore in safe hand:
- * prep_new_page() will be the gate keeper.
-- * 2) it's a free hugepage, which is also safe:
-- * an affected hugepage will be dequeued from hugepage freelist,
-- * so there's no concern about reusing it ever after.
-- * 3) it's part of a non-compound high order page.
-+ * 2) it's part of a non-compound high order page.
- * Implies some kernel user: cannot stop them from
- * R/W the page; let's pray that the page has been
- * used and will be freed some time later.
-@@ -1084,31 +1143,13 @@ tmp:
- if (is_free_buddy_page(p)) {
- action_result(pfn, MF_MSG_BUDDY, MF_DELAYED);
- return 0;
-- } else if (PageHuge(hpage)) {
-- /*
-- * Check "filter hit" and "race with other subpage."
-- */
-- lock_page(hpage);
-- if (PageHWPoison(hpage)) {
-- if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
-- || (p != hpage && TestSetPageHWPoison(hpage))) {
-- num_poisoned_pages_dec();
-- unlock_page(hpage);
-- return 0;
-- }
-- }
-- res = dequeue_hwpoisoned_huge_page(hpage);
-- action_result(pfn, MF_MSG_FREE_HUGE,
-- res ? MF_IGNORED : MF_DELAYED);
-- unlock_page(hpage);
-- return res;
- } else {
- action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
- return -EBUSY;
- }
- }
-
-- if (!PageHuge(p) && PageTransHuge(hpage)) {
-+ if (PageTransHuge(hpage)) {
- lock_page(p);
- if (!PageAnon(p) || unlikely(split_huge_page(p))) {
- unlock_page(p);
-@@ -1154,7 +1195,7 @@ tmp:
- }
- }
-
-- lock_page(hpage);
-+ lock_page(p);
-
- /*
- * The page could have changed compound pages during the locking.
-@@ -1184,32 +1225,21 @@ tmp:
- if (!PageHWPoison(p)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
-- unlock_page(hpage);
-- put_hwpoison_page(hpage);
-+ unlock_page(p);
-+ put_hwpoison_page(p);
- return 0;
- }
- if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
-- unlock_page(hpage);
-- put_hwpoison_page(hpage);
-+ unlock_page(p);
-+ put_hwpoison_page(p);
- return 0;
- }
-
-- if (!PageHuge(p) && !PageTransTail(p) && !PageLRU(p))
-+ if (!PageTransTail(p) && !PageLRU(p))
- goto identify_page_state;
-
-- /*
-- * For error on the tail page, we should set PG_hwpoison
-- * on the head page to show that the hugepage is hwpoisoned
-- */
-- if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
-- action_result(pfn, MF_MSG_POISONED_HUGE, MF_IGNORED);
-- unlock_page(hpage);
-- put_hwpoison_page(hpage);
-- return 0;
-- }
--
- /*
- * It's very difficult to mess with pages currently under IO
- * and in many cases impossible, so we just avoid it here.
-@@ -1258,7 +1288,7 @@ identify_page_state:
- break;
- res = page_action(ps, p, pfn);
- out:
-- unlock_page(hpage);
-+ unlock_page(p);
- return res;
- }
- EXPORT_SYMBOL_GPL(memory_failure);
---
-2.30.2
-
+++ /dev/null
-From 09ba9806ce09602cac6a49367a08971bd3ce6669 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 15 Jun 2021 18:23:32 -0700
-Subject: mm/memory-failure: make sure wait for page writeback in
- memory_failure
-
-From: yangerkun <yangerkun@huawei.com>
-
-[ Upstream commit e8675d291ac007e1c636870db880f837a9ea112a ]
-
-Our syzkaller run triggered the "BUG_ON(!list_empty(&inode->i_wb_list))"
-in clear_inode:
-
- kernel BUG at fs/inode.c:519!
- Internal error: Oops - BUG: 0 [#1] SMP
- Modules linked in:
- Process syz-executor.0 (pid: 249, stack limit = 0x00000000a12409d7)
- CPU: 1 PID: 249 Comm: syz-executor.0 Not tainted 4.19.95
- Hardware name: linux,dummy-virt (DT)
- pstate: 80000005 (Nzcv daif -PAN -UAO)
- pc : clear_inode+0x280/0x2a8
- lr : clear_inode+0x280/0x2a8
- Call trace:
- clear_inode+0x280/0x2a8
- ext4_clear_inode+0x38/0xe8
- ext4_free_inode+0x130/0xc68
- ext4_evict_inode+0xb20/0xcb8
- evict+0x1a8/0x3c0
- iput+0x344/0x460
- do_unlinkat+0x260/0x410
- __arm64_sys_unlinkat+0x6c/0xc0
- el0_svc_common+0xdc/0x3b0
- el0_svc_handler+0xf8/0x160
- el0_svc+0x10/0x218
- Kernel panic - not syncing: Fatal exception
-
-A crash dump of this problem shows that someone called __munlock_pagevec
-to clear the page's LRU flag without lock_page: do_mmap -> mmap_region ->
-do_munmap -> munlock_vma_pages_range -> __munlock_pagevec.
-
-As a result, memory_failure will call identify_page_state without
-calling wait_on_page_writeback. Then, after truncate_error_page clears
-the mapping of this page, end_page_writeback won't call
-sb_clear_inode_writeback to clear inode->i_wb_list. That will trigger the
-BUG_ON in clear_inode!
-
-Fix it by also checking PageWriteback to help determine whether we should
-skip wait_on_page_writeback.
-
-Link: https://lkml.kernel.org/r/20210604084705.3729204-1-yangerkun@huawei.com
-Fixes: 0bc1f8b0682c ("hwpoison: fix the handling path of the victimized page frame that belong to non-LRU")
-Signed-off-by: yangerkun <yangerkun@huawei.com>
-Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
-Cc: Jan Kara <jack@suse.cz>
-Cc: Theodore Ts'o <tytso@mit.edu>
-Cc: Oscar Salvador <osalvador@suse.de>
-Cc: Yu Kuai <yukuai3@huawei.com>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- mm/memory-failure.c | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/mm/memory-failure.c b/mm/memory-failure.c
-index d3986a58ca89..448f5decf95c 100644
---- a/mm/memory-failure.c
-+++ b/mm/memory-failure.c
-@@ -1237,7 +1237,12 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
- return 0;
- }
-
-- if (!PageTransTail(p) && !PageLRU(p))
-+ /*
-+ * __munlock_pagevec may clear a writeback page's LRU flag without
-+ * page_lock. We need wait writeback completion for this page or it
-+ * may trigger vfs BUG while evict inode.
-+ */
-+ if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
- goto identify_page_state;
-
- /*
---
-2.30.2
-