--- /dev/null
+From 4966455d9100236fd6dd72b0cd00818435fdb25d Mon Sep 17 00:00:00 2001
+From: Yang Shi <shy828301@gmail.com>
+Date: Fri, 5 Nov 2021 13:41:14 -0700
+Subject: mm: hwpoison: handle non-anonymous THP correctly
+
+From: Yang Shi <shy828301@gmail.com>
+
+commit 4966455d9100236fd6dd72b0cd00818435fdb25d upstream.
+
+Currently hwpoison doesn't handle non-anonymous THPs, but THP support
+for tmpfs and read-only file cache has been available since v4.8.  Such
+pages can be offlined by splitting the THP, just like anonymous THPs.
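+
+For illustration, the helper after this change is expected to look
+roughly like the sketch below (the unchanged tail of the function is
+paraphrased here): split_huge_page() already copes with file-backed
+THPs, so the PageAnon() special case can simply be dropped.
+
+    static int try_to_split_thp_page(struct page *page, const char *msg)
+    {
+        lock_page(page);
+        /* Works for anonymous, tmpfs and read-only file THPs alike. */
+        if (unlikely(split_huge_page(page))) {
+            unsigned long pfn = page_to_pfn(page);
+
+            unlock_page(page);
+            pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+            put_page(page);
+            return -EBUSY;
+        }
+        /* Split succeeded; continue with the now order-0 page. */
+        unlock_page(page);
+        return 0;
+    }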
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-7-shy828301@gmail.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1440,14 +1440,11 @@ static int identify_page_state(unsigned
+ static int try_to_split_thp_page(struct page *page, const char *msg)
+ {
+ lock_page(page);
+- if (!PageAnon(page) || unlikely(split_huge_page(page))) {
++ if (unlikely(split_huge_page(page))) {
+ unsigned long pfn = page_to_pfn(page);
+
+ unlock_page(page);
+- if (!PageAnon(page))
+- pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+- else
+- pr_info("%s: %#lx: thp split failed\n", msg, pfn);
++ pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+ put_page(page);
+ return -EBUSY;
+ }
--- /dev/null
+From dd0f230a0a80ff396c7ce587f16429f2a8131344 Mon Sep 17 00:00:00 2001
+From: Yang Shi <shy828301@gmail.com>
+Date: Fri, 5 Nov 2021 13:41:07 -0700
+Subject: mm: hwpoison: refactor refcount check handling
+
+From: Yang Shi <shy828301@gmail.com>
+
+commit dd0f230a0a80ff396c7ce587f16429f2a8131344 upstream.
+
+Memory failure reports failure if the page still holds an extra pinned
+refcount, other than the one taken by hwpoison itself, after the handler
+is done.  The check is not actually necessary for all handlers, so move
+it into the individual handlers.  This makes the follow-up patch, which
+keeps shmem pages in the page cache, easier.
+
+Some cases legitimately expect an extra pin, for example when the page
+is dirty and sitting in the swap cache.
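+
+As an illustration of the resulting pattern (a simplified sketch of the
+me_swapcache_dirty() handler from the diff below), each handler now ends
+by asking has_extra_refcount() whether unexpected references remain,
+passing extra_pins = true when one additional reference is legitimate:
+
+    static int me_swapcache_dirty(struct page_state *ps, struct page *p)
+    {
+        int ret;
+        bool extra_pins = false;
+
+        ClearPageDirty(p);
+        /* Trigger EIO in shmem: */
+        ClearPageUptodate(p);
+
+        ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+        unlock_page(p);
+
+        /* A delayed page stays in the swap cache: one extra pin is expected. */
+        if (ret == MF_DELAYED)
+            extra_pins = true;
+
+        if (has_extra_refcount(ps, p, extra_pins))
+            ret = MF_FAILED;
+
+        return ret;
+    }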
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-5-shy828301@gmail.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Suggested-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c | 93 +++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 64 insertions(+), 29 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -811,12 +811,44 @@ static int truncate_error_page(struct pa
+ return ret;
+ }
+
++struct page_state {
++ unsigned long mask;
++ unsigned long res;
++ enum mf_action_page_type type;
++
++ /* Callback ->action() has to unlock the relevant page inside it. */
++ int (*action)(struct page_state *ps, struct page *p);
++};
++
++/*
++ * Return true if page is still referenced by others, otherwise return
++ * false.
++ *
++ * The extra_pins is true when one extra refcount is expected.
++ */
++static bool has_extra_refcount(struct page_state *ps, struct page *p,
++ bool extra_pins)
++{
++ int count = page_count(p) - 1;
++
++ if (extra_pins)
++ count -= 1;
++
++ if (count > 0) {
++ pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
++ page_to_pfn(p), action_page_types[ps->type], count);
++ return true;
++ }
++
++ return false;
++}
++
+ /*
+ * Error hit kernel page.
+ * Do nothing, try to be lucky and not touch this instead. For a few cases we
+ * could be more sophisticated.
+ */
+-static int me_kernel(struct page *p, unsigned long pfn)
++static int me_kernel(struct page_state *ps, struct page *p)
+ {
+ unlock_page(p);
+ return MF_IGNORED;
+@@ -825,9 +857,9 @@ static int me_kernel(struct page *p, uns
+ /*
+ * Page in unknown state. Do nothing.
+ */
+-static int me_unknown(struct page *p, unsigned long pfn)
++static int me_unknown(struct page_state *ps, struct page *p)
+ {
+- pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
++ pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
+ unlock_page(p);
+ return MF_FAILED;
+ }
+@@ -835,7 +867,7 @@ static int me_unknown(struct page *p, un
+ /*
+ * Clean (or cleaned) page cache page.
+ */
+-static int me_pagecache_clean(struct page *p, unsigned long pfn)
++static int me_pagecache_clean(struct page_state *ps, struct page *p)
+ {
+ int ret;
+ struct address_space *mapping;
+@@ -872,9 +904,13 @@ static int me_pagecache_clean(struct pag
+ *
+ * Open: to take i_rwsem or not for this? Right now we don't.
+ */
+- ret = truncate_error_page(p, pfn, mapping);
++ ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ out:
+ unlock_page(p);
++
++ if (has_extra_refcount(ps, p, false))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+@@ -883,7 +919,7 @@ out:
+ * Issues: when the error hit a hole page the error is not properly
+ * propagated.
+ */
+-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
++static int me_pagecache_dirty(struct page_state *ps, struct page *p)
+ {
+ struct address_space *mapping = page_mapping(p);
+
+@@ -927,7 +963,7 @@ static int me_pagecache_dirty(struct pag
+ mapping_set_error(mapping, -EIO);
+ }
+
+- return me_pagecache_clean(p, pfn);
++ return me_pagecache_clean(ps, p);
+ }
+
+ /*
+@@ -949,9 +985,10 @@ static int me_pagecache_dirty(struct pag
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
++static int me_swapcache_dirty(struct page_state *ps, struct page *p)
+ {
+ int ret;
++ bool extra_pins = false;
+
+ ClearPageDirty(p);
+ /* Trigger EIO in shmem: */
+@@ -959,10 +996,17 @@ static int me_swapcache_dirty(struct pag
+
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+ unlock_page(p);
++
++ if (ret == MF_DELAYED)
++ extra_pins = true;
++
++ if (has_extra_refcount(ps, p, extra_pins))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+-static int me_swapcache_clean(struct page *p, unsigned long pfn)
++static int me_swapcache_clean(struct page_state *ps, struct page *p)
+ {
+ int ret;
+
+@@ -970,6 +1014,10 @@ static int me_swapcache_clean(struct pag
+
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+ unlock_page(p);
++
++ if (has_extra_refcount(ps, p, false))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+@@ -979,7 +1027,7 @@ static int me_swapcache_clean(struct pag
+ * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
+ * To narrow down kill region to one page, we need to break up pmd.
+ */
+-static int me_huge_page(struct page *p, unsigned long pfn)
++static int me_huge_page(struct page_state *ps, struct page *p)
+ {
+ int res;
+ struct page *hpage = compound_head(p);
+@@ -990,7 +1038,7 @@ static int me_huge_page(struct page *p,
+
+ mapping = page_mapping(hpage);
+ if (mapping) {
+- res = truncate_error_page(hpage, pfn, mapping);
++ res = truncate_error_page(hpage, page_to_pfn(p), mapping);
+ unlock_page(hpage);
+ } else {
+ res = MF_FAILED;
+@@ -1008,6 +1056,9 @@ static int me_huge_page(struct page *p,
+ }
+ }
+
++ if (has_extra_refcount(ps, p, false))
++ res = MF_FAILED;
++
+ return res;
+ }
+
+@@ -1033,14 +1084,7 @@ static int me_huge_page(struct page *p,
+ #define slab (1UL << PG_slab)
+ #define reserved (1UL << PG_reserved)
+
+-static struct page_state {
+- unsigned long mask;
+- unsigned long res;
+- enum mf_action_page_type type;
+-
+- /* Callback ->action() has to unlock the relevant page inside it. */
+- int (*action)(struct page *p, unsigned long pfn);
+-} error_states[] = {
++static struct page_state error_states[] = {
+ { reserved, reserved, MF_MSG_KERNEL, me_kernel },
+ /*
+ * free pages are specially detected outside this table:
+@@ -1100,19 +1144,10 @@ static int page_action(struct page_state
+ unsigned long pfn)
+ {
+ int result;
+- int count;
+
+ /* page p should be unlocked after returning from ps->action(). */
+- result = ps->action(p, pfn);
++ result = ps->action(ps, p);
+
+- count = page_count(p) - 1;
+- if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
+- count--;
+- if (count > 0) {
+- pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+- pfn, action_page_types[ps->type], count);
+- result = MF_FAILED;
+- }
+ action_result(pfn, ps->type, result);
+
+ /* Could do more checks here if page looks ok */
--- /dev/null
+From a7605426666196c5a460dd3de6f8dac1d3c21f00 Mon Sep 17 00:00:00 2001
+From: Yang Shi <shy828301@gmail.com>
+Date: Fri, 14 Jan 2022 14:05:19 -0800
+Subject: mm: shmem: don't truncate page if memory failure happens
+
+From: Yang Shi <shy828301@gmail.com>
+
+commit a7605426666196c5a460dd3de6f8dac1d3c21f00 upstream.
+
+The current behavior of memory failure is to truncate the page cache
+regardless of whether the page is dirty or clean.  If the page is dirty,
+a later access will get obsolete data from disk without any notification
+to the user, which may cause silent data loss.  It is even worse for
+shmem: since shmem is an in-memory filesystem, truncating the page cache
+means discarding the data blocks, and a later read returns all zeroes.
+
+The right approach is to keep the corrupted page in the page cache; any
+later access then returns an error for syscalls, or SIGBUS for page
+faults, until the file is truncated, hole punched or removed.  Regular
+storage-backed filesystems would be more complicated, so this patch
+focuses on shmem.  This also unblocks support for soft offlining shmem
+THPs.
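+
+The core of the approach can be sketched as follows (simplified from the
+diff below): the shmem address_space stops truncating poisoned pages,
+and every path that hands out a shmem page checks PageHWPoison() and
+fails with -EIO (or SIGBUS on page fault) instead of returning zeroes.
+
+    /* Keep the page in page cache instead of truncating it */
+    static int shmem_error_remove_page(struct address_space *mapping,
+                                       struct page *page)
+    {
+        return 0;
+    }
+
+    /* e.g. in shmem_read_mapping_page_gfp(), after the lookup: */
+        unlock_page(page);
+        if (PageHWPoison(page)) {
+            put_page(page);
+            return ERR_PTR(-EIO);
+        }
+        return page;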
+
+[akpm@linux-foundation.org: coding style fixes]
+[arnd@arndb.de: fix uninitialized variable use in me_pagecache_clean()]
+ Link: https://lkml.kernel.org/r/20211022064748.4173718-1-arnd@kernel.org
+[Fix invalid pointer dereference in shmem_read_mapping_page_gfp() with a
+ slightly different implementation from what Ajay Garg <ajaygargnsit@gmail.com>
+ and Muchun Song <songmuchun@bytedance.com> proposed, and rework the
+ error handling of shmem_write_begin() as suggested by Linus]
+ Link: https://lore.kernel.org/linux-mm/20211111084617.6746-1-ajaygargnsit@gmail.com/
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-6-shy828301@gmail.com
+Link: https://lkml.kernel.org/r/20211116193247.21102-1-shy828301@gmail.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ajay Garg <ajaygargnsit@gmail.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Andy Lavr <andy.lavr@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c | 14 +++++++++++---
+ mm/shmem.c | 51 +++++++++++++++++++++++++++++++++++++++++++++------
+ mm/userfaultfd.c | 5 +++++
+ 3 files changed, 61 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -57,6 +57,7 @@
+ #include <linux/ratelimit.h>
+ #include <linux/page-isolation.h>
+ #include <linux/pagewalk.h>
++#include <linux/shmem_fs.h>
+ #include "internal.h"
+ #include "ras/ras_event.h"
+
+@@ -871,6 +872,7 @@ static int me_pagecache_clean(struct pag
+ {
+ int ret;
+ struct address_space *mapping;
++ bool extra_pins;
+
+ delete_from_lru_cache(p);
+
+@@ -900,17 +902,23 @@ static int me_pagecache_clean(struct pag
+ }
+
+ /*
++ * The shmem page is kept in page cache instead of truncating
++ * so is expected to have an extra refcount after error-handling.
++ */
++ extra_pins = shmem_mapping(mapping);
++
++ /*
+ * Truncation is a bit tricky. Enable it per file system for now.
+ *
+ * Open: to take i_rwsem or not for this? Right now we don't.
+ */
+ ret = truncate_error_page(p, page_to_pfn(p), mapping);
++ if (has_extra_refcount(ps, p, extra_pins))
++ ret = MF_FAILED;
++
+ out:
+ unlock_page(p);
+
+- if (has_extra_refcount(ps, p, false))
+- ret = MF_FAILED;
+-
+ return ret;
+ }
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2463,6 +2463,7 @@ shmem_write_begin(struct file *file, str
+ struct inode *inode = mapping->host;
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ pgoff_t index = pos >> PAGE_SHIFT;
++ int ret = 0;
+
+ /* i_rwsem is held by caller */
+ if (unlikely(info->seals & (F_SEAL_GROW |
+@@ -2473,7 +2474,19 @@ shmem_write_begin(struct file *file, str
+ return -EPERM;
+ }
+
+- return shmem_getpage(inode, index, pagep, SGP_WRITE);
++ ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
++
++ if (ret)
++ return ret;
++
++ if (PageHWPoison(*pagep)) {
++ unlock_page(*pagep);
++ put_page(*pagep);
++ *pagep = NULL;
++ return -EIO;
++ }
++
++ return 0;
+ }
+
+ static int
+@@ -2560,6 +2573,12 @@ static ssize_t shmem_file_read_iter(stru
+ if (sgp == SGP_CACHE)
+ set_page_dirty(page);
+ unlock_page(page);
++
++ if (PageHWPoison(page)) {
++ put_page(page);
++ error = -EIO;
++ break;
++ }
+ }
+
+ /*
+@@ -3121,7 +3140,8 @@ static const char *shmem_get_link(struct
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+- if (!PageUptodate(page)) {
++ if (PageHWPoison(page) ||
++ !PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+@@ -3129,6 +3149,13 @@ static const char *shmem_get_link(struct
+ error = shmem_getpage(inode, 0, &page, SGP_READ);
+ if (error)
+ return ERR_PTR(error);
++ if (!page)
++ return ERR_PTR(-ECHILD);
++ if (PageHWPoison(page)) {
++ unlock_page(page);
++ put_page(page);
++ return ERR_PTR(-ECHILD);
++ }
+ unlock_page(page);
+ }
+ set_delayed_call(done, shmem_put_link, page);
+@@ -3779,6 +3806,13 @@ static void shmem_destroy_inodecache(voi
+ kmem_cache_destroy(shmem_inode_cachep);
+ }
+
++/* Keep the page in page cache instead of truncating it */
++static int shmem_error_remove_page(struct address_space *mapping,
++ struct page *page)
++{
++ return 0;
++}
++
+ const struct address_space_operations shmem_aops = {
+ .writepage = shmem_writepage,
+ .set_page_dirty = __set_page_dirty_no_writeback,
+@@ -3789,7 +3823,7 @@ const struct address_space_operations sh
+ #ifdef CONFIG_MIGRATION
+ .migratepage = migrate_page,
+ #endif
+- .error_remove_page = generic_error_remove_page,
++ .error_remove_page = shmem_error_remove_page,
+ };
+ EXPORT_SYMBOL(shmem_aops);
+
+@@ -4197,9 +4231,14 @@ struct page *shmem_read_mapping_page_gfp
+ error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+ gfp, NULL, NULL, NULL);
+ if (error)
+- page = ERR_PTR(error);
+- else
+- unlock_page(page);
++ return ERR_PTR(error);
++
++ unlock_page(page);
++ if (PageHWPoison(page)) {
++ put_page(page);
++ return ERR_PTR(-EIO);
++ }
++
+ return page;
+ #else
+ /*
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -238,6 +238,11 @@ static int mcontinue_atomic_pte(struct m
+ goto out;
+ }
+
++ if (PageHWPoison(page)) {
++ ret = -EIO;
++ goto out_release;
++ }
++
+ ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
+ page, false, wp_copy);
+ if (ret)
--- /dev/null
+mm-hwpoison-refactor-refcount-check-handling.patch
+mm-hwpoison-handle-non-anonymous-thp-correctly.patch
+mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch