git.ipfire.org Git - thirdparty/kernel/stable-queue.git / commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Nov 2022 09:18:10 +0000 (10:18 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 16 Nov 2022 09:18:10 +0000 (10:18 +0100)
added patches:
mm-hwpoison-handle-non-anonymous-thp-correctly.patch
mm-hwpoison-refactor-refcount-check-handling.patch
mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch

queue-5.15/mm-hwpoison-handle-non-anonymous-thp-correctly.patch [new file with mode: 0644]
queue-5.15/mm-hwpoison-refactor-refcount-check-handling.patch [new file with mode: 0644]
queue-5.15/mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch [new file with mode: 0644]
queue-5.15/series [new file with mode: 0644]

diff --git a/queue-5.15/mm-hwpoison-handle-non-anonymous-thp-correctly.patch b/queue-5.15/mm-hwpoison-handle-non-anonymous-thp-correctly.patch
new file mode 100644 (file)
index 0000000..f3afe94
--- /dev/null
@@ -0,0 +1,48 @@
+From 4966455d9100236fd6dd72b0cd00818435fdb25d Mon Sep 17 00:00:00 2001
+From: Yang Shi <shy828301@gmail.com>
+Date: Fri, 5 Nov 2021 13:41:14 -0700
+Subject: mm: hwpoison: handle non-anonymous THP correctly
+
+From: Yang Shi <shy828301@gmail.com>
+
+commit 4966455d9100236fd6dd72b0cd00818435fdb25d upstream.
+
+Currently hwpoison doesn't handle non-anonymous THP, but THP support for
+tmpfs and the read-only file cache has been in place since v4.8.  Such
+pages can be offlined by splitting the THP, just like anonymous THP.
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-7-shy828301@gmail.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1440,14 +1440,11 @@ static int identify_page_state(unsigned
+ static int try_to_split_thp_page(struct page *page, const char *msg)
+ {
+       lock_page(page);
+-      if (!PageAnon(page) || unlikely(split_huge_page(page))) {
++      if (unlikely(split_huge_page(page))) {
+               unsigned long pfn = page_to_pfn(page);
+               unlock_page(page);
+-              if (!PageAnon(page))
+-                      pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+-              else
+-                      pr_info("%s: %#lx: thp split failed\n", msg, pfn);
++              pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+               put_page(page);
+               return -EBUSY;
+       }
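
For reference, this is how the helper reads once the hunk above is applied. It is a condensed sketch reconstructed from the hunk and the surrounding 5.15 context (the trailing unlock_page()/return 0 path is inferred, since it lies outside the hunk), not a verbatim copy of the tree:

static int try_to_split_thp_page(struct page *page, const char *msg)
{
        lock_page(page);
        /*
         * Any THP -- anonymous, shmem or file-backed -- is now treated the
         * same way: try to split it, and only report failure if the split
         * itself fails.
         */
        if (unlikely(split_huge_page(page))) {
                unsigned long pfn = page_to_pfn(page);

                unlock_page(page);
                pr_info("%s: %#lx: thp split failed\n", msg, pfn);
                put_page(page);
                return -EBUSY;
        }
        unlock_page(page);

        return 0;
}
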
diff --git a/queue-5.15/mm-hwpoison-refactor-refcount-check-handling.patch b/queue-5.15/mm-hwpoison-refactor-refcount-check-handling.patch
new file mode 100644 (file)
index 0000000..325f5ae
--- /dev/null
@@ -0,0 +1,244 @@
+From dd0f230a0a80ff396c7ce587f16429f2a8131344 Mon Sep 17 00:00:00 2001
+From: Yang Shi <shy828301@gmail.com>
+Date: Fri, 5 Nov 2021 13:41:07 -0700
+Subject: mm: hwpoison: refactor refcount check handling
+
+From: Yang Shi <shy828301@gmail.com>
+
+commit dd0f230a0a80ff396c7ce587f16429f2a8131344 upstream.
+
+Memory failure will report failure if the page still has an extra pinned
+refcount, other than the one from hwpoison, after the handler is done.
+The check is not actually necessary for all handlers, so move the check
+into the specific handlers.  This makes the following patch, which keeps
+shmem pages in the page cache, easier.
+
+There may be an expected extra pin in some cases, for example when the
+page is dirty and in the swap cache.
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-5-shy828301@gmail.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Suggested-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |   93 +++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 64 insertions(+), 29 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -811,12 +811,44 @@ static int truncate_error_page(struct pa
+       return ret;
+ }
++struct page_state {
++      unsigned long mask;
++      unsigned long res;
++      enum mf_action_page_type type;
++
++      /* Callback ->action() has to unlock the relevant page inside it. */
++      int (*action)(struct page_state *ps, struct page *p);
++};
++
++/*
++ * Return true if page is still referenced by others, otherwise return
++ * false.
++ *
++ * The extra_pins is true when one extra refcount is expected.
++ */
++static bool has_extra_refcount(struct page_state *ps, struct page *p,
++                             bool extra_pins)
++{
++      int count = page_count(p) - 1;
++
++      if (extra_pins)
++              count -= 1;
++
++      if (count > 0) {
++              pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
++                     page_to_pfn(p), action_page_types[ps->type], count);
++              return true;
++      }
++
++      return false;
++}
++
+ /*
+  * Error hit kernel page.
+  * Do nothing, try to be lucky and not touch this instead. For a few cases we
+  * could be more sophisticated.
+  */
+-static int me_kernel(struct page *p, unsigned long pfn)
++static int me_kernel(struct page_state *ps, struct page *p)
+ {
+       unlock_page(p);
+       return MF_IGNORED;
+@@ -825,9 +857,9 @@ static int me_kernel(struct page *p, uns
+ /*
+  * Page in unknown state. Do nothing.
+  */
+-static int me_unknown(struct page *p, unsigned long pfn)
++static int me_unknown(struct page_state *ps, struct page *p)
+ {
+-      pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
++      pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
+       unlock_page(p);
+       return MF_FAILED;
+ }
+@@ -835,7 +867,7 @@ static int me_unknown(struct page *p, un
+ /*
+  * Clean (or cleaned) page cache page.
+  */
+-static int me_pagecache_clean(struct page *p, unsigned long pfn)
++static int me_pagecache_clean(struct page_state *ps, struct page *p)
+ {
+       int ret;
+       struct address_space *mapping;
+@@ -872,9 +904,13 @@ static int me_pagecache_clean(struct pag
+        *
+        * Open: to take i_rwsem or not for this? Right now we don't.
+        */
+-      ret = truncate_error_page(p, pfn, mapping);
++      ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ out:
+       unlock_page(p);
++
++      if (has_extra_refcount(ps, p, false))
++              ret = MF_FAILED;
++
+       return ret;
+ }
+@@ -883,7 +919,7 @@ out:
+  * Issues: when the error hit a hole page the error is not properly
+  * propagated.
+  */
+-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
++static int me_pagecache_dirty(struct page_state *ps, struct page *p)
+ {
+       struct address_space *mapping = page_mapping(p);
+@@ -927,7 +963,7 @@ static int me_pagecache_dirty(struct pag
+               mapping_set_error(mapping, -EIO);
+       }
+-      return me_pagecache_clean(p, pfn);
++      return me_pagecache_clean(ps, p);
+ }
+ /*
+@@ -949,9 +985,10 @@ static int me_pagecache_dirty(struct pag
+  * Clean swap cache pages can be directly isolated. A later page fault will
+  * bring in the known good data from disk.
+  */
+-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
++static int me_swapcache_dirty(struct page_state *ps, struct page *p)
+ {
+       int ret;
++      bool extra_pins = false;
+       ClearPageDirty(p);
+       /* Trigger EIO in shmem: */
+@@ -959,10 +996,17 @@ static int me_swapcache_dirty(struct pag
+       ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+       unlock_page(p);
++
++      if (ret == MF_DELAYED)
++              extra_pins = true;
++
++      if (has_extra_refcount(ps, p, extra_pins))
++              ret = MF_FAILED;
++
+       return ret;
+ }
+-static int me_swapcache_clean(struct page *p, unsigned long pfn)
++static int me_swapcache_clean(struct page_state *ps, struct page *p)
+ {
+       int ret;
+@@ -970,6 +1014,10 @@ static int me_swapcache_clean(struct pag
+       ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+       unlock_page(p);
++
++      if (has_extra_refcount(ps, p, false))
++              ret = MF_FAILED;
++
+       return ret;
+ }
+@@ -979,7 +1027,7 @@ static int me_swapcache_clean(struct pag
+  * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
+  *   To narrow down kill region to one page, we need to break up pmd.
+  */
+-static int me_huge_page(struct page *p, unsigned long pfn)
++static int me_huge_page(struct page_state *ps, struct page *p)
+ {
+       int res;
+       struct page *hpage = compound_head(p);
+@@ -990,7 +1038,7 @@ static int me_huge_page(struct page *p,
+       mapping = page_mapping(hpage);
+       if (mapping) {
+-              res = truncate_error_page(hpage, pfn, mapping);
++              res = truncate_error_page(hpage, page_to_pfn(p), mapping);
+               unlock_page(hpage);
+       } else {
+               res = MF_FAILED;
+@@ -1008,6 +1056,9 @@ static int me_huge_page(struct page *p,
+               }
+       }
++      if (has_extra_refcount(ps, p, false))
++              res = MF_FAILED;
++
+       return res;
+ }
+@@ -1033,14 +1084,7 @@ static int me_huge_page(struct page *p,
+ #define slab          (1UL << PG_slab)
+ #define reserved      (1UL << PG_reserved)
+-static struct page_state {
+-      unsigned long mask;
+-      unsigned long res;
+-      enum mf_action_page_type type;
+-
+-      /* Callback ->action() has to unlock the relevant page inside it. */
+-      int (*action)(struct page *p, unsigned long pfn);
+-} error_states[] = {
++static struct page_state error_states[] = {
+       { reserved,     reserved,       MF_MSG_KERNEL,  me_kernel },
+       /*
+        * free pages are specially detected outside this table:
+@@ -1100,19 +1144,10 @@ static int page_action(struct page_state
+                       unsigned long pfn)
+ {
+       int result;
+-      int count;
+       /* page p should be unlocked after returning from ps->action().  */
+-      result = ps->action(p, pfn);
++      result = ps->action(ps, p);
+-      count = page_count(p) - 1;
+-      if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
+-              count--;
+-      if (count > 0) {
+-              pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+-                     pfn, action_page_types[ps->type], count);
+-              result = MF_FAILED;
+-      }
+       action_result(pfn, ps->type, result);
+       /* Could do more checks here if page looks ok */
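
Taken together, the hunks above change the error_states callbacks from ->action(p, pfn) to ->action(ps, p), with each handler doing its own refcount check. Below is a condensed sketch of the resulting pattern, assembled from the hunks (5.15 identifiers assumed; the delete_from_swap_cache() call in me_swapcache_clean() is inferred from unchanged context and is not part of the hunks shown):

struct page_state {
        unsigned long mask;
        unsigned long res;
        enum mf_action_page_type type;

        /* Callback ->action() has to unlock the relevant page inside it. */
        int (*action)(struct page_state *ps, struct page *p);
};

/*
 * Return true if the page is still referenced by others once the hwpoison
 * reference (and, optionally, one expected extra pin) has been discounted.
 */
static bool has_extra_refcount(struct page_state *ps, struct page *p,
                               bool extra_pins)
{
        int count = page_count(p) - 1;

        if (extra_pins)
                count -= 1;

        if (count > 0) {
                pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
                       page_to_pfn(p), action_page_types[ps->type], count);
                return true;
        }

        return false;
}

/*
 * Example handler after the refactor: the refcount check that used to live
 * in page_action() is now folded into the handler itself.
 */
static int me_swapcache_clean(struct page_state *ps, struct page *p)
{
        int ret;

        delete_from_swap_cache(p);      /* unchanged context, inferred */
        ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
        unlock_page(p);

        if (has_extra_refcount(ps, p, false))
                ret = MF_FAILED;

        return ret;
}
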
diff --git a/queue-5.15/mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch b/queue-5.15/mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch
new file mode 100644 (file)
index 0000000..7b2dac9
--- /dev/null
@@ -0,0 +1,223 @@
+From a7605426666196c5a460dd3de6f8dac1d3c21f00 Mon Sep 17 00:00:00 2001
+From: Yang Shi <shy828301@gmail.com>
+Date: Fri, 14 Jan 2022 14:05:19 -0800
+Subject: mm: shmem: don't truncate page if memory failure happens
+
+From: Yang Shi <shy828301@gmail.com>
+
+commit a7605426666196c5a460dd3de6f8dac1d3c21f00 upstream.
+
+The current behavior of memory failure is to truncate the page cache
+regardless of whether the page is dirty or clean.  If the page is dirty,
+a later access will get obsolete data from disk without any notification
+to the user.  This may cause silent data loss.  It is even worse for
+shmem: since shmem is an in-memory filesystem, truncating the page cache
+means discarding the data blocks, and a later read would return all
+zeros.
+
+The right approach is to keep the corrupted page in the page cache; any
+later access will return an error for syscalls or SIGBUS for page
+faults, until the file is truncated, hole punched or removed.  Regular
+storage-backed filesystems would be more complicated, so this patch is
+focused on shmem.  This also unblocks support for soft offlining shmem
+THP.
+
+[akpm@linux-foundation.org: coding style fixes]
+[arnd@arndb.de: fix uninitialized variable use in me_pagecache_clean()]
+  Link: https://lkml.kernel.org/r/20211022064748.4173718-1-arnd@kernel.org
+[Fix invalid pointer dereference in shmem_read_mapping_page_gfp() with a
+ slightly different implementation from what Ajay Garg <ajaygargnsit@gmail.com>
+ and Muchun Song <songmuchun@bytedance.com> proposed, and rework the
+ error handling of shmem_write_begin() as suggested by Linus]
+  Link: https://lore.kernel.org/linux-mm/20211111084617.6746-1-ajaygargnsit@gmail.com/
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-6-shy828301@gmail.com
+Link: https://lkml.kernel.org/r/20211116193247.21102-1-shy828301@gmail.com
+Signed-off-by: Yang Shi <shy828301@gmail.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ajay Garg <ajaygargnsit@gmail.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Andy Lavr <andy.lavr@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |   14 +++++++++++---
+ mm/shmem.c          |   51 +++++++++++++++++++++++++++++++++++++++++++++------
+ mm/userfaultfd.c    |    5 +++++
+ 3 files changed, 61 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -57,6 +57,7 @@
+ #include <linux/ratelimit.h>
+ #include <linux/page-isolation.h>
+ #include <linux/pagewalk.h>
++#include <linux/shmem_fs.h>
+ #include "internal.h"
+ #include "ras/ras_event.h"
+@@ -871,6 +872,7 @@ static int me_pagecache_clean(struct pag
+ {
+       int ret;
+       struct address_space *mapping;
++      bool extra_pins;
+       delete_from_lru_cache(p);
+@@ -900,17 +902,23 @@ static int me_pagecache_clean(struct pag
+       }
+       /*
++       * The shmem page is kept in page cache instead of truncating
++       * so is expected to have an extra refcount after error-handling.
++       */
++      extra_pins = shmem_mapping(mapping);
++
++      /*
+        * Truncation is a bit tricky. Enable it per file system for now.
+        *
+        * Open: to take i_rwsem or not for this? Right now we don't.
+        */
+       ret = truncate_error_page(p, page_to_pfn(p), mapping);
++      if (has_extra_refcount(ps, p, extra_pins))
++              ret = MF_FAILED;
++
+ out:
+       unlock_page(p);
+-      if (has_extra_refcount(ps, p, false))
+-              ret = MF_FAILED;
+-
+       return ret;
+ }
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2463,6 +2463,7 @@ shmem_write_begin(struct file *file, str
+       struct inode *inode = mapping->host;
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       pgoff_t index = pos >> PAGE_SHIFT;
++      int ret = 0;
+       /* i_rwsem is held by caller */
+       if (unlikely(info->seals & (F_SEAL_GROW |
+@@ -2473,7 +2474,19 @@ shmem_write_begin(struct file *file, str
+                       return -EPERM;
+       }
+-      return shmem_getpage(inode, index, pagep, SGP_WRITE);
++      ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
++
++      if (ret)
++              return ret;
++
++      if (PageHWPoison(*pagep)) {
++              unlock_page(*pagep);
++              put_page(*pagep);
++              *pagep = NULL;
++              return -EIO;
++      }
++
++      return 0;
+ }
+ static int
+@@ -2560,6 +2573,12 @@ static ssize_t shmem_file_read_iter(stru
+                       if (sgp == SGP_CACHE)
+                               set_page_dirty(page);
+                       unlock_page(page);
++
++                      if (PageHWPoison(page)) {
++                              put_page(page);
++                              error = -EIO;
++                              break;
++                      }
+               }
+               /*
+@@ -3121,7 +3140,8 @@ static const char *shmem_get_link(struct
+               page = find_get_page(inode->i_mapping, 0);
+               if (!page)
+                       return ERR_PTR(-ECHILD);
+-              if (!PageUptodate(page)) {
++              if (PageHWPoison(page) ||
++                  !PageUptodate(page)) {
+                       put_page(page);
+                       return ERR_PTR(-ECHILD);
+               }
+@@ -3129,6 +3149,13 @@ static const char *shmem_get_link(struct
+               error = shmem_getpage(inode, 0, &page, SGP_READ);
+               if (error)
+                       return ERR_PTR(error);
++              if (!page)
++                      return ERR_PTR(-ECHILD);
++              if (PageHWPoison(page)) {
++                      unlock_page(page);
++                      put_page(page);
++                      return ERR_PTR(-ECHILD);
++              }
+               unlock_page(page);
+       }
+       set_delayed_call(done, shmem_put_link, page);
+@@ -3779,6 +3806,13 @@ static void shmem_destroy_inodecache(voi
+       kmem_cache_destroy(shmem_inode_cachep);
+ }
++/* Keep the page in page cache instead of truncating it */
++static int shmem_error_remove_page(struct address_space *mapping,
++                                 struct page *page)
++{
++      return 0;
++}
++
+ const struct address_space_operations shmem_aops = {
+       .writepage      = shmem_writepage,
+       .set_page_dirty = __set_page_dirty_no_writeback,
+@@ -3789,7 +3823,7 @@ const struct address_space_operations sh
+ #ifdef CONFIG_MIGRATION
+       .migratepage    = migrate_page,
+ #endif
+-      .error_remove_page = generic_error_remove_page,
++      .error_remove_page = shmem_error_remove_page,
+ };
+ EXPORT_SYMBOL(shmem_aops);
+@@ -4197,9 +4231,14 @@ struct page *shmem_read_mapping_page_gfp
+       error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+                                 gfp, NULL, NULL, NULL);
+       if (error)
+-              page = ERR_PTR(error);
+-      else
+-              unlock_page(page);
++              return ERR_PTR(error);
++
++      unlock_page(page);
++      if (PageHWPoison(page)) {
++              put_page(page);
++              return ERR_PTR(-EIO);
++      }
++
+       return page;
+ #else
+       /*
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -238,6 +238,11 @@ static int mcontinue_atomic_pte(struct m
+               goto out;
+       }
++      if (PageHWPoison(page)) {
++              ret = -EIO;
++              goto out_release;
++      }
++
+       ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
+                                      page, false, wp_copy);
+       if (ret)
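
The net effect of this patch on shmem, condensed into one sketch assembled from the hunks above (5.15 identifiers assumed; an illustration of the pattern, not a verbatim copy of the tree): the address-space operation stops truncating poisoned pages, memory-failure learns to expect the extra refcount such a kept page holds, and lookup paths that would hand one out fail with -EIO instead.

/*
 * Keep the poisoned page in the page cache: returning 0 here tells the
 * memory-failure code not to truncate it (shmem previously used
 * generic_error_remove_page, which truncates).
 */
static int shmem_error_remove_page(struct address_space *mapping,
                                   struct page *page)
{
        return 0;
}

/*
 * Lookup paths then refuse to hand out a page marked HWPoison.  The same
 * check-unlock-put-and-fail pattern is applied in shmem_write_begin(),
 * shmem_file_read_iter(), shmem_get_link() and mcontinue_atomic_pte().
 */
struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
                                         pgoff_t index, gfp_t gfp)
{
        struct inode *inode = mapping->host;
        struct page *page;
        int error;

        error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
                                  gfp, NULL, NULL, NULL);
        if (error)
                return ERR_PTR(error);

        unlock_page(page);
        if (PageHWPoison(page)) {
                put_page(page);
                return ERR_PTR(-EIO);
        }

        return page;
}
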
diff --git a/queue-5.15/series b/queue-5.15/series
new file mode 100644 (file)
index 0000000..d3f6cc7
--- /dev/null
@@ -0,0 +1,3 @@
+mm-hwpoison-refactor-refcount-check-handling.patch
+mm-hwpoison-handle-non-anonymous-thp-correctly.patch
+mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch