From: Greg Kroah-Hartman
Date: Wed, 16 Nov 2022 09:18:10 +0000 (+0100)
Subject: 5.15-stable patches
X-Git-Tag: v4.19.266~61
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e2601c85b83263c0464c059a05d4acaac5a0a2ab;p=thirdparty%2Fkernel%2Fstable-queue.git

5.15-stable patches

added patches:
      mm-hwpoison-handle-non-anonymous-thp-correctly.patch
      mm-hwpoison-refactor-refcount-check-handling.patch
      mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch
---
diff --git a/queue-5.15/mm-hwpoison-handle-non-anonymous-thp-correctly.patch b/queue-5.15/mm-hwpoison-handle-non-anonymous-thp-correctly.patch
new file mode 100644
index 00000000000..f3afe94e97e
--- /dev/null
+++ b/queue-5.15/mm-hwpoison-handle-non-anonymous-thp-correctly.patch
@@ -0,0 +1,48 @@
+From 4966455d9100236fd6dd72b0cd00818435fdb25d Mon Sep 17 00:00:00 2001
+From: Yang Shi
+Date: Fri, 5 Nov 2021 13:41:14 -0700
+Subject: mm: hwpoison: handle non-anonymous THP correctly
+
+From: Yang Shi
+
+commit 4966455d9100236fd6dd72b0cd00818435fdb25d upstream.
+
+Currently hwpoison doesn't handle non-anonymous THP, but since v4.8 THP
+support for tmpfs and read-only file cache has been added. They could
+be offlined by split THP, just like anonymous THP.
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-7-shy828301@gmail.com
+Signed-off-by: Yang Shi
+Acked-by: Naoya Horiguchi
+Cc: Hugh Dickins
+Cc: Kirill A. Shutemov
+Cc: Matthew Wilcox
+Cc: Oscar Salvador
+Cc: Peter Xu
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Cc: Naoya Horiguchi
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/memory-failure.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -1440,14 +1440,11 @@ static int identify_page_state(unsigned
+ static int try_to_split_thp_page(struct page *page, const char *msg)
+ {
+ lock_page(page);
+- if (!PageAnon(page) || unlikely(split_huge_page(page))) {
++ if (unlikely(split_huge_page(page))) {
+ unsigned long pfn = page_to_pfn(page);
+
+ unlock_page(page);
+- if (!PageAnon(page))
+- pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+- else
+- pr_info("%s: %#lx: thp split failed\n", msg, pfn);
++ pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+ put_page(page);
+ return -EBUSY;
+ }
diff --git a/queue-5.15/mm-hwpoison-refactor-refcount-check-handling.patch b/queue-5.15/mm-hwpoison-refactor-refcount-check-handling.patch
new file mode 100644
index 00000000000..325f5aeee27
--- /dev/null
+++ b/queue-5.15/mm-hwpoison-refactor-refcount-check-handling.patch
@@ -0,0 +1,244 @@
+From dd0f230a0a80ff396c7ce587f16429f2a8131344 Mon Sep 17 00:00:00 2001
+From: Yang Shi
+Date: Fri, 5 Nov 2021 13:41:07 -0700
+Subject: mm: hwpoison: refactor refcount check handling
+
+From: Yang Shi
+
+commit dd0f230a0a80ff396c7ce587f16429f2a8131344 upstream.
+
+Memory failure will report failure if the page still has extra pinned
+refcount other than from hwpoison after the handler is done. Actually
+the check is not necessary for all handlers, so move the check into
+specific handlers. This would make the following keeping shmem page in
+page cache patch easier.
+
+There may be expected extra pin for some cases, for example, when the
+page is dirty and in swapcache.
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-5-shy828301@gmail.com
+Signed-off-by: Yang Shi
+Signed-off-by: Naoya Horiguchi
+Suggested-by: Naoya Horiguchi
+Cc: Hugh Dickins
+Cc: Kirill A. Shutemov
+Cc: Matthew Wilcox
+Cc: Oscar Salvador
+Cc: Peter Xu
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Cc: Naoya Horiguchi
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/memory-failure.c | 93 +++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 64 insertions(+), 29 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -811,12 +811,44 @@ static int truncate_error_page(struct pa
+ return ret;
+ }
+
++struct page_state {
++ unsigned long mask;
++ unsigned long res;
++ enum mf_action_page_type type;
++
++ /* Callback ->action() has to unlock the relevant page inside it. */
++ int (*action)(struct page_state *ps, struct page *p);
++};
++
++/*
++ * Return true if page is still referenced by others, otherwise return
++ * false.
++ *
++ * The extra_pins is true when one extra refcount is expected.
++ */
++static bool has_extra_refcount(struct page_state *ps, struct page *p,
++ bool extra_pins)
++{
++ int count = page_count(p) - 1;
++
++ if (extra_pins)
++ count -= 1;
++
++ if (count > 0) {
++ pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
++ page_to_pfn(p), action_page_types[ps->type], count);
++ return true;
++ }
++
++ return false;
++}
++
+ /*
+ * Error hit kernel page.
+ * Do nothing, try to be lucky and not touch this instead. For a few cases we
+ * could be more sophisticated.
+ */
+-static int me_kernel(struct page *p, unsigned long pfn)
++static int me_kernel(struct page_state *ps, struct page *p)
+ {
+ unlock_page(p);
+ return MF_IGNORED;
+@@ -825,9 +857,9 @@ static int me_kernel(struct page *p, uns
+ /*
+ * Page in unknown state. Do nothing.
+ */
+-static int me_unknown(struct page *p, unsigned long pfn)
++static int me_unknown(struct page_state *ps, struct page *p)
+ {
+- pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
++ pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
+ unlock_page(p);
+ return MF_FAILED;
+ }
+@@ -835,7 +867,7 @@ static int me_unknown(struct page *p, un
+ /*
+ * Clean (or cleaned) page cache page.
+ */
+-static int me_pagecache_clean(struct page *p, unsigned long pfn)
++static int me_pagecache_clean(struct page_state *ps, struct page *p)
+ {
+ int ret;
+ struct address_space *mapping;
+@@ -872,9 +904,13 @@ static int me_pagecache_clean(struct pag
+ *
+ * Open: to take i_rwsem or not for this? Right now we don't.
+ */
+- ret = truncate_error_page(p, pfn, mapping);
++ ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ out:
+ unlock_page(p);
++
++ if (has_extra_refcount(ps, p, false))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+@@ -883,7 +919,7 @@ out:
+ * Issues: when the error hit a hole page the error is not properly
+ * propagated.
+ */
+-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
++static int me_pagecache_dirty(struct page_state *ps, struct page *p)
+ {
+ struct address_space *mapping = page_mapping(p);
+
+@@ -927,7 +963,7 @@ static int me_pagecache_dirty(struct pag
+ mapping_set_error(mapping, -EIO);
+ }
+
+- return me_pagecache_clean(p, pfn);
++ return me_pagecache_clean(ps, p);
+ }
+
+ /*
+@@ -949,9 +985,10 @@ static int me_pagecache_dirty(struct pag
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
++static int me_swapcache_dirty(struct page_state *ps, struct page *p)
+ {
+ int ret;
++ bool extra_pins = false;
+
+ ClearPageDirty(p);
+ /* Trigger EIO in shmem: */
+@@ -959,10 +996,17 @@ static int me_swapcache_dirty(struct pag
+
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+ unlock_page(p);
++
++ if (ret == MF_DELAYED)
++ extra_pins = true;
++
++ if (has_extra_refcount(ps, p, extra_pins))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+-static int me_swapcache_clean(struct page *p, unsigned long pfn)
++static int me_swapcache_clean(struct page_state *ps, struct page *p)
+ {
+ int ret;
+
+@@ -970,6 +1014,10 @@ static int me_swapcache_clean(struct pag
+
+ ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+ unlock_page(p);
++
++ if (has_extra_refcount(ps, p, false))
++ ret = MF_FAILED;
++
+ return ret;
+ }
+
+@@ -979,7 +1027,7 @@ static int me_swapcache_clean(struct pag
+ * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
+ * To narrow down kill region to one page, we need to break up pmd.
+ */
+-static int me_huge_page(struct page *p, unsigned long pfn)
++static int me_huge_page(struct page_state *ps, struct page *p)
+ {
+ int res;
+ struct page *hpage = compound_head(p);
+@@ -990,7 +1038,7 @@ static int me_huge_page(struct page *p,
+
+ mapping = page_mapping(hpage);
+ if (mapping) {
+- res = truncate_error_page(hpage, pfn, mapping);
++ res = truncate_error_page(hpage, page_to_pfn(p), mapping);
+ unlock_page(hpage);
+ } else {
+ res = MF_FAILED;
+@@ -1008,6 +1056,9 @@ static int me_huge_page(struct page *p,
+ }
+ }
+
++ if (has_extra_refcount(ps, p, false))
++ res = MF_FAILED;
++
+ return res;
+ }
+
+@@ -1033,14 +1084,7 @@ static int me_huge_page(struct page *p,
+ #define slab (1UL << PG_slab)
+ #define reserved (1UL << PG_reserved)
+
+-static struct page_state {
+- unsigned long mask;
+- unsigned long res;
+- enum mf_action_page_type type;
+-
+- /* Callback ->action() has to unlock the relevant page inside it. */
+- int (*action)(struct page *p, unsigned long pfn);
+-} error_states[] = {
++static struct page_state error_states[] = {
+ { reserved, reserved, MF_MSG_KERNEL, me_kernel },
+ /*
+ * free pages are specially detected outside this table:
+@@ -1100,19 +1144,10 @@ static int page_action(struct page_state
+ unsigned long pfn)
+ {
+ int result;
+- int count;
+
+ /* page p should be unlocked after returning from ps->action(). */
+- result = ps->action(p, pfn);
++ result = ps->action(ps, p);
+
+- count = page_count(p) - 1;
+- if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
+- count--;
+- if (count > 0) {
+- pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+- pfn, action_page_types[ps->type], count);
+- result = MF_FAILED;
+- }
+ action_result(pfn, ps->type, result);
+
+ /* Could do more checks here if page looks ok */
diff --git a/queue-5.15/mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch b/queue-5.15/mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch
new file mode 100644
index 00000000000..7b2dac99286
--- /dev/null
+++ b/queue-5.15/mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch
@@ -0,0 +1,223 @@
+From a7605426666196c5a460dd3de6f8dac1d3c21f00 Mon Sep 17 00:00:00 2001
+From: Yang Shi
+Date: Fri, 14 Jan 2022 14:05:19 -0800
+Subject: mm: shmem: don't truncate page if memory failure happens
+
+From: Yang Shi
+
+commit a7605426666196c5a460dd3de6f8dac1d3c21f00 upstream.
+
+The current behavior of memory failure is to truncate the page cache
+regardless of dirty or clean. If the page is dirty the later access
+will get the obsolete data from disk without any notification to the
+users. This may cause silent data loss. It is even worse for shmem
+since shmem is in-memory filesystem, truncating page cache means
+discarding data blocks. The later read would return all zero.
+
+The right approach is to keep the corrupted page in page cache, any
+later access would return error for syscalls or SIGBUS for page fault,
+until the file is truncated, hole punched or removed. The regular
+storage backed filesystems would be more complicated so this patch is
+focused on shmem. This also unblock the support for soft offlining
+shmem THP.
+
+[akpm@linux-foundation.org: coding style fixes]
+[arnd@arndb.de: fix uninitialized variable use in me_pagecache_clean()]
+ Link: https://lkml.kernel.org/r/20211022064748.4173718-1-arnd@kernel.org
+[Fix invalid pointer dereference in shmem_read_mapping_page_gfp() with a
+ slight different implementation from what Ajay Garg
+ and Muchun Song proposed and reworked the
+ error handling of shmem_write_begin() suggested by Linus]
+ Link: https://lore.kernel.org/linux-mm/20211111084617.6746-1-ajaygargnsit@gmail.com/
+
+Link: https://lkml.kernel.org/r/20211020210755.23964-6-shy828301@gmail.com
+Link: https://lkml.kernel.org/r/20211116193247.21102-1-shy828301@gmail.com
+Signed-off-by: Yang Shi
+Signed-off-by: Arnd Bergmann
+Cc: Hugh Dickins
+Cc: Kirill A. Shutemov
+Cc: Matthew Wilcox
+Cc: Naoya Horiguchi
+Cc: Oscar Salvador
+Cc: Peter Xu
+Cc: Ajay Garg
+Cc: Muchun Song
+Cc: Andy Lavr
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Cc: Naoya Horiguchi
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/memory-failure.c | 14 +++++++++++---
+ mm/shmem.c | 51 +++++++++++++++++++++++++++++++++++++++++++++------
+ mm/userfaultfd.c | 5 +++++
+ 3 files changed, 61 insertions(+), 9 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -57,6 +57,7 @@
+ #include
+ #include
+ #include
++#include
+ #include "internal.h"
+ #include "ras/ras_event.h"
+
+@@ -871,6 +872,7 @@ static int me_pagecache_clean(struct pag
+ {
+ int ret;
+ struct address_space *mapping;
++ bool extra_pins;
+
+ delete_from_lru_cache(p);
+
+@@ -900,17 +902,23 @@ static int me_pagecache_clean(struct pag
+ }
+
+ /*
++ * The shmem page is kept in page cache instead of truncating
++ * so is expected to have an extra refcount after error-handling.
++ */
++ extra_pins = shmem_mapping(mapping);
++
++ /*
+ * Truncation is a bit tricky. Enable it per file system for now.
+ *
+ * Open: to take i_rwsem or not for this? Right now we don't.
+ */
+ ret = truncate_error_page(p, page_to_pfn(p), mapping);
++ if (has_extra_refcount(ps, p, extra_pins))
++ ret = MF_FAILED;
++
+ out:
+ unlock_page(p);
+
+- if (has_extra_refcount(ps, p, false))
+- ret = MF_FAILED;
+-
+ return ret;
+ }
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2463,6 +2463,7 @@ shmem_write_begin(struct file *file, str
+ struct inode *inode = mapping->host;
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ pgoff_t index = pos >> PAGE_SHIFT;
++ int ret = 0;
+
+ /* i_rwsem is held by caller */
+ if (unlikely(info->seals & (F_SEAL_GROW |
+@@ -2473,7 +2474,19 @@ shmem_write_begin(struct file *file, str
+ return -EPERM;
+ }
+
+- return shmem_getpage(inode, index, pagep, SGP_WRITE);
++ ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
++
++ if (ret)
++ return ret;
++
++ if (PageHWPoison(*pagep)) {
++ unlock_page(*pagep);
++ put_page(*pagep);
++ *pagep = NULL;
++ return -EIO;
++ }
++
++ return 0;
+ }
+
+ static int
+@@ -2560,6 +2573,12 @@ static ssize_t shmem_file_read_iter(stru
+ if (sgp == SGP_CACHE)
+ set_page_dirty(page);
+ unlock_page(page);
++
++ if (PageHWPoison(page)) {
++ put_page(page);
++ error = -EIO;
++ break;
++ }
+ }
+
+ /*
+@@ -3121,7 +3140,8 @@ static const char *shmem_get_link(struct
+ page = find_get_page(inode->i_mapping, 0);
+ if (!page)
+ return ERR_PTR(-ECHILD);
+- if (!PageUptodate(page)) {
++ if (PageHWPoison(page) ||
++ !PageUptodate(page)) {
+ put_page(page);
+ return ERR_PTR(-ECHILD);
+ }
+@@ -3129,6 +3149,13 @@ static const char *shmem_get_link(struct
+ error = shmem_getpage(inode, 0, &page, SGP_READ);
+ if (error)
+ return ERR_PTR(error);
++ if (!page)
++ return ERR_PTR(-ECHILD);
++ if (PageHWPoison(page)) {
++ unlock_page(page);
++ put_page(page);
++ return ERR_PTR(-ECHILD);
++ }
+ unlock_page(page);
+ }
+ set_delayed_call(done, shmem_put_link, page);
+@@ -3779,6 +3806,13 @@ static void shmem_destroy_inodecache(voi
+ kmem_cache_destroy(shmem_inode_cachep);
+ }
+
++/* Keep the page in page cache instead of truncating it */
++static int shmem_error_remove_page(struct address_space *mapping,
++ struct page *page)
++{
++ return 0;
++}
++
+ const struct address_space_operations shmem_aops = {
+ .writepage = shmem_writepage,
+ .set_page_dirty = __set_page_dirty_no_writeback,
+@@ -3789,7 +3823,7 @@ const struct address_space_operations sh
+ #ifdef CONFIG_MIGRATION
+ .migratepage = migrate_page,
+ #endif
+- .error_remove_page = generic_error_remove_page,
++ .error_remove_page = shmem_error_remove_page,
+ };
+ EXPORT_SYMBOL(shmem_aops);
+
+@@ -4197,9 +4231,14 @@ struct page *shmem_read_mapping_page_gfp
+ error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+ gfp, NULL, NULL, NULL);
+ if (error)
+- page = ERR_PTR(error);
+- else
+- unlock_page(page);
++ return ERR_PTR(error);
++
++ unlock_page(page);
++ if (PageHWPoison(page)) {
++ put_page(page);
++ return ERR_PTR(-EIO);
++ }
++
+ return page;
+ #else
+ /*
+--- a/mm/userfaultfd.c
++++ b/mm/userfaultfd.c
+@@ -238,6 +238,11 @@ static int mcontinue_atomic_pte(struct m
+ goto out;
+ }
+
++ if (PageHWPoison(page)) {
++ ret = -EIO;
++ goto out_release;
++ }
++
+ ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
+ page, false, wp_copy);
+ if (ret)
diff --git a/queue-5.15/series b/queue-5.15/series
new file mode 100644
index 00000000000..d3f6cc7d62b
--- /dev/null
+++ b/queue-5.15/series
@@ -0,0 +1,3 @@
+mm-hwpoison-refactor-refcount-check-handling.patch
+mm-hwpoison-handle-non-anonymous-thp-correctly.patch
+mm-shmem-don-t-truncate-page-if-memory-failure-happens.patch