From 3ac44214585b76f87a22a12f1cc4c85b55f21b27 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 11 Apr 2022 16:42:10 +0200 Subject: [PATCH] 4.14-stable patches added patches: mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch --- ...-entry-even-if-zap_details-specified.patch | 179 ++++++++++++++++++ queue-4.14/series | 1 + 2 files changed, 180 insertions(+) create mode 100644 queue-4.14/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch diff --git a/queue-4.14/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch b/queue-4.14/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch new file mode 100644 index 00000000000..c14ee32829e --- /dev/null +++ b/queue-4.14/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch @@ -0,0 +1,179 @@ +From 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 22 Mar 2022 14:42:15 -0700 +Subject: mm: don't skip swap entry even if zap_details specified + +From: Peter Xu + +commit 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 upstream. + +Patch series "mm: Rework zap ptes on swap entries", v5. + +Patch 1 should fix a long standing bug for zap_pte_range() on +zap_details usage. The risk is we could have some swap entries skipped +while we should have zapped them. + +Migration entries are not the major concern because file backed memory +is always zapped in the pattern that "first time without page lock, then +re-zap with page lock" hence the 2nd zap will always make sure all +migration entries are already recovered. + +However there can be issues with real swap entries being skipped +erroneously. There's a reproducer provided in commit message of patch +1 for that. + +Patch 2-4 are cleanups that are based on patch 1. After the whole +patchset is applied, we should have a very clean view of zap_pte_range(). + +Only patch 1 needs to be backported to stable if necessary. 
+ +This patch (of 4): + +The "details" pointer shouldn't be the token to decide whether we should +skip swap entries. + +For example, when the callers specified details->zap_mapping==NULL, it +means the user wants to zap all the pages (including COWed pages), then +we need to look into swap entries because there can be private COWed +pages that were swapped out. + +Skipping some swap entries when details is non-NULL may lead to wrongly +leaving some of the swap entries while we should have zapped them. + +A reproducer of the problem: + +===8<=== + #define _GNU_SOURCE /* See feature_test_macros(7) */ + #include <stdio.h> + #include <assert.h> + #include <unistd.h> + #include <sys/mman.h> + #include <sys/types.h> + + int page_size; + int shmem_fd; + char *buffer; + + void main(void) + { + int ret; + char val; + + page_size = getpagesize(); + shmem_fd = memfd_create("test", 0); + assert(shmem_fd >= 0); + + ret = ftruncate(shmem_fd, page_size * 2); + assert(ret == 0); + + buffer = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE, shmem_fd, 0); + assert(buffer != MAP_FAILED); + + /* Write private page, swap it out */ + buffer[page_size] = 1; + madvise(buffer, page_size * 2, MADV_PAGEOUT); + + /* This should drop private buffer[page_size] already */ + ret = ftruncate(shmem_fd, page_size); + assert(ret == 0); + /* Recover the size */ + ret = ftruncate(shmem_fd, page_size * 2); + assert(ret == 0); + + /* Re-read the data, it should be all zero */ + val = buffer[page_size]; + if (val == 0) + printf("Good\n"); + else + printf("BUG\n"); + } +===8<=== + +We don't need to touch up the pmd path, because pmd never had an issue with +swap entries. For example, shmem pmd migration will always be split into +pte level, and same to swapping on anonymous. + +Add another helper should_zap_cows() so that we can also check whether we +should zap private mappings when there's no page pointer specified. + +This patch drops that trick, so we handle swap ptes coherently. 
Meanwhile +we should do the same check upon migration entry, hwpoison entry and +genuine swap entries too. + +To be explicit, we should still remember to keep the private entries if +even_cows==false, and always zap them when even_cows==true. + +The issue seems to exist starting from the initial commit of git. + +[peterx@redhat.com: comment tweaks] + Link: https://lkml.kernel.org/r/20220217060746.71256-2-peterx@redhat.com + +Link: https://lkml.kernel.org/r/20220217060746.71256-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20220216094810.60572-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20220216094810.60572-2-peterx@redhat.com +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Peter Xu +Reviewed-by: John Hubbard +Cc: David Hildenbrand +Cc: Hugh Dickins +Cc: Alistair Popple +Cc: Andrea Arcangeli +Cc: "Kirill A . Shutemov" +Cc: Matthew Wilcox +Cc: Vlastimil Babka +Cc: Yang Shi +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 25 +++++++++++++++++++------ + 1 file changed, 19 insertions(+), 6 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1306,6 +1306,17 @@ int copy_page_range(struct mm_struct *ds + return ret; + } + ++/* Whether we should zap all COWed (private) pages too */ ++static inline bool should_zap_cows(struct zap_details *details) ++{ ++ /* By default, zap all pages */ ++ if (!details) ++ return true; ++ ++ /* Or, we zap COWed pages only if the caller wants to */ ++ return !details->check_mapping; ++} ++ + static unsigned long zap_pte_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, +@@ -1394,17 +1405,19 @@ again: + continue; + } + +- /* If details->check_mapping, we leave swap entries. 
*/ +- if (unlikely(details)) +- continue; +- + entry = pte_to_swp_entry(ptent); +- if (!non_swap_entry(entry)) ++ if (!non_swap_entry(entry)) { ++ /* Genuine swap entry, hence a private anon page */ ++ if (!should_zap_cows(details)) ++ continue; + rss[MM_SWAPENTS]--; +- else if (is_migration_entry(entry)) { ++ } else if (is_migration_entry(entry)) { + struct page *page; + + page = migration_entry_to_page(entry); ++ if (details && details->check_mapping && ++ details->check_mapping != page_rmapping(page)) ++ continue; + rss[mm_counter(page)]--; + } + if (unlikely(!free_swap_and_cache(entry))) diff --git a/queue-4.14/series b/queue-4.14/series index 3dc044471a2..8b42231b747 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -256,3 +256,4 @@ perf-qcom_l2_pmu-fix-an-incorrect-null-check-on-list-iterator.patch irqchip-gic-v3-fix-gicr_ctlr.rwp-polling.patch tools-build-use-shell-instead-of-to-get-embedded-libperl-s-ccopts.patch dmaengine-revert-dmaengine-shdma-fix-runtime-pm-imbalance-on-error.patch +mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch -- 2.47.3