]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 14:42:15 +0000 (16:42 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 14:42:15 +0000 (16:42 +0200)
added patches:
mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch

queue-4.19/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch b/queue-4.19/mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch
new file mode 100644 (file)
index 0000000..313856d
--- /dev/null
@@ -0,0 +1,179 @@
+From 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Tue, 22 Mar 2022 14:42:15 -0700
+Subject: mm: don't skip swap entry even if zap_details specified
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 5abfd71d936a8aefd9f9ccd299dea7a164a5d455 upstream.
+
+Patch series "mm: Rework zap ptes on swap entries", v5.
+
+Patch 1 should fix a long standing bug for zap_pte_range() on
+zap_details usage.  The risk is we could have some swap entries skipped
+while we should have zapped them.
+
+Migration entries are not the major concern because file backed memory
+always zap in the pattern that "first time without page lock, then
+re-zap with page lock" hence the 2nd zap will always make sure all
+migration entries are already recovered.
+
+However there can be issues with real swap entries got skipped
+erroneously.  There's a reproducer provided in the commit message of patch
+1 for that.
+
+Patch 2-4 are cleanups that are based on patch 1.  After the whole
+patchset applied, we should have a very clean view of zap_pte_range().
+
+Only patch 1 needs to be backported to stable if necessary.
+
+This patch (of 4):
+
+The "details" pointer shouldn't be the token to decide whether we should
+skip swap entries.
+
+For example, when the callers specified details->zap_mapping==NULL, it
+means the user wants to zap all the pages (including COWed pages), then
+we need to look into swap entries because there can be private COWed
+pages that were swapped out.
+
+Skipping some swap entries when details is non-NULL may lead to wrongly
+leaving some of the swap entries while we should have zapped them.
+
+A reproducer of the problem:
+
+===8<===
+        #define _GNU_SOURCE         /* See feature_test_macros(7) */
+        #include <stdio.h>
+        #include <assert.h>
+        #include <unistd.h>
+        #include <sys/mman.h>
+        #include <sys/types.h>
+
+        int page_size;
+        int shmem_fd;
+        char *buffer;
+
+        void main(void)
+        {
+                int ret;
+                char val;
+
+                page_size = getpagesize();
+                shmem_fd = memfd_create("test", 0);
+                assert(shmem_fd >= 0);
+
+                ret = ftruncate(shmem_fd, page_size * 2);
+                assert(ret == 0);
+
+                buffer = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+                                MAP_PRIVATE, shmem_fd, 0);
+                assert(buffer != MAP_FAILED);
+
+                /* Write private page, swap it out */
+                buffer[page_size] = 1;
+                madvise(buffer, page_size * 2, MADV_PAGEOUT);
+
+                /* This should drop private buffer[page_size] already */
+                ret = ftruncate(shmem_fd, page_size);
+                assert(ret == 0);
+                /* Recover the size */
+                ret = ftruncate(shmem_fd, page_size * 2);
+                assert(ret == 0);
+
+                /* Re-read the data, it should be all zero */
+                val = buffer[page_size];
+                if (val == 0)
+                        printf("Good\n");
+                else
+                        printf("BUG\n");
+        }
+===8<===
+
+We don't need to touch up the pmd path, because pmd never had an issue with
+swap entries.  For example, shmem pmd migration will always be split into
+pte level, and same to swapping on anonymous.
+
+Add another helper should_zap_cows() so that we can also check whether we
+should zap private mappings when there's no page pointer specified.
+
+This patch drops that trick, so we handle swap ptes coherently.  Meanwhile
+we should do the same check upon migration entry, hwpoison entry and
+genuine swap entries too.
+
+To be explicit, we should still remember to keep the private entries if
+even_cows==false, and always zap them when even_cows==true.
+
+The issue seems to exist starting from the initial commit of git.
+
+[peterx@redhat.com: comment tweaks]
+  Link: https://lkml.kernel.org/r/20220217060746.71256-2-peterx@redhat.com
+
+Link: https://lkml.kernel.org/r/20220217060746.71256-1-peterx@redhat.com
+Link: https://lkml.kernel.org/r/20220216094810.60572-1-peterx@redhat.com
+Link: https://lkml.kernel.org/r/20220216094810.60572-2-peterx@redhat.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: John Hubbard <jhubbard@nvidia.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c |   25 +++++++++++++++++++------
+ 1 file changed, 19 insertions(+), 6 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1302,6 +1302,17 @@ int copy_page_range(struct mm_struct *ds
+       return ret;
+ }
++/* Whether we should zap all COWed (private) pages too */
++static inline bool should_zap_cows(struct zap_details *details)
++{
++      /* By default, zap all pages */
++      if (!details)
++              return true;
++
++      /* Or, we zap COWed pages only if the caller wants to */
++      return !details->check_mapping;
++}
++
+ static unsigned long zap_pte_range(struct mmu_gather *tlb,
+                               struct vm_area_struct *vma, pmd_t *pmd,
+                               unsigned long addr, unsigned long end,
+@@ -1390,17 +1401,19 @@ again:
+                       continue;
+               }
+-              /* If details->check_mapping, we leave swap entries. */
+-              if (unlikely(details))
+-                      continue;
+-
+               entry = pte_to_swp_entry(ptent);
+-              if (!non_swap_entry(entry))
++              if (!non_swap_entry(entry)) {
++                      /* Genuine swap entry, hence a private anon page */
++                      if (!should_zap_cows(details))
++                              continue;
+                       rss[MM_SWAPENTS]--;
+-              else if (is_migration_entry(entry)) {
++              } else if (is_migration_entry(entry)) {
+                       struct page *page;
+                       page = migration_entry_to_page(entry);
++                      if (details && details->check_mapping &&
++                          details->check_mapping != page_rmapping(page))
++                              continue;
+                       rss[mm_counter(page)]--;
+               }
+               if (unlikely(!free_swap_and_cache(entry)))
index fe567d16b3368a0d5af802ab4ba99418f0783367..8dbde6fa6acf2591d9428d00e71bfbfdebcf3339 100644 (file)
@@ -325,3 +325,4 @@ irqchip-gic-v3-fix-gicr_ctlr.rwp-polling.patch
 tools-build-filter-out-options-and-warnings-not-supported-by-clang.patch
 tools-build-use-shell-instead-of-to-get-embedded-libperl-s-ccopts.patch
 dmaengine-revert-dmaengine-shdma-fix-runtime-pm-imbalance-on-error.patch
+mm-don-t-skip-swap-entry-even-if-zap_details-specified.patch