git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.19-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 10 May 2022 11:19:05 +0000 (13:19 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 10 May 2022 11:19:05 +0000 (13:19 +0200)
added patches:
mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch

queue-4.19/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch [new file with mode: 0644]
queue-4.19/series

diff --git a/queue-4.19/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch b/queue-4.19/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch
new file mode 100644 (file)
index 0000000..69969f1
--- /dev/null
+++ b/queue-4.19/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch
@@ -0,0 +1,157 @@
+From e914d8f00391520ecc4495dd0ca0124538ab7119 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Thu, 14 Apr 2022 19:13:46 -0700
+Subject: mm: fix unexpected zeroed page mapping with zram swap
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit e914d8f00391520ecc4495dd0ca0124538ab7119 upstream.
+
+With two processes sharing an address space via CLONE_VM, a user process
+can be corrupted by unexpectedly seeing a zeroed page.
+
+      CPU A                        CPU B
+
+  do_swap_page                do_swap_page
+  SWP_SYNCHRONOUS_IO path     SWP_SYNCHRONOUS_IO path
+  swap_readpage valid data
+    swap_slot_free_notify
+      delete zram entry
+                              swap_readpage zeroed(invalid) data
+                              pte_lock
+                              map the *zero data* to userspace
+                              pte_unlock
+  pte_lock
+  if (!pte_same)
+    goto out_nomap;
+  pte_unlock
+  return and next refault will
+  read zeroed data
+
+swap_slot_free_notify is bogus for the CLONE_VM case: copy_mm does not
+increase the refcount of the swap slot, so swap_slot_free_notify cannot
+tell whether it is safe to discard the data from the backing device.  In
+that case, the only lock it could rely on to synchronize swap slot
+freeing is the page table lock.  Thus, this patch gets rid of the
+swap_slot_free_notify function.  With this patch, CPU A will see correct
+data.
+
+      CPU A                        CPU B
+
+  do_swap_page                do_swap_page
+  SWP_SYNCHRONOUS_IO path     SWP_SYNCHRONOUS_IO path
+                              swap_readpage original data
+                              pte_lock
+                              map the original data
+                              swap_free
+                                swap_range_free
+                                  bd_disk->fops->swap_slot_free_notify
+  swap_readpage read zeroed data
+                              pte_unlock
+  pte_lock
+  if (!pte_same)
+    goto out_nomap;
+  pte_unlock
+  return
+  on next refault will see mapped data by CPU B
+
+The concern with this patch is increased memory consumption, since it can
+keep wasted memory in compressed form in zram as well as in uncompressed
+form in the address space.  However, most zram setups use no readahead,
+and do_swap_page is followed by swap_free, so the compressed copy in zram
+is freed quickly.
+
+Link: https://lkml.kernel.org/r/YjTVVxIAsnKAXjTd@google.com
+Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device")
+Reported-by: Ivan Babrou <ivan@cloudflare.com>
+Tested-by: Ivan Babrou <ivan@cloudflare.com>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Cc: Nitin Gupta <ngupta@vflare.org>
+Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: <stable@vger.kernel.org>   [4.14+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_io.c |   55 -------------------------------------------------------
+ 1 file changed, 55 deletions(-)
+
+--- a/mm/page_io.c
++++ b/mm/page_io.c
+@@ -71,55 +71,6 @@ void end_swap_bio_write(struct bio *bio)
+       bio_put(bio);
+ }
+-static void swap_slot_free_notify(struct page *page)
+-{
+-      struct swap_info_struct *sis;
+-      struct gendisk *disk;
+-      swp_entry_t entry;
+-
+-      /*
+-       * There is no guarantee that the page is in swap cache - the software
+-       * suspend code (at least) uses end_swap_bio_read() against a non-
+-       * swapcache page.  So we must check PG_swapcache before proceeding with
+-       * this optimization.
+-       */
+-      if (unlikely(!PageSwapCache(page)))
+-              return;
+-
+-      sis = page_swap_info(page);
+-      if (!(sis->flags & SWP_BLKDEV))
+-              return;
+-
+-      /*
+-       * The swap subsystem performs lazy swap slot freeing,
+-       * expecting that the page will be swapped out again.
+-       * So we can avoid an unnecessary write if the page
+-       * isn't redirtied.
+-       * This is good for real swap storage because we can
+-       * reduce unnecessary I/O and enhance wear-leveling
+-       * if an SSD is used as the as swap device.
+-       * But if in-memory swap device (eg zram) is used,
+-       * this causes a duplicated copy between uncompressed
+-       * data in VM-owned memory and compressed data in
+-       * zram-owned memory.  So let's free zram-owned memory
+-       * and make the VM-owned decompressed page *dirty*,
+-       * so the page should be swapped out somewhere again if
+-       * we again wish to reclaim it.
+-       */
+-      disk = sis->bdev->bd_disk;
+-      entry.val = page_private(page);
+-      if (disk->fops->swap_slot_free_notify &&
+-                      __swap_count(sis, entry) == 1) {
+-              unsigned long offset;
+-
+-              offset = swp_offset(entry);
+-
+-              SetPageDirty(page);
+-              disk->fops->swap_slot_free_notify(sis->bdev,
+-                              offset);
+-      }
+-}
+-
+ static void end_swap_bio_read(struct bio *bio)
+ {
+       struct page *page = bio_first_page_all(bio);
+@@ -135,7 +86,6 @@ static void end_swap_bio_read(struct bio
+       }
+       SetPageUptodate(page);
+-      swap_slot_free_notify(page);
+ out:
+       unlock_page(page);
+       WRITE_ONCE(bio->bi_private, NULL);
+@@ -373,11 +323,6 @@ int swap_readpage(struct page *page, boo
+       ret = bdev_read_page(sis->bdev, map_swap_page(page, &sis->bdev), page);
+       if (!ret) {
+-              if (trylock_page(page)) {
+-                      swap_slot_free_notify(page);
+-                      unlock_page(page);
+-              }
+-
+               count_vm_event(PSWPIN);
+               return 0;
+       }
diff --git a/queue-4.19/series b/queue-4.19/series
index 00507e39a83d2b9d8e1a7c01aec89d1a92fc62ff..385c4c8730a41f33b6216c08e3ff4761f036aa80 100644 (file)
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -81,3 +81,4 @@ net-igmp-respect-rcu-rules-in-ip_mc_source-and-ip_mc_msfilter.patch
 hwmon-adt7470-fix-warning-on-module-removal.patch-23920
 kvm-x86-cpuid-only-provide-cpuid-leaf-0xa-if-host-ha.patch
 nfc-netlink-fix-sleep-in-atomic-bug-when-firmware-do.patch
+mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch
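To make the race in the changelog concrete, below is a minimal userspace sketch, not kernel code: the thread bodies cpu_a/cpu_b, fake_swap_readpage(), backing_present, PAGE_SZ and the semaphore-forced ordering are all invented stand-ins for do_swap_page, swap_readpage, the zram entry and the scheduling shown in the diagrams. It illustrates why, once the read path deletes the backing copy early (the swap_slot_free_notify behaviour this patch removes), the later pte_same-style recheck under the page table lock can only make the loser discard its good copy; it cannot bring the lost data back.

/*
 * Minimal userspace model of the race above -- NOT kernel code.  Two
 * threads stand in for the two CLONE_VM processes, a byte buffer stands
 * in for the zram-backed swap slot, and a mutex-protected pointer stands
 * in for the PTE.  Semaphores force the interleaving from the changelog
 * so the bad outcome reproduces deterministically.
 *
 * Build: cc -pthread race-model.c -o race-model
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ 16

/* "zram": the compressed copy of the swapped-out page. */
static char backing[PAGE_SZ] = "original data!";
static int backing_present = 1;

/* "PTE": whichever page is currently mapped, guarded by "pte_lock". */
static pthread_mutex_t pte_lock = PTHREAD_MUTEX_INITIALIZER;
static char *mapped_page;

static sem_t a_freed_slot, b_installed_pte;

/* Models swap_readpage(): valid data while the slot exists, zeroes after. */
static void fake_swap_readpage(char *page)
{
	if (backing_present)
		memcpy(page, backing, PAGE_SZ);
	else
		memset(page, 0, PAGE_SZ);
}

static void *cpu_a(void *arg)
{
	char *page = calloc(1, PAGE_SZ);

	(void)arg;
	fake_swap_readpage(page);	/* reads valid data		      */
	backing_present = 0;		/* models swap_slot_free_notify():
					 * backing copy deleted before B reads */
	sem_post(&a_freed_slot);

	sem_wait(&b_installed_pte);	/* B wins the mapping race	      */
	pthread_mutex_lock(&pte_lock);
	if (!mapped_page)		/* false: B already mapped, like !pte_same() */
		mapped_page = page;
	else
		free(page);		/* out_nomap: A discards its good copy */
	pthread_mutex_unlock(&pte_lock);
	return NULL;
}

static void *cpu_b(void *arg)
{
	char *page = calloc(1, PAGE_SZ);

	(void)arg;
	sem_wait(&a_freed_slot);
	fake_swap_readpage(page);	/* slot already gone: zeroed data     */

	pthread_mutex_lock(&pte_lock);
	if (!mapped_page)
		mapped_page = page;	/* the zeroed page becomes the mapping */
	pthread_mutex_unlock(&pte_lock);
	sem_post(&b_installed_pte);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	sem_init(&a_freed_slot, 0, 0);
	sem_init(&b_installed_pte, 0, 0);
	pthread_create(&a, NULL, cpu_a, NULL);
	pthread_create(&b, NULL, cpu_b, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	/* Prints an empty string: both "processes" now see an all-zero page. */
	printf("mapped page: \"%s\" (was \"%s\" on the swap device)\n",
	       mapped_page, backing);
	return 0;
}

With the patch applied the early free does not happen (in the model, drop the backing_present = 0 line): the compressed copy survives until swap_free drops the last reference, so the second reader copies valid data and the final mapping is correct, matching the second diagram in the changelog.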