git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 18 Apr 2022 08:13:05 +0000 (10:13 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 18 Apr 2022 08:13:05 +0000 (10:13 +0200)
added patches:
gcc-plugins-latent_entropy-use-dev-urandom.patch
kvm-don-t-create-vm-debugfs-files-outside-of-the-vm-directory.patch
kvm-x86-mmu-resolve-nx_huge_pages-when-kvm.ko-is-loaded.patch
memory-renesas-rpc-if-fix-platform-device-leak-in-error-path.patch
mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch
mm-kmemleak-take-a-full-lowmem-check-in-kmemleak_-_phys.patch
mm-page_alloc-fix-build_zonerefs_node.patch

queue-5.10/gcc-plugins-latent_entropy-use-dev-urandom.patch [new file with mode: 0644]
queue-5.10/kvm-don-t-create-vm-debugfs-files-outside-of-the-vm-directory.patch [new file with mode: 0644]
queue-5.10/kvm-x86-mmu-resolve-nx_huge_pages-when-kvm.ko-is-loaded.patch [new file with mode: 0644]
queue-5.10/memory-renesas-rpc-if-fix-platform-device-leak-in-error-path.patch [new file with mode: 0644]
queue-5.10/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch [new file with mode: 0644]
queue-5.10/mm-kmemleak-take-a-full-lowmem-check-in-kmemleak_-_phys.patch [new file with mode: 0644]
queue-5.10/mm-page_alloc-fix-build_zonerefs_node.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/gcc-plugins-latent_entropy-use-dev-urandom.patch b/queue-5.10/gcc-plugins-latent_entropy-use-dev-urandom.patch
new file mode 100644 (file)
index 0000000..6676923
--- /dev/null
@@ -0,0 +1,121 @@
+From c40160f2998c897231f8454bf797558d30a20375 Mon Sep 17 00:00:00 2001
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Wed, 6 Apr 2022 00:28:15 +0200
+Subject: gcc-plugins: latent_entropy: use /dev/urandom
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+commit c40160f2998c897231f8454bf797558d30a20375 upstream.
+
+While the latent entropy plugin mostly doesn't derive entropy from
+get_random_const() for measuring the call graph, when __latent_entropy
+is applied to a constant, that constant is initialized statically to
+output from get_random_const(). In that case, the data is derived from
+a 64-bit seed, which means a buffer of 512 bits doesn't really have
+that amount of compile-time entropy.
+
+This patch fixes that shortcoming by just buffering chunks of
+/dev/urandom output and doling it out as requested.
+
+At the same time, it's important that we don't break the use of
+-frandom-seed, for people who want the runtime benefits of the latent
+entropy plugin, while still having compile-time determinism. In that
+case, we detect whether gcc's set_random_seed() has been called by
+making a call to get_random_seed(noinit=true) in the plugin init
+function, which is called after set_random_seed() is called but before
+anything that calls get_random_seed(noinit=false), and seeing if it's
+zero or not. If it's not zero, we're in deterministic mode, and so we
+just generate numbers with a basic xorshift prng.
+
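+For reference, the deterministic path reduces to a plain xorshift64
+step; a minimal stand-alone sketch (illustrative only, not part of the
+actual plugin code below) would be:
+
+   #include <stdint.h>
+
+   /* Same recurrence the plugin uses in deterministic mode. */
+   static uint64_t xorshift64(uint64_t *state)
+   {
+           uint64_t w = *state;
+           w ^= w << 13;
+           w ^= w >> 7;
+           w ^= w << 17;
+           *state = w;
+           return w;
+   }
+
+A zero state would stay zero forever, which is why a zero return from
+get_random_seed(noinit=true) can double as "no -frandom-seed given".
+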
+Note that we don't detect whether -frandom-seed is being used via the
+documented local_tick variable, because it's assigned via:
+   local_tick = (unsigned) tv.tv_sec * 1000 + tv.tv_usec / 1000;
+which may well overflow and become -1 on its own, and so isn't
+reliable: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105171
+
+[kees: The 256 byte rnd_buf size was chosen based on average (250),
+ median (64), and std deviation (575) bytes of used entropy for a
+ defconfig x86_64 build]
+
+Fixes: 38addce8b600 ("gcc-plugins: Add latent_entropy plugin")
+Cc: stable@vger.kernel.org
+Cc: PaX Team <pageexec@freemail.hu>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20220405222815.21155-1-Jason@zx2c4.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/gcc-plugins/latent_entropy_plugin.c |   44 +++++++++++++++++-----------
+ 1 file changed, 27 insertions(+), 17 deletions(-)
+
+--- a/scripts/gcc-plugins/latent_entropy_plugin.c
++++ b/scripts/gcc-plugins/latent_entropy_plugin.c
+@@ -86,25 +86,31 @@ static struct plugin_info latent_entropy
+       .help           = "disable\tturn off latent entropy instrumentation\n",
+ };
+-static unsigned HOST_WIDE_INT seed;
+-/*
+- * get_random_seed() (this is a GCC function) generates the seed.
+- * This is a simple random generator without any cryptographic security because
+- * the entropy doesn't come from here.
+- */
++static unsigned HOST_WIDE_INT deterministic_seed;
++static unsigned HOST_WIDE_INT rnd_buf[32];
++static size_t rnd_idx = ARRAY_SIZE(rnd_buf);
++static int urandom_fd = -1;
++
+ static unsigned HOST_WIDE_INT get_random_const(void)
+ {
+-      unsigned int i;
+-      unsigned HOST_WIDE_INT ret = 0;
+-
+-      for (i = 0; i < 8 * sizeof(ret); i++) {
+-              ret = (ret << 1) | (seed & 1);
+-              seed >>= 1;
+-              if (ret & 1)
+-                      seed ^= 0xD800000000000000ULL;
++      if (deterministic_seed) {
++              unsigned HOST_WIDE_INT w = deterministic_seed;
++              w ^= w << 13;
++              w ^= w >> 7;
++              w ^= w << 17;
++              deterministic_seed = w;
++              return deterministic_seed;
+       }
+-      return ret;
++      if (urandom_fd < 0) {
++              urandom_fd = open("/dev/urandom", O_RDONLY);
++              gcc_assert(urandom_fd >= 0);
++      }
++      if (rnd_idx >= ARRAY_SIZE(rnd_buf)) {
++              gcc_assert(read(urandom_fd, rnd_buf, sizeof(rnd_buf)) == sizeof(rnd_buf));
++              rnd_idx = 0;
++      }
++      return rnd_buf[rnd_idx++];
+ }
+ static tree tree_get_random_const(tree type)
+@@ -549,8 +555,6 @@ static void latent_entropy_start_unit(vo
+       tree type, id;
+       int quals;
+-      seed = get_random_seed(false);
+-
+       if (in_lto_p)
+               return;
+@@ -585,6 +589,12 @@ __visible int plugin_init(struct plugin_
+       const struct plugin_argument * const argv = plugin_info->argv;
+       int i;
++      /*
++       * Call get_random_seed() with noinit=true, so that this returns
++       * 0 in the case where no seed has been passed via -frandom-seed.
++       */
++      deterministic_seed = get_random_seed(true);
++
+       static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = {
+               {
+                       .base = &latent_entropy_decl,
diff --git a/queue-5.10/kvm-don-t-create-vm-debugfs-files-outside-of-the-vm-directory.patch b/queue-5.10/kvm-don-t-create-vm-debugfs-files-outside-of-the-vm-directory.patch
new file mode 100644 (file)
index 0000000..c8dd9fc
--- /dev/null
@@ -0,0 +1,68 @@
+From a44a4cc1c969afec97dbb2aedaf6f38eaa6253bb Mon Sep 17 00:00:00 2001
+From: Oliver Upton <oupton@google.com>
+Date: Wed, 6 Apr 2022 23:56:13 +0000
+Subject: KVM: Don't create VM debugfs files outside of the VM directory
+
+From: Oliver Upton <oupton@google.com>
+
+commit a44a4cc1c969afec97dbb2aedaf6f38eaa6253bb upstream.
+
+Unfortunately, there is no guarantee that KVM was able to instantiate a
+debugfs directory for a particular VM. To that end, KVM shouldn't even
+attempt to create new debugfs files in this case. If the specified
+parent dentry is NULL, debugfs_create_file() will instantiate files at
+the root of debugfs.
+
+For arm64, it is possible to create the vgic-state file outside of a
+VM directory; the file is then not cleaned up when the VM is destroyed,
+even though the corresponding struct kvm is freed.
+
+Nip the problem in the bud for all possible errant debugfs file
+creations by initializing kvm->debugfs_dentry to -ENOENT. In so doing,
+debugfs_create_file() will fail instead of creating the file in the root
+directory.
+
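+To illustrate the difference (a sketch only; "vgic_fops" is an
+illustrative name, not part of this patch):
+
+   /* NULL parent: the file silently lands at the debugfs root. */
+   debugfs_create_file("vgic-state", 0444, NULL, kvm, &vgic_fops);
+
+   /* ERR_PTR parent: creation fails instead of polluting the root. */
+   debugfs_create_file("vgic-state", 0444, ERR_PTR(-ENOENT), kvm,
+                       &vgic_fops);
+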
+Cc: stable@kernel.org
+Fixes: 929f45e32499 ("kvm: no need to check return value of debugfs_create functions")
+Signed-off-by: Oliver Upton <oupton@google.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20220406235615.1447180-2-oupton@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |   10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -673,7 +673,7 @@ static void kvm_destroy_vm_debugfs(struc
+ {
+       int i;
+-      if (!kvm->debugfs_dentry)
++      if (IS_ERR(kvm->debugfs_dentry))
+               return;
+       debugfs_remove_recursive(kvm->debugfs_dentry);
+@@ -693,6 +693,12 @@ static int kvm_create_vm_debugfs(struct
+       struct kvm_stat_data *stat_data;
+       struct kvm_stats_debugfs_item *p;
++      /*
++       * Force subsequent debugfs file creations to fail if the VM directory
++       * is not created.
++       */
++      kvm->debugfs_dentry = ERR_PTR(-ENOENT);
++
+       if (!debugfs_initialized())
+               return 0;
+@@ -4731,7 +4737,7 @@ static void kvm_uevent_notify_change(uns
+       }
+       add_uevent_var(env, "PID=%d", kvm->userspace_pid);
+-      if (kvm->debugfs_dentry) {
++      if (!IS_ERR(kvm->debugfs_dentry)) {
+               char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
+               if (p) {
diff --git a/queue-5.10/kvm-x86-mmu-resolve-nx_huge_pages-when-kvm.ko-is-loaded.patch b/queue-5.10/kvm-x86-mmu-resolve-nx_huge_pages-when-kvm.ko-is-loaded.patch
new file mode 100644 (file)
index 0000000..4efa90f
--- /dev/null
@@ -0,0 +1,156 @@
+From 1d0e84806047f38027d7572adb4702ef7c09b317 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 31 Mar 2022 22:13:59 +0000
+Subject: KVM: x86/mmu: Resolve nx_huge_pages when kvm.ko is loaded
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 1d0e84806047f38027d7572adb4702ef7c09b317 upstream.
+
+Resolve nx_huge_pages to true/false when kvm.ko is loaded; leaving it as
+-1 is technically undefined behavior when its value is read out by
+param_get_bool(), as boolean values are supposed to be '0' or '1'.
+
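+Reduced to a sketch (simplified; param_get_bool() lives in
+kernel/params.c and does roughly this):
+
+   static int nx_huge_pages = -1;             /* -1 means "auto" */
+   /* the param's .get hook is param_get_bool(): */
+   sprintf(buffer, "%c\n", *(bool *)kp->arg ? 'Y' : 'N');
+
+Loading the 0xff byte of the -1 through a bool lvalue is the
+invalid-value load UBSAN complains about below.
+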
+Alternatively, KVM could define a custom getter for the param, but the
+auto value doesn't depend on the vendor module in any way, and printing
+"auto" would be unnecessarily unfriendly to the user.
+
+In addition to fixing the undefined behavior, resolving the auto value
+also fixes the scenario where the auto value resolves to N and no vendor
+module is loaded.  Previously, -1 would result in Y being printed even
+though KVM would ultimately disable the mitigation.
+
+Rename the existing MMU module init/exit helpers to clarify that they're
+invoked with respect to the vendor module, and add comments to document
+why KVM has two separate "module init" flows.
+
+  =========================================================================
+  UBSAN: invalid-load in kernel/params.c:320:33
+  load of value 255 is not a valid value for type '_Bool'
+  CPU: 6 PID: 892 Comm: tail Not tainted 5.17.0-rc3+ #799
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+  Call Trace:
+   <TASK>
+   dump_stack_lvl+0x34/0x44
+   ubsan_epilogue+0x5/0x40
+   __ubsan_handle_load_invalid_value.cold+0x43/0x48
+   param_get_bool.cold+0xf/0x14
+   param_attr_show+0x55/0x80
+   module_attr_show+0x1c/0x30
+   sysfs_kf_seq_show+0x93/0xc0
+   seq_read_iter+0x11c/0x450
+   new_sync_read+0x11b/0x1a0
+   vfs_read+0xf0/0x190
+   ksys_read+0x5f/0xe0
+   do_syscall_64+0x3b/0xc0
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+   </TASK>
+  =========================================================================
+
+Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation")
+Cc: stable@vger.kernel.org
+Reported-by: Bruno Goncalves <bgoncalv@redhat.com>
+Reported-by: Jan Stancek <jstancek@redhat.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220331221359.3912754-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    5 +++--
+ arch/x86/kvm/mmu/mmu.c          |   20 ++++++++++++++++----
+ arch/x86/kvm/x86.c              |   20 ++++++++++++++++++--
+ 3 files changed, 37 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1340,8 +1340,9 @@ static inline int kvm_arch_flush_remote_
+               return -ENOTSUPP;
+ }
+-int kvm_mmu_module_init(void);
+-void kvm_mmu_module_exit(void);
++void kvm_mmu_x86_module_init(void);
++int kvm_mmu_vendor_module_init(void);
++void kvm_mmu_vendor_module_exit(void);
+ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
+ int kvm_mmu_create(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5876,12 +5876,24 @@ static int set_nx_huge_pages(const char
+       return 0;
+ }
+-int kvm_mmu_module_init(void)
++/*
++ * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as
++ * its default value of -1 is technically undefined behavior for a boolean.
++ */
++void kvm_mmu_x86_module_init(void)
+ {
+-      int ret = -ENOMEM;
+-
+       if (nx_huge_pages == -1)
+               __set_nx_huge_pages(get_nx_auto_mode());
++}
++
++/*
++ * The bulk of the MMU initialization is deferred until the vendor module is
++ * loaded as many of the masks/values may be modified by VMX or SVM, i.e. need
++ * to be reset when a potentially different vendor module is loaded.
++ */
++int kvm_mmu_vendor_module_init(void)
++{
++      int ret = -ENOMEM;
+       /*
+        * MMU roles use union aliasing which is, generally speaking, an
+@@ -5955,7 +5967,7 @@ void kvm_mmu_destroy(struct kvm_vcpu *vc
+       mmu_free_memory_caches(vcpu);
+ }
+-void kvm_mmu_module_exit(void)
++void kvm_mmu_vendor_module_exit(void)
+ {
+       mmu_destroy_caches();
+       percpu_counter_destroy(&kvm_total_used_mmu_pages);
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8005,7 +8005,7 @@ int kvm_arch_init(void *opaque)
+               goto out_free_x86_emulator_cache;
+       }
+-      r = kvm_mmu_module_init();
++      r = kvm_mmu_vendor_module_init();
+       if (r)
+               goto out_free_percpu;
+@@ -8065,7 +8065,7 @@ void kvm_arch_exit(void)
+       cancel_work_sync(&pvclock_gtod_work);
+ #endif
+       kvm_x86_ops.hardware_enable = NULL;
+-      kvm_mmu_module_exit();
++      kvm_mmu_vendor_module_exit();
+       free_percpu(user_return_msrs);
+       kmem_cache_destroy(x86_emulator_cache);
+       kmem_cache_destroy(x86_fpu_cache);
+@@ -11426,3 +11426,19 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_un
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
++
++static int __init kvm_x86_init(void)
++{
++      kvm_mmu_x86_module_init();
++      return 0;
++}
++module_init(kvm_x86_init);
++
++static void __exit kvm_x86_exit(void)
++{
++      /*
++       * If module_init() is implemented, module_exit() must also be
++       * implemented to allow module unload.
++       */
++}
++module_exit(kvm_x86_exit);
diff --git a/queue-5.10/memory-renesas-rpc-if-fix-platform-device-leak-in-error-path.patch b/queue-5.10/memory-renesas-rpc-if-fix-platform-device-leak-in-error-path.patch
new file mode 100644 (file)
index 0000000..526d7c0
--- /dev/null
@@ -0,0 +1,49 @@
+From b452dbf24d7d9a990d70118462925f6ee287d135 Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Thu, 3 Mar 2022 19:06:32 +0100
+Subject: memory: renesas-rpc-if: fix platform-device leak in error path
+
+From: Johan Hovold <johan@kernel.org>
+
+commit b452dbf24d7d9a990d70118462925f6ee287d135 upstream.
+
+Make sure to free the flash platform device in the event that
+registration fails during probe.
+
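+For reference, the resulting pattern is the usual alloc/add/put
+sequence (a sketch, simplified from the diff below):
+
+   vdev = platform_device_alloc(name, pdev->id);  /* holds a reference */
+   ...
+   ret = platform_device_add(vdev);
+   if (ret)
+           platform_device_put(vdev);  /* drop the reference; frees vdev */
+
+After a failed platform_device_add(), platform_device_put() (not
+platform_device_del() or kfree()) is the documented way to release the
+device.
+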
+Fixes: ca7d8b980b67 ("memory: add Renesas RPC-IF driver")
+Cc: stable@vger.kernel.org      # 5.8
+Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Link: https://lore.kernel.org/r/20220303180632.3194-1-johan@kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/memory/renesas-rpc-if.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/drivers/memory/renesas-rpc-if.c
++++ b/drivers/memory/renesas-rpc-if.c
+@@ -592,6 +592,7 @@ static int rpcif_probe(struct platform_d
+       struct platform_device *vdev;
+       struct device_node *flash;
+       const char *name;
++      int ret;
+       flash = of_get_next_child(pdev->dev.of_node, NULL);
+       if (!flash) {
+@@ -615,7 +616,14 @@ static int rpcif_probe(struct platform_d
+               return -ENOMEM;
+       vdev->dev.parent = &pdev->dev;
+       platform_set_drvdata(pdev, vdev);
+-      return platform_device_add(vdev);
++
++      ret = platform_device_add(vdev);
++      if (ret) {
++              platform_device_put(vdev);
++              return ret;
++      }
++
++      return 0;
+ }
+ static int rpcif_remove(struct platform_device *pdev)
diff --git a/queue-5.10/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch b/queue-5.10/mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch
new file mode 100644 (file)
index 0000000..c2e6f89
--- /dev/null
@@ -0,0 +1,156 @@
+From e914d8f00391520ecc4495dd0ca0124538ab7119 Mon Sep 17 00:00:00 2001
+From: Minchan Kim <minchan@kernel.org>
+Date: Thu, 14 Apr 2022 19:13:46 -0700
+Subject: mm: fix unexpected zeroed page mapping with zram swap
+
+From: Minchan Kim <minchan@kernel.org>
+
+commit e914d8f00391520ecc4495dd0ca0124538ab7119 upstream.
+
+When two processes are cloned with CLONE_VM, the user process can be
+corrupted by unexpectedly seeing a zeroed page.
+
+      CPU A                        CPU B
+
+  do_swap_page                do_swap_page
+  SWP_SYNCHRONOUS_IO path     SWP_SYNCHRONOUS_IO path
+  swap_readpage valid data
+    swap_slot_free_notify
+      delete zram entry
+                              swap_readpage zeroed(invalid) data
+                              pte_lock
+                              map the *zero data* to userspace
+                              pte_unlock
+  pte_lock
+  if (!pte_same)
+    goto out_nomap;
+  pte_unlock
+  return and next refault will
+  read zeroed data
+
+The swap_slot_free_notify path is bogus for the CLONE_VM case since the
+swap slot's refcount is not increased at copy_mm, so it cannot tell
+whether it is safe to discard data from the backing device (see the
+snippet after the second diagram below).  In that case, the only lock it
+could rely on to synchronize swap slot freeing is the page table lock.
+Thus, this patch gets rid of the swap_slot_free_notify function.  With
+this patch, CPU A will see correct data.
+
+      CPU A                        CPU B
+
+  do_swap_page                do_swap_page
+  SWP_SYNCHRONOUS_IO path     SWP_SYNCHRONOUS_IO path
+                              swap_readpage original data
+                              pte_lock
+                              map the original data
+                              swap_free
+                                swap_range_free
+                                  bd_disk->fops->swap_slot_free_notify
+  swap_readpage read zeroed data
+                              pte_unlock
+  pte_lock
+  if (!pte_same)
+    goto out_nomap;
+  pte_unlock
+  return
+  on next refault will see mapped data by CPU B
+
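+For reference, the guard in the removed helper (quoted from the
+deletion below) was:
+
+   if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1)
+
+Under CLONE_VM both tasks share one mm, so the swap count can stay at 1
+while the other task is still in the middle of its own swap-in, and the
+"== 1" test wrongly concludes the slot can be discarded.
+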
+A concern with this patch is increased memory consumption, since it
+could keep wasted memory in compressed form in zram as well as in
+uncompressed form in the address space.  However, most zram setups use
+no readahead, and do_swap_page is followed by swap_free, so the
+compressed copy in zram is freed quickly.
+
+Link: https://lkml.kernel.org/r/YjTVVxIAsnKAXjTd@google.com
+Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device")
+Reported-by: Ivan Babrou <ivan@cloudflare.com>
+Tested-by: Ivan Babrou <ivan@cloudflare.com>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Cc: Nitin Gupta <ngupta@vflare.org>
+Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: <stable@vger.kernel.org>   [4.14+]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_io.c |   54 ------------------------------------------------------
+ 1 file changed, 54 deletions(-)
+
+--- a/mm/page_io.c
++++ b/mm/page_io.c
+@@ -69,54 +69,6 @@ void end_swap_bio_write(struct bio *bio)
+       bio_put(bio);
+ }
+-static void swap_slot_free_notify(struct page *page)
+-{
+-      struct swap_info_struct *sis;
+-      struct gendisk *disk;
+-      swp_entry_t entry;
+-
+-      /*
+-       * There is no guarantee that the page is in swap cache - the software
+-       * suspend code (at least) uses end_swap_bio_read() against a non-
+-       * swapcache page.  So we must check PG_swapcache before proceeding with
+-       * this optimization.
+-       */
+-      if (unlikely(!PageSwapCache(page)))
+-              return;
+-
+-      sis = page_swap_info(page);
+-      if (data_race(!(sis->flags & SWP_BLKDEV)))
+-              return;
+-
+-      /*
+-       * The swap subsystem performs lazy swap slot freeing,
+-       * expecting that the page will be swapped out again.
+-       * So we can avoid an unnecessary write if the page
+-       * isn't redirtied.
+-       * This is good for real swap storage because we can
+-       * reduce unnecessary I/O and enhance wear-leveling
+-       * if an SSD is used as the as swap device.
+-       * But if in-memory swap device (eg zram) is used,
+-       * this causes a duplicated copy between uncompressed
+-       * data in VM-owned memory and compressed data in
+-       * zram-owned memory.  So let's free zram-owned memory
+-       * and make the VM-owned decompressed page *dirty*,
+-       * so the page should be swapped out somewhere again if
+-       * we again wish to reclaim it.
+-       */
+-      disk = sis->bdev->bd_disk;
+-      entry.val = page_private(page);
+-      if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
+-              unsigned long offset;
+-
+-              offset = swp_offset(entry);
+-
+-              SetPageDirty(page);
+-              disk->fops->swap_slot_free_notify(sis->bdev,
+-                              offset);
+-      }
+-}
+-
+ static void end_swap_bio_read(struct bio *bio)
+ {
+       struct page *page = bio_first_page_all(bio);
+@@ -132,7 +84,6 @@ static void end_swap_bio_read(struct bio
+       }
+       SetPageUptodate(page);
+-      swap_slot_free_notify(page);
+ out:
+       unlock_page(page);
+       WRITE_ONCE(bio->bi_private, NULL);
+@@ -409,11 +360,6 @@ int swap_readpage(struct page *page, boo
+       if (sis->flags & SWP_SYNCHRONOUS_IO) {
+               ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
+               if (!ret) {
+-                      if (trylock_page(page)) {
+-                              swap_slot_free_notify(page);
+-                              unlock_page(page);
+-                      }
+-
+                       count_vm_event(PSWPIN);
+                       goto out;
+               }
diff --git a/queue-5.10/mm-kmemleak-take-a-full-lowmem-check-in-kmemleak_-_phys.patch b/queue-5.10/mm-kmemleak-take-a-full-lowmem-check-in-kmemleak_-_phys.patch
new file mode 100644 (file)
index 0000000..aedc5a6
--- /dev/null
@@ -0,0 +1,96 @@
+From 23c2d497de21f25898fbea70aeb292ab8acc8c94 Mon Sep 17 00:00:00 2001
+From: Patrick Wang <patrick.wang.shcn@gmail.com>
+Date: Thu, 14 Apr 2022 19:14:04 -0700
+Subject: mm: kmemleak: take a full lowmem check in kmemleak_*_phys()
+
+From: Patrick Wang <patrick.wang.shcn@gmail.com>
+
+commit 23c2d497de21f25898fbea70aeb292ab8acc8c94 upstream.
+
+The kmemleak_*_phys() APIs do not check the address against lowmem's
+minimum boundary, while a caller may pass an address below lowmem,
+which triggers an oops:
+
+  # echo scan > /sys/kernel/debug/kmemleak
+  Unable to handle kernel paging request at virtual address ff5fffffffe00000
+  Oops [#1]
+  Modules linked in:
+  CPU: 2 PID: 134 Comm: bash Not tainted 5.18.0-rc1-next-20220407 #33
+  Hardware name: riscv-virtio,qemu (DT)
+  epc : scan_block+0x74/0x15c
+   ra : scan_block+0x72/0x15c
+  epc : ffffffff801e5806 ra : ffffffff801e5804 sp : ff200000104abc30
+   gp : ffffffff815cd4e8 tp : ff60000004cfa340 t0 : 0000000000000200
+   t1 : 00aaaaaac23954cc t2 : 00000000000003ff s0 : ff200000104abc90
+   s1 : ffffffff81b0ff28 a0 : 0000000000000000 a1 : ff5fffffffe01000
+   a2 : ffffffff81b0ff28 a3 : 0000000000000002 a4 : 0000000000000001
+   a5 : 0000000000000000 a6 : ff200000104abd7c a7 : 0000000000000005
+   s2 : ff5fffffffe00ff9 s3 : ffffffff815cd998 s4 : ffffffff815d0e90
+   s5 : ffffffff81b0ff28 s6 : 0000000000000020 s7 : ffffffff815d0eb0
+   s8 : ffffffffffffffff s9 : ff5fffffffe00000 s10: ff5fffffffe01000
+   s11: 0000000000000022 t3 : 00ffffffaa17db4c t4 : 000000000000000f
+   t5 : 0000000000000001 t6 : 0000000000000000
+  status: 0000000000000100 badaddr: ff5fffffffe00000 cause: 000000000000000d
+    scan_gray_list+0x12e/0x1a6
+    kmemleak_scan+0x2aa/0x57e
+    kmemleak_write+0x32a/0x40c
+    full_proxy_write+0x56/0x82
+    vfs_write+0xa6/0x2a6
+    ksys_write+0x6c/0xe2
+    sys_write+0x22/0x2a
+    ret_from_syscall+0x0/0x2
+
+Callers may not know the actual address they pass (e.g. when it comes
+from the devicetree).  So the kmemleak_*_phys() APIs should guarantee
+that the address they finally use is in the lowmem range; check the
+address against lowmem's minimum boundary.
+
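+The underlying issue is that __va() is a pure offset translation with
+no range checking; on x86, for example (illustrative and
+architecture-specific):
+
+   #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
+
+so a physical address below the start of lowmem yields a virtual
+address outside the linear mapping, like the ff5fffffffe00000 access
+in the oops above.
+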
+Link: https://lkml.kernel.org/r/20220413122925.33856-1-patrick.wang.shcn@gmail.com
+Signed-off-by: Patrick Wang <patrick.wang.shcn@gmail.com>
+Acked-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/kmemleak.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/mm/kmemleak.c
++++ b/mm/kmemleak.c
+@@ -1123,7 +1123,7 @@ EXPORT_SYMBOL(kmemleak_no_scan);
+ void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
+                              gfp_t gfp)
+ {
+-      if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
++      if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
+               kmemleak_alloc(__va(phys), size, min_count, gfp);
+ }
+ EXPORT_SYMBOL(kmemleak_alloc_phys);
+@@ -1137,7 +1137,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys);
+  */
+ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size)
+ {
+-      if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
++      if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
+               kmemleak_free_part(__va(phys), size);
+ }
+ EXPORT_SYMBOL(kmemleak_free_part_phys);
+@@ -1149,7 +1149,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys);
+  */
+ void __ref kmemleak_not_leak_phys(phys_addr_t phys)
+ {
+-      if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
++      if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
+               kmemleak_not_leak(__va(phys));
+ }
+ EXPORT_SYMBOL(kmemleak_not_leak_phys);
+@@ -1161,7 +1161,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys);
+  */
+ void __ref kmemleak_ignore_phys(phys_addr_t phys)
+ {
+-      if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
++      if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
+               kmemleak_ignore(__va(phys));
+ }
+ EXPORT_SYMBOL(kmemleak_ignore_phys);
diff --git a/queue-5.10/mm-page_alloc-fix-build_zonerefs_node.patch b/queue-5.10/mm-page_alloc-fix-build_zonerefs_node.patch
new file mode 100644 (file)
index 0000000..672c554
--- /dev/null
@@ -0,0 +1,69 @@
+From e553f62f10d93551eb883eca227ac54d1a4fad84 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Thu, 14 Apr 2022 19:13:43 -0700
+Subject: mm, page_alloc: fix build_zonerefs_node()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Juergen Gross <jgross@suse.com>
+
+commit e553f62f10d93551eb883eca227ac54d1a4fad84 upstream.
+
+Since commit 6aa303defb74 ("mm, vmscan: only allocate and reclaim from
+zones with pages managed by the buddy allocator") only zones with free
+memory are included in a built zonelist.  This is problematic when e.g.
+all memory of a zone has been ballooned out when zonelists are being
+rebuilt.
+
+The decision whether to rebuild the zonelists when onlining new memory
+is based on populated_zone() returning 0 for the zone the memory will
+be added to.  The new zone is added to the zonelists only if it has
+free memory pages (managed_zone() returns a non-zero value) after the
+memory has been onlined.  This implies that onlining memory will always
+free the added pages to the allocator immediately, but this is not true
+in all cases: when e.g. running as a Xen guest, the onlined new memory
+is added only to the ballooned memory list; it is freed only when the
+guest is ballooned up afterwards.
+
+Another problem with using managed_zone() to decide whether a zone is
+added to the zonelists is that a zone with all of its memory in use
+will in fact be removed from all zonelists whenever the zonelists
+happen to be rebuilt.
+
+Use populated_zone() when building a zonelist as it has been done before
+that commit.
+
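+For reference, the two predicates differ as follows (paraphrased from
+include/linux/mmzone.h):
+
+   static inline bool managed_zone(struct zone *zone)
+   {
+           return zone_managed_pages(zone); /* pages under the buddy allocator */
+   }
+
+   static inline bool populated_zone(struct zone *zone)
+   {
+           return zone->present_pages;      /* pages physically present */
+   }
+
+A fully ballooned-out zone still has present pages but no managed
+pages, so only populated_zone() keeps it in the zonelists.
+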
+There was a report that QubesOS (based on Xen) is hitting this problem.
+Xen switched to using the zone device functionality in kernel 5.9, and
+QubesOS wants to use memory hotplugging for guests in order to start a
+guest with minimal memory and expand it as needed.  That report led to
+this patch.
+
+Link: https://lkml.kernel.org/r/20220407120637.9035-1-jgross@suse.com
+Fixes: 6aa303defb74 ("mm, vmscan: only allocate and reclaim from zones with pages managed by the buddy allocator")
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -5653,7 +5653,7 @@ static int build_zonerefs_node(pg_data_t
+       do {
+               zone_type--;
+               zone = pgdat->node_zones + zone_type;
+-              if (managed_zone(zone)) {
++              if (populated_zone(zone)) {
+                       zoneref_set_zone(zone, &zonerefs[nr_zones++]);
+                       check_highest_zone(zone_type);
+               }
index f262be49b0d53729346366a02668a850efab2c34..af89bca19cb745ff3c28d1c4f8924f1aa9ec67b6 100644 (file)
@@ -67,3 +67,10 @@ scsi-mvsas-add-pci-id-of-rocketraid-2640.patch
 scsi-megaraid_sas-target-with-invalid-lun-id-is-dele.patch
 drivers-net-slip-fix-npd-bug-in-sl_tx_timeout.patch
 perf-imx_ddr-fix-undefined-behavior-due-to-shift-ove.patch
+mm-page_alloc-fix-build_zonerefs_node.patch
+mm-fix-unexpected-zeroed-page-mapping-with-zram-swap.patch
+mm-kmemleak-take-a-full-lowmem-check-in-kmemleak_-_phys.patch
+kvm-x86-mmu-resolve-nx_huge_pages-when-kvm.ko-is-loaded.patch
+kvm-don-t-create-vm-debugfs-files-outside-of-the-vm-directory.patch
+memory-renesas-rpc-if-fix-platform-device-leak-in-error-path.patch
+gcc-plugins-latent_entropy-use-dev-urandom.patch