git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sun, 31 Jul 2022 10:55:26 +0000 (12:55 +0200)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Sun, 31 Jul 2022 10:55:26 +0000 (12:55 +0200)
added patches:
mm-hmm-fault-non-owner-device-private-entries.patch
page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch

queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch [new file with mode: 0644]
queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch b/queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch
new file mode 100644 (file)
index 0000000..2b126d0
--- /dev/null
@@ -0,0 +1,79 @@
+From 8a295dbbaf7292c582a40ce469c326f472d51f66 Mon Sep 17 00:00:00 2001
+From: Ralph Campbell <rcampbell@nvidia.com>
+Date: Mon, 25 Jul 2022 11:36:14 -0700
+Subject: mm/hmm: fault non-owner device private entries
+
+From: Ralph Campbell <rcampbell@nvidia.com>
+
+commit 8a295dbbaf7292c582a40ce469c326f472d51f66 upstream.
+
+If hmm_range_fault() is called with the HMM_PFN_REQ_FAULT flag and a
+device private PTE is found, the hmm_range::dev_private_owner field is
+used to determine whether the device private page should be faulted in.
+However, if the device private page is not owned by the caller,
+hmm_range_fault() returns an error instead of calling migrate_to_ram()
+to fault in the page.
+
+For example, if a page is migrated to GPU private memory and an RDMA
+fault capable NIC tries to read the migrated page, without this patch it
+will get an error.  With this patch, the page will be migrated back to
+system memory and the NIC will be able to read the data.
+
+Link: https://lkml.kernel.org/r/20220727000837.4128709-2-rcampbell@nvidia.com
+Link: https://lkml.kernel.org/r/20220725183615.4118795-2-rcampbell@nvidia.com
+Fixes: 08ddddda667b ("mm/hmm: check the device private page owner in hmm_range_fault()")
+Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
+Reported-by: Felix Kuehling <felix.kuehling@amd.com>
+Reviewed-by: Alistair Popple <apopple@nvidia.com>
+Cc: Philip Yang <Philip.Yang@amd.com>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hmm.c |   19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/mm/hmm.c
++++ b/mm/hmm.c
+@@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *w
+               unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
+ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+-static inline bool hmm_is_device_private_entry(struct hmm_range *range,
+-              swp_entry_t entry)
+-{
+-      return is_device_private_entry(entry) &&
+-              pfn_swap_entry_to_page(entry)->pgmap->owner ==
+-              range->dev_private_owner;
+-}
+-
+ static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
+                                                pte_t pte)
+ {
+@@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_
+               swp_entry_t entry = pte_to_swp_entry(pte);
+               /*
+-               * Never fault in device private pages, but just report
+-               * the PFN even if not present.
++               * Don't fault in device private pages owned by the caller,
++               * just report the PFN.
+                */
+-              if (hmm_is_device_private_entry(range, entry)) {
++              if (is_device_private_entry(entry) &&
++                  pfn_swap_entry_to_page(entry)->pgmap->owner ==
++                  range->dev_private_owner) {
+                       cpu_flags = HMM_PFN_VALID;
+                       if (is_writable_device_private_entry(entry))
+                               cpu_flags |= HMM_PFN_WRITE;
+@@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_
+               if (!non_swap_entry(entry))
+                       goto fault;
++              if (is_device_private_entry(entry))
++                      goto fault;
++
+               if (is_device_exclusive_entry(entry))
+                       goto fault;
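For context, hmm_range_fault() callers request faulting with HMM_PFN_REQ_FAULT (typically via hmm_range::default_flags) and identify their own device-private memory through hmm_range::dev_private_owner. With the change above, a device-private entry owned by some other device is faulted through migrate_to_ram() and then reported like an ordinary system page, instead of failing the whole call. A minimal, hypothetical caller is sketched below; the function name my_fault_range, the my_pgmap argument, and the simplified locking/retry handling are illustrative assumptions, not part of the patch.

#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/slab.h>

/*
 * Hypothetical example: fault a range of CPU page table entries on
 * behalf of a device.  Pages owned by "my_pgmap" are only reported;
 * device-private pages owned by anyone else are migrated back to
 * system memory (the behaviour restored by the patch above) and then
 * reported like normal system pages.
 */
static int my_fault_range(struct mmu_interval_notifier *notifier,
			  struct dev_pagemap *my_pgmap,
			  unsigned long start, unsigned long end)
{
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long *pfns;
	struct hmm_range range = {
		.notifier		= notifier,
		.start			= start,
		.end			= end,
		/* Require every page to be present (fault if needed). */
		.default_flags		= HMM_PFN_REQ_FAULT,
		/* Entries owned by this pgmap are reported, not faulted. */
		.dev_private_owner	= my_pgmap->owner,
	};
	int ret;

	pfns = kcalloc(npages, sizeof(*pfns), GFP_KERNEL);
	if (!pfns)
		return -ENOMEM;
	range.hmm_pfns = pfns;

	/* A real caller retries when the notifier sequence is invalidated. */
	range.notifier_seq = mmu_interval_read_begin(notifier);
	mmap_read_lock(notifier->mm);
	ret = hmm_range_fault(&range);
	mmap_read_unlock(notifier->mm);

	/* ... translate range.hmm_pfns[] into device addresses here ... */

	kfree(pfns);
	return ret;
}

Before the fix, the same call would return an error as soon as the range contained a device-private page belonging to another driver, which is the RDMA-capable NIC scenario described in the changelog above.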
diff --git a/queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch b/queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch
new file mode 100644 (file)
index 0000000..ae8f600
--- /dev/null
@@ -0,0 +1,70 @@
+From 9282012fc0aa248b77a69f5eb802b67c5a16bb13 Mon Sep 17 00:00:00 2001
+From: Jaewon Kim <jaewon31.kim@samsung.com>
+Date: Mon, 25 Jul 2022 18:52:12 +0900
+Subject: page_alloc: fix invalid watermark check on a negative value
+
+From: Jaewon Kim <jaewon31.kim@samsung.com>
+
+commit 9282012fc0aa248b77a69f5eb802b67c5a16bb13 upstream.
+
+There was a report that a task was stuck waiting in
+throttle_direct_reclaim, while the pgscan_direct_throttle counter in
+vmstat kept increasing.
+
+This is a bug where zone_watermark_fast returns true even when free
+pages are very low. Commit f27ce0e14088 ("page_alloc: consider
+highatomic reserve in watermark fast") changed the fast watermark check
+to take the highatomic reserve into account, but it did not handle the
+negative case that can happen when the reserved_highatomic pageblocks
+are larger than the actual number of free pages.
+
+If the watermark is considered ok for such a negative value, order-0
+allocating contexts will consume all free pages without entering direct
+reclaim, and eventually free pages become depleted except for the
+highatomic reserve.
+
+Allocating contexts may then fall into throttle_direct_reclaim. This
+symptom can easily happen on a system where the min watermark is low
+and other reclaimers such as kswapd do not free pages quickly.
+
+Handle the negative case by clamping with min().
+
+Link: https://lkml.kernel.org/r/20220725095212.25388-1-jaewon31.kim@samsung.com
+Fixes: f27ce0e14088 ("page_alloc: consider highatomic reserve in watermark fast")
+Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
+Reported-by: GyeongHwan Hong <gh21.hong@samsung.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Yong-Taek Lee <ytk.lee@samsung.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3928,11 +3928,15 @@ static inline bool zone_watermark_fast(s
+        * need to be calculated.
+        */
+       if (!order) {
+-              long fast_free;
++              long usable_free;
++              long reserved;
+
+-              fast_free = free_pages;
+-              fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
+-              if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
++              usable_free = free_pages;
++              reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
++
++              /* reserved may over estimate high-atomic reserves. */
++              usable_free -= min(usable_free, reserved);
++              if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
+                       return true;
+       }
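For context, the broken comparison mixes signedness: the difference computed from free_pages is a signed long, while mark is an unsigned long, so a negative difference is converted to a huge unsigned value and the order-0 fast path wrongly reports the watermark as met. The stand-alone user-space sketch below (hypothetical values, not the kernel function itself) contrasts the old and the fixed logic.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the kernel's min() macro. */
static long min_long(long a, long b)
{
	return a < b ? a : b;
}

/* Old logic: the signed difference is compared against an unsigned mark. */
static bool watermark_fast_old(long free_pages, long reserved,
			       unsigned long mark, long lowmem_reserve)
{
	long fast_free = free_pages - reserved;

	/* If fast_free is negative it wraps to a huge unsigned value here. */
	return fast_free > mark + lowmem_reserve;
}

/* Fixed logic: clamp the subtraction with min(), as the patch does. */
static bool watermark_fast_new(long free_pages, long reserved,
			       unsigned long mark, long lowmem_reserve)
{
	long usable_free = free_pages;

	/* reserved may over-estimate the high-atomic reserve. */
	usable_free -= min_long(usable_free, reserved);
	return usable_free > mark + lowmem_reserve;
}

int main(void)
{
	/* Hypothetical numbers: reserve estimate exceeds the real free pages. */
	long free_pages = 100, reserved = 500, lowmem_reserve = 0;
	unsigned long mark = 1000;

	printf("old: %d  fixed: %d\n",
	       watermark_fast_old(free_pages, reserved, mark, lowmem_reserve),
	       watermark_fast_new(free_pages, reserved, mark, lowmem_reserve));
	return 0;
}

With these made-up numbers the old variant returns true even though only 100 pages are free against a watermark of 1000, while the fixed variant returns false and falls through to the full __zone_watermark_ok() calculation, as zone_watermark_fast() does.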
diff --git a/queue-5.15/series b/queue-5.15/series
index 442a7248b6b282214cbc180ef16b81f1f1191425..e9a9bbe59cc99fe6fc9984d7a1b262abccb80694 100644 (file)
@@ -60,3 +60,5 @@ perf-symbol-correct-address-for-bss-symbols.patch
 sfc-disable-softirqs-for-ptp-tx.patch
 sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
 arm-crypto-comment-out-gcc-warning-that-breaks-clang-builds.patch
+mm-hmm-fault-non-owner-device-private-entries.patch
+page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch