From 4e8be599d747acfeae819ff009a838a0a6688c46 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 31 Jul 2022 12:55:26 +0200 Subject: [PATCH] 5.15-stable patches added patches: mm-hmm-fault-non-owner-device-private-entries.patch page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch --- ...ult-non-owner-device-private-entries.patch | 79 +++++++++++++++++++ ...-watermark-check-on-a-negative-value.patch | 70 ++++++++++++++++ queue-5.15/series | 2 + 3 files changed, 151 insertions(+) create mode 100644 queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch create mode 100644 queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch diff --git a/queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch b/queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch new file mode 100644 index 00000000000..2b126d0cb4b --- /dev/null +++ b/queue-5.15/mm-hmm-fault-non-owner-device-private-entries.patch @@ -0,0 +1,79 @@ +From 8a295dbbaf7292c582a40ce469c326f472d51f66 Mon Sep 17 00:00:00 2001 +From: Ralph Campbell +Date: Mon, 25 Jul 2022 11:36:14 -0700 +Subject: mm/hmm: fault non-owner device private entries + +From: Ralph Campbell + +commit 8a295dbbaf7292c582a40ce469c326f472d51f66 upstream. + +If hmm_range_fault() is called with the HMM_PFN_REQ_FAULT flag and a +device private PTE is found, the hmm_range::dev_private_owner page is used +to determine if the device private page should not be faulted in. +However, if the device private page is not owned by the caller, +hmm_range_fault() returns an error instead of calling migrate_to_ram() to +fault in the page. + +For example, if a page is migrated to GPU private memory and a RDMA fault +capable NIC tries to read the migrated page, without this patch it will +get an error. With this patch, the page will be migrated back to system +memory and the NIC will be able to read the data. 
+ +Link: https://lkml.kernel.org/r/20220727000837.4128709-2-rcampbell@nvidia.com +Link: https://lkml.kernel.org/r/20220725183615.4118795-2-rcampbell@nvidia.com +Fixes: 08ddddda667b ("mm/hmm: check the device private page owner in hmm_range_fault()") +Signed-off-by: Ralph Campbell +Reported-by: Felix Kuehling +Reviewed-by: Alistair Popple +Cc: Philip Yang +Cc: Jason Gunthorpe +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hmm.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/mm/hmm.c ++++ b/mm/hmm.c +@@ -212,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *w + unsigned long end, unsigned long hmm_pfns[], pmd_t pmd); + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +-static inline bool hmm_is_device_private_entry(struct hmm_range *range, +- swp_entry_t entry) +-{ +- return is_device_private_entry(entry) && +- pfn_swap_entry_to_page(entry)->pgmap->owner == +- range->dev_private_owner; +-} +- + static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range, + pte_t pte) + { +@@ -252,10 +244,12 @@ static int hmm_vma_handle_pte(struct mm_ + swp_entry_t entry = pte_to_swp_entry(pte); + + /* +- * Never fault in device private pages, but just report +- * the PFN even if not present. ++ * Don't fault in device private pages owned by the caller, ++ * just report the PFN. 
+ */ +- if (hmm_is_device_private_entry(range, entry)) { ++ if (is_device_private_entry(entry) && ++ pfn_swap_entry_to_page(entry)->pgmap->owner == ++ range->dev_private_owner) { + cpu_flags = HMM_PFN_VALID; + if (is_writable_device_private_entry(entry)) + cpu_flags |= HMM_PFN_WRITE; +@@ -273,6 +267,9 @@ static int hmm_vma_handle_pte(struct mm_ + if (!non_swap_entry(entry)) + goto fault; + ++ if (is_device_private_entry(entry)) ++ goto fault; ++ + if (is_device_exclusive_entry(entry)) + goto fault; + diff --git a/queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch b/queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch new file mode 100644 index 00000000000..ae8f600854c --- /dev/null +++ b/queue-5.15/page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch @@ -0,0 +1,70 @@ +From 9282012fc0aa248b77a69f5eb802b67c5a16bb13 Mon Sep 17 00:00:00 2001 +From: Jaewon Kim +Date: Mon, 25 Jul 2022 18:52:12 +0900 +Subject: page_alloc: fix invalid watermark check on a negative value + +From: Jaewon Kim + +commit 9282012fc0aa248b77a69f5eb802b67c5a16bb13 upstream. + +There was a report that a task is waiting at the +throttle_direct_reclaim. The pgscan_direct_throttle in vmstat was +increasing. + +This is a bug where zone_watermark_fast returns true even when the free +is very low. The commit f27ce0e14088 ("page_alloc: consider highatomic +reserve in watermark fast") changed the watermark fast to consider +highatomic reserve. But it did not handle a negative value case which +can happen when reserved_highatomic pageblock is bigger than the +actual free. + +If watermark is considered as ok for the negative value, allocating +contexts for order-0 will consume all free pages without direct reclaim, +and finally free page may become depleted except highatomic free. + +Then allocating contexts may fall into throttle_direct_reclaim. 
This +symptom may easily happen in a system where wmark min is low and other +reclaimers like kswapd do not make free pages quickly. + +Handle the negative case by using MIN. + +Link: https://lkml.kernel.org/r/20220725095212.25388-1-jaewon31.kim@samsung.com +Fixes: f27ce0e14088 ("page_alloc: consider highatomic reserve in watermark fast") +Signed-off-by: Jaewon Kim +Reported-by: GyeongHwan Hong +Acked-by: Mel Gorman +Cc: Minchan Kim +Cc: Baoquan He +Cc: Vlastimil Babka +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Yong-Taek Lee +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3928,11 +3928,15 @@ static inline bool zone_watermark_fast(s + * need to be calculated. + */ + if (!order) { +- long fast_free; ++ long usable_free; ++ long reserved; + +- fast_free = free_pages; +- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags); +- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx]) ++ usable_free = free_pages; ++ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags); ++ ++ /* reserved may over estimate high-atomic reserves. */ ++ usable_free -= min(usable_free, reserved); ++ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) + return true; + } + diff --git a/queue-5.15/series b/queue-5.15/series index 442a7248b6b..e9a9bbe59cc 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -60,3 +60,5 @@ perf-symbol-correct-address-for-bss-symbols.patch sfc-disable-softirqs-for-ptp-tx.patch sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch arm-crypto-comment-out-gcc-warning-that-breaks-clang-builds.patch +mm-hmm-fault-non-owner-device-private-entries.patch +page_alloc-fix-invalid-watermark-check-on-a-negative-value.patch -- 2.47.3