--- /dev/null
+From 803de9000f334b771afacb6ff3e78622916668b0 Mon Sep 17 00:00:00 2001
+From: Vlastimil Babka <vbabka@suse.cz>
+Date: Wed, 21 Feb 2024 12:43:58 +0100
+Subject: mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+commit 803de9000f334b771afacb6ff3e78622916668b0 upstream.
+
+Sven reports an infinite loop in __alloc_pages_slowpath() for costly order
+__GFP_RETRY_MAYFAIL allocations that are also GFP_NOIO. Such combination
+can happen in a suspend/resume context where a GFP_KERNEL allocation can
+have __GFP_IO masked out via gfp_allowed_mask.
+
+Quoting Sven:
+
+1. try to do a "costly" allocation (order > PAGE_ALLOC_COSTLY_ORDER)
+ with __GFP_RETRY_MAYFAIL set.
+
+2. page alloc's __alloc_pages_slowpath tries to get a page from the
+ freelist. This fails because there is nothing free of that costly
+ order.
+
+3. page alloc tries to reclaim by calling __alloc_pages_direct_reclaim,
+ which bails out because a zone is ready to be compacted; it pretends
+ to have made a single page of progress.
+
+4. page alloc tries to compact, but this always bails out early because
+ __GFP_IO is not set (it's not passed by the snd allocator, and even
+ if it were, we are suspending so the __GFP_IO flag would be cleared
+ anyway).
+
+5. page alloc believes reclaim progress was made (because of the
+ pretense in item 3) and so it checks whether it should retry
+ compaction. The compaction retry logic thinks it should try again,
+ because:
+ a) reclaim is needed because of the early bail-out in item 4
+ b) a zonelist is suitable for compaction
+
+6. goto 2. indefinite stall.
+
+(end quote)
+
+The immediate root cause is mistaking the COMPACT_SKIPPED returned from
+__alloc_pages_direct_compact() (step 4) due to lack of __GFP_IO for an
+indication of a lack of order-0 pages, and in step 5 evaluating that in
+should_compact_retry() as a reason to retry, before incrementing and
+limiting the number of retries. There are however other places that
+wrongly assume that compaction can happen while we lack __GFP_IO.
+
+To fix this, introduce gfp_compaction_allowed() to abstract the __GFP_IO
+evaluation and switch the open-coded test in try_to_compact_pages() to use
+it.
+
+Also use the new helper in:
+- compaction_ready(), which will make reclaim not bail out in step 3, so
+ there's at least one attempt to actually reclaim, even if chances are
+ small for a costly order
+- in_reclaim_compaction() which will make should_continue_reclaim()
+ return false and we don't over-reclaim unnecessarily
+- in __alloc_pages_slowpath() to set a local variable can_compact,
+ which is then used to avoid retrying reclaim/compaction for costly
+ allocations (step 5) if we can't compact and also to skip the early
+ compaction attempt that we do in some cases
+
+Link: https://lkml.kernel.org/r/20240221114357.13655-2-vbabka@suse.cz
+Fixes: 3250845d0526 ("Revert "mm, oom: prevent premature OOM killer invocation for high order request"")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Sven van Ashbrook <svenva@chromium.org>
+Closes: https://lore.kernel.org/all/CAG-rBihs_xMKb3wrMO1%2B-%2Bp4fowP9oy1pa_OTkfxBzPUVOZF%2Bg@mail.gmail.com/
+Tested-by: Karthikeyan Ramasubramanian <kramasub@chromium.org>
+Cc: Brian Geffon <bgeffon@google.com>
+Cc: Curtis Malainey <cujomalainey@chromium.org>
+Cc: Jaroslav Kysela <perex@perex.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Takashi Iwai <tiwai@suse.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/gfp.h | 9 +++++++++
+ mm/compaction.c | 7 +------
+ mm/page_alloc.c | 10 ++++++----
+ mm/vmscan.c | 5 ++++-
+ 4 files changed, 20 insertions(+), 11 deletions(-)
+
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -608,6 +608,15 @@ static inline bool pm_suspended_storage(
+ }
+ #endif /* CONFIG_PM_SLEEP */
+
++/*
++ * True if compaction may run for @gfp_mask: requires CONFIG_COMPACTION and
++ * __GFP_IO - GFP_NOIO is a tricky context as the migration might require IO.
++ */
++static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
++{
++ return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
++}
++
+ #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
+ /* The below functions must be run on a range from a single zone. */
+ extern int alloc_contig_range(unsigned long start, unsigned long end,
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1751,16 +1751,11 @@ enum compact_result try_to_compact_pages
+ unsigned int alloc_flags, const struct alloc_context *ac,
+ enum compact_priority prio)
+ {
+- int may_perform_io = gfp_mask & __GFP_IO;
+ struct zoneref *z;
+ struct zone *zone;
+ enum compact_result rc = COMPACT_SKIPPED;
+
+- /*
+- * Check if the GFP flags allow compaction - GFP_NOIO is really
+- * tricky context because the migration might require IO
+- */
+- if (!may_perform_io)
++ if (!gfp_compaction_allowed(gfp_mask))
+ return COMPACT_SKIPPED;
+
+ trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -4099,6 +4099,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
+ struct alloc_context *ac)
+ {
+ bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
++ bool can_compact = gfp_compaction_allowed(gfp_mask);
+ const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
+ struct page *page = NULL;
+ unsigned int alloc_flags;
+@@ -4164,7 +4165,7 @@ restart:
+ * Don't try this for allocations that are allowed to ignore
+ * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
+ */
+- if (can_direct_reclaim &&
++ if (can_direct_reclaim && can_compact &&
+ (costly_order ||
+ (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
+ && !gfp_pfmemalloc_allowed(gfp_mask)) {
+@@ -4251,9 +4252,10 @@ retry:
+
+ /*
+ * Do not retry costly high order allocations unless they are
+- * __GFP_RETRY_MAYFAIL
++ * __GFP_RETRY_MAYFAIL and we can compact
+ */
+- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
++ if (costly_order && (!can_compact ||
++ !(gfp_mask & __GFP_RETRY_MAYFAIL)))
+ goto nopage;
+
+ if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
+@@ -4266,7 +4268,7 @@ retry:
+ * implementation of the compaction depends on the sufficient amount
+ * of free memory (see __compaction_suitable)
+ */
+- if (did_some_progress > 0 &&
++ if (did_some_progress > 0 && can_compact &&
+ should_compact_retry(ac, order, alloc_flags,
+ compact_result, &compact_priority,
+ &compaction_retries))
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2595,7 +2595,7 @@ static void shrink_node_memcg(struct pgl
+ /* Use reclaim/compaction for costly allocs or under memory pressure */
+ static bool in_reclaim_compaction(struct scan_control *sc)
+ {
+- if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
++ if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
+ (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+ sc->priority < DEF_PRIORITY - 2))
+ return true;
+@@ -2869,6 +2869,9 @@ static inline bool compaction_ready(stru
+ unsigned long watermark;
+ enum compact_result suitable;
+
++ if (!gfp_compaction_allowed(sc->gfp_mask))
++ return false;
++
+ suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
+ if (suitable == COMPACT_SUCCESS)
+ /* Allocation should succeed already. Don't reclaim. */