]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-5.15/mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch
5.4-stable patches
[thirdparty/kernel/stable-queue.git] / queue-5.15 / mm-vmscan-prevent-infinite-loop-for-costly-gfp_noio-__gfp_retry_mayfail-allocations.patch
1 From 803de9000f334b771afacb6ff3e78622916668b0 Mon Sep 17 00:00:00 2001
2 From: Vlastimil Babka <vbabka@suse.cz>
3 Date: Wed, 21 Feb 2024 12:43:58 +0100
4 Subject: mm, vmscan: prevent infinite loop for costly GFP_NOIO | __GFP_RETRY_MAYFAIL allocations
5
6 From: Vlastimil Babka <vbabka@suse.cz>
7
8 commit 803de9000f334b771afacb6ff3e78622916668b0 upstream.
9
10 Sven reports an infinite loop in __alloc_pages_slowpath() for costly order
11 __GFP_RETRY_MAYFAIL allocations that are also GFP_NOIO. Such a combination
12 can happen in a suspend/resume context where a GFP_KERNEL allocation can
13 have __GFP_IO masked out via gfp_allowed_mask.
14
15 Quoting Sven:
16
17 1. try to do a "costly" allocation (order > PAGE_ALLOC_COSTLY_ORDER)
18 with __GFP_RETRY_MAYFAIL set.
19
20 2. page alloc's __alloc_pages_slowpath tries to get a page from the
21 freelist. This fails because there is nothing free of that costly
22 order.
23
24 3. page alloc tries to reclaim by calling __alloc_pages_direct_reclaim,
25 which bails out because a zone is ready to be compacted; it pretends
26 to have made a single page of progress.
27
28 4. page alloc tries to compact, but this always bails out early because
29 __GFP_IO is not set (it's not passed by the snd allocator, and even
30 if it were, we are suspending so the __GFP_IO flag would be cleared
31 anyway).
32
33 5. page alloc believes reclaim progress was made (because of the
34 pretense in item 3) and so it checks whether it should retry
35 compaction. The compaction retry logic thinks it should try again,
36 because:
37 a) reclaim is needed because of the early bail-out in item 4
38 b) a zonelist is suitable for compaction
39
40 6. goto 2. indefinite stall.
41
42 (end quote)
43
44 The immediate root cause is that the COMPACT_SKIPPED returned from
45 __alloc_pages_direct_compact() (step 4) due to the lack of __GFP_IO is
46 misinterpreted as indicating a lack of order-0 pages, and in step 5
47 should_compact_retry() takes that as a reason to retry, before incrementing
48 and limiting the number of retries. There are however other places that
49 wrongly assume that compaction can happen while we lack __GFP_IO.
50
51 To fix this, introduce gfp_compaction_allowed() to abstract the __GFP_IO
52 evaluation and switch the open-coded test in try_to_compact_pages() to use
53 it.
54
55 Also use the new helper in:
56 - compaction_ready(), which will make reclaim not bail out in step 3, so
57 there's at least one attempt to actually reclaim, even if chances are
58 small for a costly order
59 - in_reclaim_compaction() which will make should_continue_reclaim()
60 return false and we don't over-reclaim unnecessarily
61 - in __alloc_pages_slowpath() to set a local variable can_compact,
62 which is then used to avoid retrying reclaim/compaction for costly
63 allocations (step 5) if we can't compact and also to skip the early
64 compaction attempt that we do in some cases
65
66 Link: https://lkml.kernel.org/r/20240221114357.13655-2-vbabka@suse.cz
67 Fixes: 3250845d0526 ("Revert "mm, oom: prevent premature OOM killer invocation for high order request"")
68 Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
69 Reported-by: Sven van Ashbrook <svenva@chromium.org>
70 Closes: https://lore.kernel.org/all/CAG-rBihs_xMKb3wrMO1%2B-%2Bp4fowP9oy1pa_OTkfxBzPUVOZF%2Bg@mail.gmail.com/
71 Tested-by: Karthikeyan Ramasubramanian <kramasub@chromium.org>
72 Cc: Brian Geffon <bgeffon@google.com>
73 Cc: Curtis Malainey <cujomalainey@chromium.org>
74 Cc: Jaroslav Kysela <perex@perex.cz>
75 Cc: Mel Gorman <mgorman@techsingularity.net>
76 Cc: Michal Hocko <mhocko@kernel.org>
77 Cc: Takashi Iwai <tiwai@suse.com>
78 Cc: <stable@vger.kernel.org>
79 Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
80 Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
81 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
82 ---
83 include/linux/gfp.h | 9 +++++++++
84 mm/compaction.c | 7 +------
85 mm/page_alloc.c | 10 ++++++----
86 mm/vmscan.c | 5 ++++-
87 4 files changed, 20 insertions(+), 11 deletions(-)
88
89 --- a/include/linux/gfp.h
90 +++ b/include/linux/gfp.h
91 @@ -660,6 +660,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_ma
92 extern void pm_restrict_gfp_mask(void);
93 extern void pm_restore_gfp_mask(void);
94
95 +/*
96 + * Check if the gfp flags allow compaction - GFP_NOIO is a really
97 + * tricky context because the migration might require IO.
98 + */
99 +static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
100 +{
101 + return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
102 +}
103 +
104 extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
105
106 #ifdef CONFIG_PM_SLEEP
107 --- a/mm/compaction.c
108 +++ b/mm/compaction.c
109 @@ -2582,16 +2582,11 @@ enum compact_result try_to_compact_pages
110 unsigned int alloc_flags, const struct alloc_context *ac,
111 enum compact_priority prio, struct page **capture)
112 {
113 - int may_perform_io = gfp_mask & __GFP_IO;
114 struct zoneref *z;
115 struct zone *zone;
116 enum compact_result rc = COMPACT_SKIPPED;
117
118 - /*
119 - * Check if the GFP flags allow compaction - GFP_NOIO is really
120 - * tricky context because the migration might require IO
121 - */
122 - if (!may_perform_io)
123 + if (!gfp_compaction_allowed(gfp_mask))
124 return COMPACT_SKIPPED;
125
126 trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
127 --- a/mm/page_alloc.c
128 +++ b/mm/page_alloc.c
129 @@ -4903,6 +4903,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
130 struct alloc_context *ac)
131 {
132 bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
133 + bool can_compact = gfp_compaction_allowed(gfp_mask);
134 const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
135 struct page *page = NULL;
136 unsigned int alloc_flags;
137 @@ -4968,7 +4969,7 @@ restart:
138 * Don't try this for allocations that are allowed to ignore
139 * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
140 */
141 - if (can_direct_reclaim &&
142 + if (can_direct_reclaim && can_compact &&
143 (costly_order ||
144 (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
145 && !gfp_pfmemalloc_allowed(gfp_mask)) {
146 @@ -5065,9 +5066,10 @@ retry:
147
148 /*
149 * Do not retry costly high order allocations unless they are
150 - * __GFP_RETRY_MAYFAIL
151 + * __GFP_RETRY_MAYFAIL and we can compact
152 */
153 - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
154 + if (costly_order && (!can_compact ||
155 + !(gfp_mask & __GFP_RETRY_MAYFAIL)))
156 goto nopage;
157
158 if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
159 @@ -5080,7 +5082,7 @@ retry:
160 * implementation of the compaction depends on the sufficient amount
161 * of free memory (see __compaction_suitable)
162 */
163 - if (did_some_progress > 0 &&
164 + if (did_some_progress > 0 && can_compact &&
165 should_compact_retry(ac, order, alloc_flags,
166 compact_result, &compact_priority,
167 &compaction_retries))
168 --- a/mm/vmscan.c
169 +++ b/mm/vmscan.c
170 @@ -2834,7 +2834,7 @@ static void shrink_lruvec(struct lruvec
171 /* Use reclaim/compaction for costly allocs or under memory pressure */
172 static bool in_reclaim_compaction(struct scan_control *sc)
173 {
174 - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
175 + if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
176 (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
177 sc->priority < DEF_PRIORITY - 2))
178 return true;
179 @@ -3167,6 +3167,9 @@ static inline bool compaction_ready(stru
180 unsigned long watermark;
181 enum compact_result suitable;
182
183 + if (!gfp_compaction_allowed(sc->gfp_mask))
184 + return false;
185 +
186 suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
187 if (suitable == COMPACT_SUCCESS)
188 /* Allocation should succeed already. Don't reclaim. */