From 47504ee04b9241548ae2c28be7d0b01cff3b7aa6 Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennisszhou@gmail.com>
Date: Fri, 16 Feb 2018 12:07:19 -0600
Subject: percpu: add __GFP_NORETRY semantics to the percpu balancing path

From: Dennis Zhou <dennisszhou@gmail.com>

commit 47504ee04b9241548ae2c28be7d0b01cff3b7aa6 upstream.

Percpu memory using the vmalloc-area-based chunk allocator lazily
populates chunks by first requesting the full virtual address space
required for the chunk and subsequently adding pages as allocations come
through. To ensure atomic allocations can succeed, a workqueue item is
used to maintain a minimum number of empty pages. In certain scenarios,
such as the one reported in [1], it is possible that physical memory
becomes quite scarce, which can result in either a rather long time
spent trying to find free pages or, worse, a kernel panic.

This patch adds support for __GFP_NORETRY and __GFP_NOWARN, passing them
through to the underlying allocators. This should prevent any
unnecessary panics potentially caused by the workqueue item. The gfp
flags are passed around as additional flags rather than as a full set
of flags. The next patch will change these to caller-passed semantics.

V2:
Added const modifier to gfp flags in the balance path.
Removed an extra whitespace.

[1] https://lkml.org/lkml/2018/2/12/551

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Reported-by: syzbot+adb03f3f0bb57ce3acda@syzkaller.appspotmail.com
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 mm/percpu-km.c |  8 ++++----
 mm/percpu-vm.c | 18 +++++++++++-------
 mm/percpu.c    | 45 ++++++++++++++++++++++++++++-----------------
 3 files changed, 43 insertions(+), 28 deletions(-)

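For readers skimming the diff below: the core idea is that the balance
path now hands __GFP_NORETRY | __GFP_NOWARN down to the allocators, and
the helpers OR those bits into their usual base flags (e.g.
gfp | GFP_KERNEL) instead of replacing them, so the allocation path keeps
its old behaviour while the workqueue path can fail quietly. The
standalone C sketch that follows models only that flag-combining
behaviour; the flag values and the effective_flags() helper are
simplified stand-ins invented for illustration, not kernel code, and are
not part of the patch.

/* Illustrative userspace model only -- NOT part of the patch.
 * The flag values below are stand-ins; the real definitions live in
 * include/linux/gfp.h, and the real logic lives in mm/percpu*.c. */
#include <stdio.h>

typedef unsigned int gfp_t;

#define GFP_KERNEL      0x01u   /* stand-in value */
#define __GFP_NORETRY   0x02u   /* stand-in value */
#define __GFP_NOWARN    0x04u   /* stand-in value */

/* Models how pcpu_mem_zalloc() and friends treat the caller's gfp:
 * extra bits are ORed on top of the base GFP_KERNEL flags rather than
 * replacing them. */
static gfp_t effective_flags(gfp_t extra)
{
	return extra | GFP_KERNEL;
}

int main(void)
{
	/* Regular allocation path: no extra bits, behaves as before. */
	gfp_t alloc_path = effective_flags(0);

	/* Balance workqueue path: may give up quietly instead of
	 * retrying hard and waking the OOM killer. */
	gfp_t balance_path = effective_flags(__GFP_NORETRY | __GFP_NOWARN);

	printf("alloc path:   %#x\n", alloc_path);
	printf("balance path: %#x\n", balance_path);
	return 0;
}
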
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
 {
 return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct
 /* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
 struct pcpu_chunk *chunk;
 struct page *pages;
 int i;
 
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
 if (!chunk)
 return NULL;
 
- pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+ pages = alloc_pages(gfp | GFP_KERNEL, order_base_2(nr_pages));
 if (!pages) {
 pcpu_free_chunk(chunk);
 return NULL;
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void
 lockdep_assert_held(&pcpu_alloc_mutex);
 
 if (!pages)
- pages = pcpu_mem_zalloc(pages_size);
+ pages = pcpu_mem_zalloc(pages_size, 0);
 return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk. Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
- struct page **pages, int page_start, int page_end)
+ struct page **pages, int page_start, int page_end,
+ gfp_t gfp)
 {
- const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
 unsigned int cpu, tcpu;
 int i;
 
+ gfp |= GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
+
 for_each_possible_cpu(cpu) {
 for (i = page_start; i < page_end; i++) {
 struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct p
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct p
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
 {
 struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pc
 if (!pages)
 return -ENOMEM;
 
- if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+ if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
 return -ENOMEM;
 
 if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct
 pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 struct pcpu_chunk *chunk;
 struct vm_struct **vms;
 
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
 if (!chunk)
 return NULL;
 
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -447,10 +447,12 @@ static void pcpu_next_fit_region(struct
 /**
 * pcpu_mem_zalloc - allocate memory
 * @size: bytes to allocate
+ * @gfp: allocation flags
 *
 * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
@@ -458,15 +460,16 @@ static void pcpu_next_fit_region(struct
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
 if (WARN_ON_ONCE(!slab_is_available()))
 return NULL;
 
 if (size <= PAGE_SIZE)
- return kzalloc(size, GFP_KERNEL);
+ return kzalloc(size, gfp | GFP_KERNEL);
 else
- return vzalloc(size);
+ return __vmalloc(size, gfp | GFP_KERNEL | __GFP_ZERO,
+ PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1157,12 @@ static struct pcpu_chunk * __init pcpu_a
 return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
 struct pcpu_chunk *chunk;
 int region_bits;
 
- chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+ chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
 if (!chunk)
 return NULL;
 
@@ -1168,17 +1171,17 @@ static struct pcpu_chunk *pcpu_alloc_chu
 region_bits = pcpu_chunk_map_bits(chunk);
 
 chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
- sizeof(chunk->alloc_map[0]));
+ sizeof(chunk->alloc_map[0]), gfp);
 if (!chunk->alloc_map)
 goto alloc_map_fail;
 
 chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
- sizeof(chunk->bound_map[0]));
+ sizeof(chunk->bound_map[0]), gfp);
 if (!chunk->bound_map)
 goto bound_map_fail;
 
 chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
- sizeof(chunk->md_blocks[0]));
+ sizeof(chunk->md_blocks[0]), gfp);
 if (!chunk->md_blocks)
 goto md_blocks_fail;
 
@@ -1277,9 +1280,10 @@ static void pcpu_chunk_depopulated(struc
 * pcpu_addr_to_page - translate address to physical address
 * pcpu_verify_alloc_info - check alloc_info is acceptable during init
 */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size,
+ gfp_t gfp);
 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1421,7 +1425,7 @@ restart:
 }
 
 if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(0);
 if (!chunk) {
 err = "failed to allocate new chunk";
 goto fail;
@@ -1450,7 +1454,7 @@ area_found:
 page_start, page_end) {
 WARN_ON(chunk->immutable);
 
- ret = pcpu_populate_chunk(chunk, rs, re);
+ ret = pcpu_populate_chunk(chunk, rs, re, 0);
 
 spin_lock_irqsave(&pcpu_lock, flags);
 if (ret) {
@@ -1561,10 +1565,17 @@ void __percpu *__alloc_reserved_percpu(s
 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @work: unused
 *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
 */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+ /* gfp flags passed to underlying allocators */
+ const gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN;
 LIST_HEAD(to_free);
 struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 struct pcpu_chunk *chunk, *next;
@@ -1645,7 +1656,7 @@ retry_pop:
 chunk->nr_pages) {
 int nr = min(re - rs, nr_to_pop);
 
- ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+ ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
 if (!ret) {
 nr_to_pop -= nr;
 spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1673,7 @@ retry_pop:
 
 if (nr_to_pop) {
 /* ran out of chunks to populate, create a new one and retry */
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(gfp);
 if (chunk) {
 spin_lock_irq(&pcpu_lock);
 pcpu_chunk_relocate(chunk, -1);