1 From foo@baz Wed Apr 17 20:59:12 CEST 2019
2 From: Balbir Singh <sblbir@amzn.com>
3 Date: Wed, 3 Apr 2019 18:39:01 +0000
4 Subject: bpf: reduce verifier memory consumption
5 To: <gregkh@linuxfoundation.org>
6 Cc: <stable@kernel.org>, <daniel@iogearbox.net>, <jannh@google.com>, <sblbir@amazon.com>, Alexei Starovoitov <ast@fb.com>, <jakub.kicinski@netronome.com>, Alexei Starovoitov <ast@kernel.org>, "David S . Miller" <davem@davemloft.net>, Balbir Singh <sblbir@amzn.com>
7 Message-ID: <20190403183917.13749-2-sblbir@amzn.com>
8
9 From: Alexei Starovoitov <ast@fb.com>
10
11 commit 638f5b90d46016372a8e3e0a434f199cc5e12b8c upstream.
12
13 The verifier got progressively smarter over time and the size of its internal
14 state grew as well. Time to reduce the memory consumption.
15
16 Before:
17 sizeof(struct bpf_verifier_state) = 6520
18 After:
19 sizeof(struct bpf_verifier_state) = 896
20
21 This is done by observing that the majority of BPF programs use little to
22 no stack, whereas the verifier always kept all 512 stack slots ready.
23 Instead, dynamically reallocate the verifier state when stack
24 access is detected.
25 The runtime difference before vs. after is within the noise.
26 The number of processed instructions stays the same.
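
As a rough standalone sketch of the reallocation idea (userspace C, with
malloc()/free() standing in for the kernel's kmalloc_array()/kfree(); the
names are illustrative, not the verifier's actual API): the per-state stack
starts out empty and is only grown, in 8-byte slot granularity, once the
program actually touches the stack.

	#include <stdlib.h>
	#include <string.h>

	#define SLOT_SIZE 8			/* analogous to BPF_REG_SIZE */

	struct slot_state {
		unsigned char type[SLOT_SIZE];	/* STACK_INVALID/MISC/SPILL per byte */
		/* the spilled register state would live here as well */
	};

	struct toy_state {
		int allocated;			/* bytes of stack tracked so far */
		struct slot_state *stack;	/* NULL until the first stack access */
	};

	/* Grow the tracked stack to cover 'size' bytes, preserving old slots.
	 * Callers are expected to round 'size' up to a slot boundary, the way
	 * check_stack_write() uses round_up() in the patch below.
	 */
	int toy_grow_stack(struct toy_state *st, int size)
	{
		int old = st->allocated;
		int slots = size / SLOT_SIZE;
		struct slot_state *n;

		if (size <= old)
			return 0;		/* already large enough */
		n = calloc(slots, sizeof(*n));	/* the patch uses kmalloc_array() */
		if (!n)
			return -1;
		if (st->stack)
			memcpy(n, st->stack, (old / SLOT_SIZE) * sizeof(*n));
		free(st->stack);		/* the patch uses kfree() */
		st->stack = n;
		st->allocated = slots * SLOT_SIZE;
		return 0;
	}

realloc_verifier_state() in the hunk below follows the same shape, with an
extra copy_old flag so copy_verifier_state() can reuse it without preserving
the old contents.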
27
28 Cc: jakub.kicinski@netronome.com
29
30 Signed-off-by: Alexei Starovoitov <ast@kernel.org>
31 Acked-by: Daniel Borkmann <daniel@iogearbox.net>
32 Signed-off-by: David S. Miller <davem@davemloft.net>
33 [Backported to 4.14 by sblbir]
34 Signed-off-by: Balbir Singh <sblbir@amzn.com>
35 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
36 ---
37 drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 9
38 include/linux/bpf_verifier.h | 16
39 kernel/bpf/verifier.c | 433 ++++++++++++++--------
40 3 files changed, 304 insertions(+), 154 deletions(-)
41
42 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
43 +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
44 @@ -76,9 +76,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_p
45
46 static int
47 nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
48 - const struct bpf_verifier_env *env)
49 + struct bpf_verifier_env *env)
50 {
51 - const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
52 + const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0;
53 u64 imm;
54
55 if (nfp_prog->act == NN_ACT_XDP)
56 @@ -113,9 +113,10 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_
57
58 static int
59 nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
60 - const struct bpf_verifier_env *env, u8 reg)
61 + struct bpf_verifier_env *env, u8 reg_no)
62 {
63 - if (env->cur_state.regs[reg].type != PTR_TO_CTX)
64 + const struct bpf_reg_state *reg = cur_regs(env) + reg_no;
65 + if (reg->type != PTR_TO_CTX)
66 return -EINVAL;
67
68 return 0;
69 --- a/include/linux/bpf_verifier.h
70 +++ b/include/linux/bpf_verifier.h
71 @@ -91,14 +91,19 @@ enum bpf_stack_slot_type {
72
73 #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
74
75 +struct bpf_stack_state {
76 + struct bpf_reg_state spilled_ptr;
77 + u8 slot_type[BPF_REG_SIZE];
78 +};
79 +
80 /* state of the program:
81 * type of all registers and stack info
82 */
83 struct bpf_verifier_state {
84 struct bpf_reg_state regs[MAX_BPF_REG];
85 - u8 stack_slot_type[MAX_BPF_STACK];
86 - struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
87 struct bpf_verifier_state *parent;
88 + int allocated_stack;
89 + struct bpf_stack_state *stack;
90 };
91
92 /* linked list of verifier states used to prune search */
93 @@ -133,7 +138,7 @@ struct bpf_verifier_env {
94 struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
95 int stack_size; /* number of states to be processed */
96 bool strict_alignment; /* perform strict pointer alignment checks */
97 - struct bpf_verifier_state cur_state; /* current verifier state */
98 + struct bpf_verifier_state *cur_state; /* current verifier state */
99 struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
100 const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
101 void *analyzer_priv; /* pointer to external analyzer's private data */
102 @@ -145,6 +150,11 @@ struct bpf_verifier_env {
103 struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
104 };
105
106 +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
107 +{
108 + return env->cur_state->regs;
109 +}
110 +
111 int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
112 void *priv);
113
114 --- a/kernel/bpf/verifier.c
115 +++ b/kernel/bpf/verifier.c
116 @@ -265,10 +265,11 @@ static void print_verifier_state(struct
117 verbose(")");
118 }
119 }
120 - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
121 - if (state->stack_slot_type[i] == STACK_SPILL)
122 - verbose(" fp%d=%s", -MAX_BPF_STACK + i,
123 - reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]);
124 + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
125 + if (state->stack[i].slot_type[0] == STACK_SPILL)
126 + verbose(" fp%d=%s",
127 + -MAX_BPF_STACK + i * BPF_REG_SIZE,
128 + reg_type_str[state->stack[i].spilled_ptr.type]);
129 }
130 verbose("\n");
131 }
132 @@ -434,35 +435,123 @@ static void print_bpf_insn(const struct
133 }
134 }
135
136 -static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
137 +static int copy_stack_state(struct bpf_verifier_state *dst,
138 + const struct bpf_verifier_state *src)
139 {
140 - struct bpf_verifier_stack_elem *elem;
141 - int insn_idx;
142 + if (!src->stack)
143 + return 0;
144 + if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
145 + /* internal bug, make state invalid to reject the program */
146 + memset(dst, 0, sizeof(*dst));
147 + return -EFAULT;
148 + }
149 + memcpy(dst->stack, src->stack,
150 + sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
151 + return 0;
152 +}
153 +
154 +/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
155 + * make it consume minimal amount of memory. check_stack_write() access from
156 + * the program calls into realloc_verifier_state() to grow the stack size.
157 + * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
158 + * which this function copies over. It points to previous bpf_verifier_state
159 + * which is never reallocated
160 + */
161 +static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
162 + bool copy_old)
163 +{
164 + u32 old_size = state->allocated_stack;
165 + struct bpf_stack_state *new_stack;
166 + int slot = size / BPF_REG_SIZE;
167 +
168 + if (size <= old_size || !size) {
169 + if (copy_old)
170 + return 0;
171 + state->allocated_stack = slot * BPF_REG_SIZE;
172 + if (!size && old_size) {
173 + kfree(state->stack);
174 + state->stack = NULL;
175 + }
176 + return 0;
177 + }
178 + new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
179 + GFP_KERNEL);
180 + if (!new_stack)
181 + return -ENOMEM;
182 + if (copy_old) {
183 + if (state->stack)
184 + memcpy(new_stack, state->stack,
185 + sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
186 + memset(new_stack + old_size / BPF_REG_SIZE, 0,
187 + sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
188 + }
189 + state->allocated_stack = slot * BPF_REG_SIZE;
190 + kfree(state->stack);
191 + state->stack = new_stack;
192 + return 0;
193 +}
194 +
195 +static void free_verifier_state(struct bpf_verifier_state *state)
196 +{
197 + kfree(state->stack);
198 + kfree(state);
199 +}
200 +
201 +/* copy verifier state from src to dst growing dst stack space
202 + * when necessary to accommodate larger src stack
203 + */
204 +static int copy_verifier_state(struct bpf_verifier_state *dst,
205 + const struct bpf_verifier_state *src)
206 +{
207 + int err;
208 +
209 + err = realloc_verifier_state(dst, src->allocated_stack, false);
210 + if (err)
211 + return err;
212 + memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
213 + return copy_stack_state(dst, src);
214 +}
215 +
216 +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
217 + int *insn_idx)
218 +{
219 + struct bpf_verifier_state *cur = env->cur_state;
220 + struct bpf_verifier_stack_elem *elem, *head = env->head;
221 + int err;
222
223 if (env->head == NULL)
224 - return -1;
225 + return -ENOENT;
226
227 - memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
228 - insn_idx = env->head->insn_idx;
229 + if (cur) {
230 + err = copy_verifier_state(cur, &head->st);
231 + if (err)
232 + return err;
233 + }
234 + if (insn_idx)
235 + *insn_idx = head->insn_idx;
236 if (prev_insn_idx)
237 - *prev_insn_idx = env->head->prev_insn_idx;
238 - elem = env->head->next;
239 - kfree(env->head);
240 + *prev_insn_idx = head->prev_insn_idx;
241 + elem = head->next;
242 + kfree(head);
243 env->head = elem;
244 env->stack_size--;
245 - return insn_idx;
246 + return 0;
247 }
248
249 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
250 int insn_idx, int prev_insn_idx)
251 {
252 struct bpf_verifier_stack_elem *elem;
253 + struct bpf_verifier_state *cur = env->cur_state;
254 + int err;
255
256 - elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
257 + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
258 if (!elem)
259 goto err;
260
261 - memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
262 + err = copy_verifier_state(&elem->st, cur);
263 + if (err)
264 + return NULL;
265 elem->insn_idx = insn_idx;
266 elem->prev_insn_idx = prev_insn_idx;
267 elem->next = env->head;
268 @@ -475,7 +564,7 @@ static struct bpf_verifier_state *push_s
269 return &elem->st;
270 err:
271 /* pop all elements and return */
272 - while (pop_stack(env, NULL) >= 0);
273 + while (!pop_stack(env, NULL, NULL));
274 return NULL;
275 }
276
277 @@ -671,7 +760,7 @@ static void mark_reg_read(const struct b
278 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
279 enum reg_arg_type t)
280 {
281 - struct bpf_reg_state *regs = env->cur_state.regs;
282 + struct bpf_reg_state *regs = env->cur_state->regs;
283
284 if (regno >= MAX_BPF_REG) {
285 verbose("R%d is invalid\n", regno);
286 @@ -684,7 +773,7 @@ static int check_reg_arg(struct bpf_veri
287 verbose("R%d !read_ok\n", regno);
288 return -EACCES;
289 }
290 - mark_reg_read(&env->cur_state, regno);
291 + mark_reg_read(env->cur_state, regno);
292 } else {
293 /* check whether register used as dest operand can be written to */
294 if (regno == BPF_REG_FP) {
295 @@ -721,10 +810,21 @@ static int check_stack_write(struct bpf_
296 struct bpf_verifier_state *state, int off,
297 int size, int value_regno, int insn_idx)
298 {
299 - int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
300 + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
301 +
302 + err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
303 + true);
304 + if (err)
305 + return err;
306 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
307 * so it's aligned access and [off, off + size) are within stack limits
308 */
309 + if (!env->allow_ptr_leaks &&
310 + state->stack[spi].slot_type[0] == STACK_SPILL &&
311 + size != BPF_REG_SIZE) {
312 + verbose("attempt to corrupt spilled pointer on stack\n");
313 + return -EACCES;
314 + }
315
316 if (value_regno >= 0 &&
317 is_spillable_regtype(state->regs[value_regno].type)) {
318 @@ -736,11 +836,11 @@ static int check_stack_write(struct bpf_
319 }
320
321 /* save register state */
322 - state->spilled_regs[spi] = state->regs[value_regno];
323 - state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
324 + state->stack[spi].spilled_ptr = state->regs[value_regno];
325 + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
326
327 for (i = 0; i < BPF_REG_SIZE; i++) {
328 - if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
329 + if (state->stack[spi].slot_type[i] == STACK_MISC &&
330 !env->allow_ptr_leaks) {
331 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
332 int soff = (-spi - 1) * BPF_REG_SIZE;
333 @@ -763,14 +863,15 @@ static int check_stack_write(struct bpf_
334 }
335 *poff = soff;
336 }
337 - state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
338 + state->stack[spi].slot_type[i] = STACK_SPILL;
339 }
340 } else {
341 /* regular write of data into stack */
342 - state->spilled_regs[spi] = (struct bpf_reg_state) {};
343 + state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
344
345 for (i = 0; i < size; i++)
346 - state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
347 + state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
348 + STACK_MISC;
349 }
350 return 0;
351 }
352 @@ -781,10 +882,10 @@ static void mark_stack_slot_read(const s
353
354 while (parent) {
355 /* if read wasn't screened by an earlier write ... */
356 - if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN)
357 + if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
358 break;
359 /* ... then we depend on parent's value */
360 - parent->spilled_regs[slot].live |= REG_LIVE_READ;
361 + parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
362 state = parent;
363 parent = state->parent;
364 }
365 @@ -793,34 +894,37 @@ static void mark_stack_slot_read(const s
366 static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
367 int value_regno)
368 {
369 - u8 *slot_type;
370 - int i, spi;
371 + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
372 + u8 *stype;
373
374 - slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
375 + if (state->allocated_stack <= slot) {
376 + verbose("invalid read from stack off %d+0 size %d\n",
377 + off, size);
378 + return -EACCES;
379 + }
380 + stype = state->stack[spi].slot_type;
381
382 - if (slot_type[0] == STACK_SPILL) {
383 + if (stype[0] == STACK_SPILL) {
384 if (size != BPF_REG_SIZE) {
385 verbose("invalid size of register spill\n");
386 return -EACCES;
387 }
388 for (i = 1; i < BPF_REG_SIZE; i++) {
389 - if (slot_type[i] != STACK_SPILL) {
390 + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
391 verbose("corrupted spill memory\n");
392 return -EACCES;
393 }
394 }
395
396 - spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
397 -
398 if (value_regno >= 0) {
399 /* restore register state from stack */
400 - state->regs[value_regno] = state->spilled_regs[spi];
401 + state->regs[value_regno] = state->stack[spi].spilled_ptr;
402 mark_stack_slot_read(state, spi);
403 }
404 return 0;
405 } else {
406 for (i = 0; i < size; i++) {
407 - if (slot_type[i] != STACK_MISC) {
408 + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
409 verbose("invalid read from stack off %d+%d size %d\n",
410 off, i, size);
411 return -EACCES;
412 @@ -837,7 +941,8 @@ static int check_stack_read(struct bpf_v
413 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
414 int size)
415 {
416 - struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
417 + struct bpf_reg_state *regs = cur_regs(env);
418 + struct bpf_map *map = regs[regno].map_ptr;
419
420 if (off < 0 || size <= 0 || off + size > map->value_size) {
421 verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
422 @@ -849,9 +954,9 @@ static int __check_map_access(struct bpf
423
424 /* check read/write into a map element with possible variable offset */
425 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
426 - int off, int size)
427 + int off, int size)
428 {
429 - struct bpf_verifier_state *state = &env->cur_state;
430 + struct bpf_verifier_state *state = env->cur_state;
431 struct bpf_reg_state *reg = &state->regs[regno];
432 int err;
433
434 @@ -924,7 +1029,7 @@ static bool may_access_direct_pkt_data(s
435 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
436 int off, int size)
437 {
438 - struct bpf_reg_state *regs = env->cur_state.regs;
439 + struct bpf_reg_state *regs = cur_regs(env);
440 struct bpf_reg_state *reg = &regs[regno];
441
442 if (off < 0 || size <= 0 || (u64)off + size > reg->range) {
443 @@ -938,7 +1043,7 @@ static int __check_packet_access(struct
444 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
445 int size)
446 {
447 - struct bpf_reg_state *regs = env->cur_state.regs;
448 + struct bpf_reg_state *regs = cur_regs(env);
449 struct bpf_reg_state *reg = &regs[regno];
450 int err;
451
452 @@ -1008,19 +1113,19 @@ static bool __is_pointer_value(bool allo
453
454 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
455 {
456 - return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
457 + return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
458 }
459
460 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
461 {
462 - const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
463 + const struct bpf_reg_state *reg = cur_regs(env) + regno;
464
465 return reg->type == PTR_TO_CTX;
466 }
467
468 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
469 {
470 - const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
471 + const struct bpf_reg_state *reg = cur_regs(env) + regno;
472
473 return reg->type == PTR_TO_PACKET;
474 }
475 @@ -1145,8 +1250,9 @@ static int check_mem_access(struct bpf_v
476 int off, int bpf_size, enum bpf_access_type t,
477 int value_regno, bool strict_alignment_once)
478 {
479 - struct bpf_verifier_state *state = &env->cur_state;
480 - struct bpf_reg_state *reg = &state->regs[regno];
481 + struct bpf_verifier_state *state = env->cur_state;
482 + struct bpf_reg_state *regs = cur_regs(env);
483 + struct bpf_reg_state *reg = regs + regno;
484 int size, err = 0;
485
486 size = bpf_size_to_bytes(bpf_size);
487 @@ -1170,7 +1276,7 @@ static int check_mem_access(struct bpf_v
488
489 err = check_map_access(env, regno, off, size);
490 if (!err && t == BPF_READ && value_regno >= 0)
491 - mark_reg_unknown(state->regs, value_regno);
492 + mark_reg_unknown(regs, value_regno);
493
494 } else if (reg->type == PTR_TO_CTX) {
495 enum bpf_reg_type reg_type = SCALAR_VALUE;
496 @@ -1203,13 +1309,13 @@ static int check_mem_access(struct bpf_v
497 * the offset is zero.
498 */
499 if (reg_type == SCALAR_VALUE)
500 - mark_reg_unknown(state->regs, value_regno);
501 + mark_reg_unknown(regs, value_regno);
502 else
503 - mark_reg_known_zero(state->regs, value_regno);
504 - state->regs[value_regno].id = 0;
505 - state->regs[value_regno].off = 0;
506 - state->regs[value_regno].range = 0;
507 - state->regs[value_regno].type = reg_type;
508 + mark_reg_known_zero(regs, value_regno);
509 + regs[value_regno].id = 0;
510 + regs[value_regno].off = 0;
511 + regs[value_regno].range = 0;
512 + regs[value_regno].type = reg_type;
513 }
514
515 } else if (reg->type == PTR_TO_STACK) {
516 @@ -1234,18 +1340,11 @@ static int check_mem_access(struct bpf_v
517 if (env->prog->aux->stack_depth < -off)
518 env->prog->aux->stack_depth = -off;
519
520 - if (t == BPF_WRITE) {
521 - if (!env->allow_ptr_leaks &&
522 - state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
523 - size != BPF_REG_SIZE) {
524 - verbose("attempt to corrupt spilled pointer on stack\n");
525 - return -EACCES;
526 - }
527 + if (t == BPF_WRITE)
528 err = check_stack_write(env, state, off, size,
529 value_regno, insn_idx);
530 - } else {
531 + else
532 err = check_stack_read(state, off, size, value_regno);
533 - }
534 } else if (reg->type == PTR_TO_PACKET) {
535 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
536 verbose("cannot write into packet\n");
537 @@ -1258,7 +1357,7 @@ static int check_mem_access(struct bpf_v
538 }
539 err = check_packet_access(env, regno, off, size);
540 if (!err && t == BPF_READ && value_regno >= 0)
541 - mark_reg_unknown(state->regs, value_regno);
542 + mark_reg_unknown(regs, value_regno);
543 } else {
544 verbose("R%d invalid mem access '%s'\n",
545 regno, reg_type_str[reg->type]);
546 @@ -1266,9 +1365,9 @@ static int check_mem_access(struct bpf_v
547 }
548
549 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
550 - state->regs[value_regno].type == SCALAR_VALUE) {
551 + regs[value_regno].type == SCALAR_VALUE) {
552 /* b/h/w load zero-extends, mark upper bits as known 0 */
553 - coerce_reg_to_size(&state->regs[value_regno], size);
554 + coerce_reg_to_size(&regs[value_regno], size);
555 }
556 return err;
557 }
558 @@ -1333,9 +1432,9 @@ static int check_stack_boundary(struct b
559 int access_size, bool zero_size_allowed,
560 struct bpf_call_arg_meta *meta)
561 {
562 - struct bpf_verifier_state *state = &env->cur_state;
563 + struct bpf_verifier_state *state = env->cur_state;
564 struct bpf_reg_state *regs = state->regs;
565 - int off, i;
566 + int off, i, slot, spi;
567
568 if (regs[regno].type != PTR_TO_STACK) {
569 /* Allow zero-byte read from NULL, regardless of pointer type */
570 @@ -1376,7 +1475,11 @@ static int check_stack_boundary(struct b
571 }
572
573 for (i = 0; i < access_size; i++) {
574 - if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
575 + slot = -(off + i) - 1;
576 + spi = slot / BPF_REG_SIZE;
577 + if (state->allocated_stack <= slot ||
578 + state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
579 + STACK_MISC) {
580 verbose("invalid indirect read from stack off %d+%d size %d\n",
581 off, i, access_size);
582 return -EACCES;
583 @@ -1389,7 +1492,7 @@ static int check_helper_mem_access(struc
584 int access_size, bool zero_size_allowed,
585 struct bpf_call_arg_meta *meta)
586 {
587 - struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
588 + struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
589
590 switch (reg->type) {
591 case PTR_TO_PACKET:
592 @@ -1406,7 +1509,7 @@ static int check_func_arg(struct bpf_ver
593 enum bpf_arg_type arg_type,
594 struct bpf_call_arg_meta *meta)
595 {
596 - struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
597 + struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
598 enum bpf_reg_type expected_type, type = reg->type;
599 int err = 0;
600
601 @@ -1678,7 +1781,7 @@ static int check_raw_mode(const struct b
602 */
603 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
604 {
605 - struct bpf_verifier_state *state = &env->cur_state;
606 + struct bpf_verifier_state *state = env->cur_state;
607 struct bpf_reg_state *regs = state->regs, *reg;
608 int i;
609
610 @@ -1687,10 +1790,10 @@ static void clear_all_pkt_pointers(struc
611 regs[i].type == PTR_TO_PACKET_END)
612 mark_reg_unknown(regs, i);
613
614 - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
615 - if (state->stack_slot_type[i] != STACK_SPILL)
616 + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
617 + if (state->stack[i].slot_type[0] != STACK_SPILL)
618 continue;
619 - reg = &state->spilled_regs[i / BPF_REG_SIZE];
620 + reg = &state->stack[i].spilled_ptr;
621 if (reg->type != PTR_TO_PACKET &&
622 reg->type != PTR_TO_PACKET_END)
623 continue;
624 @@ -1700,9 +1803,8 @@ static void clear_all_pkt_pointers(struc
625
626 static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
627 {
628 - struct bpf_verifier_state *state = &env->cur_state;
629 const struct bpf_func_proto *fn = NULL;
630 - struct bpf_reg_state *regs = state->regs;
631 + struct bpf_reg_state *regs;
632 struct bpf_call_arg_meta meta;
633 bool changes_data;
634 int i, err;
635 @@ -1776,6 +1878,7 @@ static int check_call(struct bpf_verifie
636 return err;
637 }
638
639 + regs = cur_regs(env);
640 /* reset caller saved regs */
641 for (i = 0; i < CALLER_SAVED_REGS; i++) {
642 mark_reg_not_init(regs, caller_saved[i]);
643 @@ -1890,7 +1993,7 @@ static int adjust_ptr_min_max_vals(struc
644 const struct bpf_reg_state *ptr_reg,
645 const struct bpf_reg_state *off_reg)
646 {
647 - struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
648 + struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
649 bool known = tnum_is_const(off_reg->var_off);
650 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
651 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
652 @@ -2097,7 +2200,7 @@ static int adjust_scalar_min_max_vals(st
653 struct bpf_reg_state *dst_reg,
654 struct bpf_reg_state src_reg)
655 {
656 - struct bpf_reg_state *regs = env->cur_state.regs;
657 + struct bpf_reg_state *regs = cur_regs(env);
658 u8 opcode = BPF_OP(insn->code);
659 bool src_known, dst_known;
660 s64 smin_val, smax_val;
661 @@ -2345,7 +2448,7 @@ static int adjust_scalar_min_max_vals(st
662 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
663 struct bpf_insn *insn)
664 {
665 - struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
666 + struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
667 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
668 u8 opcode = BPF_OP(insn->code);
669 int rc;
670 @@ -2419,12 +2522,12 @@ static int adjust_reg_min_max_vals(struc
671
672 /* Got here implies adding two SCALAR_VALUEs */
673 if (WARN_ON_ONCE(ptr_reg)) {
674 - print_verifier_state(&env->cur_state);
675 + print_verifier_state(env->cur_state);
676 verbose("verifier internal error: unexpected ptr_reg\n");
677 return -EINVAL;
678 }
679 if (WARN_ON(!src_reg)) {
680 - print_verifier_state(&env->cur_state);
681 + print_verifier_state(env->cur_state);
682 verbose("verifier internal error: no src_reg\n");
683 return -EINVAL;
684 }
685 @@ -2434,7 +2537,7 @@ static int adjust_reg_min_max_vals(struc
686 /* check validity of 32-bit and 64-bit arithmetic operations */
687 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
688 {
689 - struct bpf_reg_state *regs = env->cur_state.regs;
690 + struct bpf_reg_state *regs = cur_regs(env);
691 u8 opcode = BPF_OP(insn->code);
692 int err;
693
694 @@ -2661,10 +2764,10 @@ static void find_good_pkt_pointers(struc
695 /* keep the maximum range already checked */
696 regs[i].range = max(regs[i].range, new_range);
697
698 - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
699 - if (state->stack_slot_type[i] != STACK_SPILL)
700 + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
701 + if (state->stack[i].slot_type[0] != STACK_SPILL)
702 continue;
703 - reg = &state->spilled_regs[i / BPF_REG_SIZE];
704 + reg = &state->stack[i].spilled_ptr;
705 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
706 reg->range = max(reg->range, new_range);
707 }
708 @@ -2914,17 +3017,17 @@ static void mark_map_regs(struct bpf_ver
709 for (i = 0; i < MAX_BPF_REG; i++)
710 mark_map_reg(regs, i, id, is_null);
711
712 - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
713 - if (state->stack_slot_type[i] != STACK_SPILL)
714 + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
715 + if (state->stack[i].slot_type[0] != STACK_SPILL)
716 continue;
717 - mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null);
718 + mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
719 }
720 }
721
722 static int check_cond_jmp_op(struct bpf_verifier_env *env,
723 struct bpf_insn *insn, int *insn_idx)
724 {
725 - struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
726 + struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
727 struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
728 u8 opcode = BPF_OP(insn->code);
729 int err;
730 @@ -3087,7 +3190,7 @@ static struct bpf_map *ld_imm64_to_map_p
731 /* verify BPF_LD_IMM64 instruction */
732 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
733 {
734 - struct bpf_reg_state *regs = env->cur_state.regs;
735 + struct bpf_reg_state *regs = cur_regs(env);
736 int err;
737
738 if (BPF_SIZE(insn->code) != BPF_DW) {
739 @@ -3148,7 +3251,7 @@ static bool may_access_skb(enum bpf_prog
740 */
741 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
742 {
743 - struct bpf_reg_state *regs = env->cur_state.regs;
744 + struct bpf_reg_state *regs = cur_regs(env);
745 u8 mode = BPF_MODE(insn->code);
746 int i, err;
747
748 @@ -3534,6 +3637,57 @@ static bool regsafe(struct bpf_reg_state
749 return false;
750 }
751
752 +static bool stacksafe(struct bpf_verifier_state *old,
753 + struct bpf_verifier_state *cur,
754 + struct idpair *idmap)
755 +{
756 + int i, spi;
757 +
758 + /* if explored stack has more populated slots than current stack
759 + * such stacks are not equivalent
760 + */
761 + if (old->allocated_stack > cur->allocated_stack)
762 + return false;
763 +
764 + /* walk slots of the explored stack and ignore any additional
765 + * slots in the current stack, since explored(safe) state
766 + * didn't use them
767 + */
768 + for (i = 0; i < old->allocated_stack; i++) {
769 + spi = i / BPF_REG_SIZE;
770 +
771 + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
772 + continue;
773 + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
774 + cur->stack[spi].slot_type[i % BPF_REG_SIZE])
775 + /* Ex: old explored (safe) state has STACK_SPILL in
776 + * this stack slot, but current has has STACK_MISC ->
777 + * this verifier states are not equivalent,
778 + * return false to continue verification of this path
779 + */
780 + return false;
781 + if (i % BPF_REG_SIZE)
782 + continue;
783 + if (old->stack[spi].slot_type[0] != STACK_SPILL)
784 + continue;
785 + if (!regsafe(&old->stack[spi].spilled_ptr,
786 + &cur->stack[spi].spilled_ptr,
787 + idmap))
788 + /* when explored and current stack slot are both storing
789 + * spilled registers, check that stored pointers types
790 + * are the same as well.
791 + * Ex: explored safe path could have stored
792 + * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
793 + * but current path has stored:
794 + * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
795 + * such verifier states are not equivalent.
796 + * return false to continue verification of this path
797 + */
798 + return false;
799 + }
800 + return true;
801 +}
802 +
803 /* compare two verifier states
804 *
805 * all states stored in state_list are known to be valid, since
806 @@ -3578,37 +3732,8 @@ static bool states_equal(struct bpf_veri
807 goto out_free;
808 }
809
810 - for (i = 0; i < MAX_BPF_STACK; i++) {
811 - if (old->stack_slot_type[i] == STACK_INVALID)
812 - continue;
813 - if (old->stack_slot_type[i] != cur->stack_slot_type[i])
814 - /* Ex: old explored (safe) state has STACK_SPILL in
815 - * this stack slot, but current has has STACK_MISC ->
816 - * this verifier states are not equivalent,
817 - * return false to continue verification of this path
818 - */
819 - goto out_free;
820 - if (i % BPF_REG_SIZE)
821 - continue;
822 - if (old->stack_slot_type[i] != STACK_SPILL)
823 - continue;
824 - if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
825 - &cur->spilled_regs[i / BPF_REG_SIZE],
826 - idmap))
827 - /* when explored and current stack slot are both storing
828 - * spilled registers, check that stored pointers types
829 - * are the same as well.
830 - * Ex: explored safe path could have stored
831 - * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
832 - * but current path has stored:
833 - * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
834 - * such verifier states are not equivalent.
835 - * return false to continue verification of this path
836 - */
837 - goto out_free;
838 - else
839 - continue;
840 - }
841 + if (!stacksafe(old, cur, idmap))
842 + goto out_free;
843 ret = true;
844 out_free:
845 kfree(idmap);
846 @@ -3644,17 +3769,19 @@ static bool do_propagate_liveness(const
847 }
848 }
849 /* ... and stack slots */
850 - for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) {
851 - if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
852 + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
853 + i < parent->allocated_stack / BPF_REG_SIZE; i++) {
854 + if (parent->stack[i].slot_type[0] != STACK_SPILL)
855 continue;
856 - if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
857 + if (state->stack[i].slot_type[0] != STACK_SPILL)
858 continue;
859 - if (parent->spilled_regs[i].live & REG_LIVE_READ)
860 + if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
861 continue;
862 - if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN))
863 + if (writes &&
864 + (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
865 continue;
866 - if (state->spilled_regs[i].live & REG_LIVE_READ) {
867 - parent->spilled_regs[i].live |= REG_LIVE_READ;
868 + if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
869 + parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
870 touched = true;
871 }
872 }
873 @@ -3684,6 +3811,7 @@ static int is_state_visited(struct bpf_v
874 {
875 struct bpf_verifier_state_list *new_sl;
876 struct bpf_verifier_state_list *sl;
877 + struct bpf_verifier_state *cur = env->cur_state;
878 int i;
879
880 sl = env->explored_states[insn_idx];
881 @@ -3694,7 +3822,7 @@ static int is_state_visited(struct bpf_v
882 return 0;
883
884 while (sl != STATE_LIST_MARK) {
885 - if (states_equal(env, &sl->state, &env->cur_state)) {
886 + if (states_equal(env, &sl->state, cur)) {
887 /* reached equivalent register/stack state,
888 * prune the search.
889 * Registers read by the continuation are read by us.
890 @@ -3705,7 +3833,7 @@ static int is_state_visited(struct bpf_v
891 * they'll be immediately forgotten as we're pruning
892 * this state and will pop a new one.
893 */
894 - propagate_liveness(&sl->state, &env->cur_state);
895 + propagate_liveness(&sl->state, cur);
896 return 1;
897 }
898 sl = sl->next;
899 @@ -3717,16 +3845,16 @@ static int is_state_visited(struct bpf_v
900 * it will be rejected. Since there are no loops, we won't be
901 * seeing this 'insn_idx' instruction again on the way to bpf_exit
902 */
903 - new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
904 + new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
905 if (!new_sl)
906 return -ENOMEM;
907
908 /* add new state to the head of linked list */
909 - memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
910 + copy_verifier_state(&new_sl->state, cur);
911 new_sl->next = env->explored_states[insn_idx];
912 env->explored_states[insn_idx] = new_sl;
913 /* connect new state to parentage chain */
914 - env->cur_state.parent = &new_sl->state;
915 + cur->parent = &new_sl->state;
916 /* clear write marks in current state: the writes we did are not writes
917 * our child did, so they don't screen off its reads from us.
918 * (There are no read marks in current state, because reads always mark
919 @@ -3734,10 +3862,10 @@ static int is_state_visited(struct bpf_v
920 * explored_states can get read marks.)
921 */
922 for (i = 0; i < BPF_REG_FP; i++)
923 - env->cur_state.regs[i].live = REG_LIVE_NONE;
924 - for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++)
925 - if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL)
926 - env->cur_state.spilled_regs[i].live = REG_LIVE_NONE;
927 + cur->regs[i].live = REG_LIVE_NONE;
928 + for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
929 + if (cur->stack[i].slot_type[0] == STACK_SPILL)
930 + cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
931 return 0;
932 }
933
934 @@ -3752,15 +3880,19 @@ static int ext_analyzer_insn_hook(struct
935
936 static int do_check(struct bpf_verifier_env *env)
937 {
938 - struct bpf_verifier_state *state = &env->cur_state;
939 + struct bpf_verifier_state *state;
940 struct bpf_insn *insns = env->prog->insnsi;
941 - struct bpf_reg_state *regs = state->regs;
942 + struct bpf_reg_state *regs;
943 int insn_cnt = env->prog->len;
944 int insn_idx, prev_insn_idx = 0;
945 int insn_processed = 0;
946 bool do_print_state = false;
947
948 - init_reg_state(regs);
949 + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
950 + if (!state)
951 + return -ENOMEM;
952 + env->cur_state = state;
953 + init_reg_state(state->regs);
954 state->parent = NULL;
955 insn_idx = 0;
956 for (;;) {
957 @@ -3807,7 +3939,7 @@ static int do_check(struct bpf_verifier_
958 else
959 verbose("\nfrom %d to %d:",
960 prev_insn_idx, insn_idx);
961 - print_verifier_state(&env->cur_state);
962 + print_verifier_state(env->cur_state);
963 do_print_state = false;
964 }
965
966 @@ -3820,6 +3952,7 @@ static int do_check(struct bpf_verifier_
967 if (err)
968 return err;
969
970 + regs = cur_regs(env);
971 env->insn_aux_data[insn_idx].seen = true;
972 if (class == BPF_ALU || class == BPF_ALU64) {
973 err = check_alu_op(env, insn);
974 @@ -3991,8 +4124,10 @@ static int do_check(struct bpf_verifier_
975 }
976
977 process_bpf_exit:
978 - insn_idx = pop_stack(env, &prev_insn_idx);
979 - if (insn_idx < 0) {
980 + err = pop_stack(env, &prev_insn_idx, &insn_idx);
981 + if (err < 0) {
982 + if (err != -ENOENT)
983 + return err;
984 break;
985 } else {
986 do_print_state = true;
987 @@ -4633,9 +4768,11 @@ int bpf_check(struct bpf_prog **prog, un
988 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
989
990 ret = do_check(env);
991 + free_verifier_state(env->cur_state);
992 + env->cur_state = NULL;
993
994 skip_full_check:
995 - while (pop_stack(env, NULL) >= 0);
996 + while (!pop_stack(env, NULL, NULL));
997 free_states(env);
998
999 if (ret == 0)
1000 @@ -4741,9 +4878,11 @@ int bpf_analyzer(struct bpf_prog *prog,
1001 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
1002
1003 ret = do_check(env);
1004 + free_verifier_state(env->cur_state);
1005 + env->cur_state = NULL;
1006
1007 skip_full_check:
1008 - while (pop_stack(env, NULL) >= 0);
1009 + while (!pop_stack(env, NULL, NULL));
1010 free_states(env);
1011
1012 mutex_unlock(&bpf_verifier_lock);