From foo@baz Wed Apr 17 20:59:12 CEST 2019
From: Balbir Singh <sblbir@amzn.com>
Date: Wed, 3 Apr 2019 18:39:01 +0000
Subject: bpf: reduce verifier memory consumption
To: <gregkh@linuxfoundation.org>
Cc: <stable@kernel.org>, <daniel@iogearbox.net>, <jannh@google.com>, <sblbir@amazon.com>, Alexei Starovoitov <ast@fb.com>, <jakub.kicinski@netronome.com>, Alexei Starovoitov <ast@kernel.org>, "David S . Miller" <davem@davemloft.net>, Balbir Singh <sblbir@amzn.com>
Message-ID: <20190403183917.13749-2-sblbir@amzn.com>

From: Alexei Starovoitov <ast@fb.com>

commit 638f5b90d46016372a8e3e0a434f199cc5e12b8c upstream.

The verifier got progressively smarter over time, and the size of its
internal state grew as well. Time to reduce the memory consumption.

Before:
sizeof(struct bpf_verifier_state) = 6520
After:
sizeof(struct bpf_verifier_state) = 896

This is done by observing that the majority of BPF programs use little
to no stack, whereas the verifier always kept all 512 stack slots ready.
Instead, the verifier state is now reallocated dynamically when stack
access is detected.
The runtime difference before vs. after is within the noise.
The number of processed instructions stays the same.
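
For a feel for where those numbers come from, here is a compilable
userspace sketch. The struct bodies are simplified stand-ins (the field
sizes are assumptions, not the kernel's real layout); only the shape of
the change, fixed arrays vs. a pointer plus length, is the point:

/* Illustrative only: why moving the two fixed-size stack arrays behind
 * a pointer shrinks the per-state footprint. Sizes are assumptions.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_BPF_REG   11
#define MAX_BPF_STACK 512
#define BPF_REG_SIZE  8

struct mock_reg_state { uint64_t fields[10]; };   /* assumed ~80 bytes */

struct old_state {                                /* everything embedded */
	struct mock_reg_state regs[MAX_BPF_REG];
	uint8_t stack_slot_type[MAX_BPF_STACK];
	struct mock_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
};

struct new_state {                                /* stack allocated on demand */
	struct mock_reg_state regs[MAX_BPF_REG];
	int allocated_stack;                      /* bytes currently tracked */
	struct mock_stack_state {
		struct mock_reg_state spilled_ptr;
		uint8_t slot_type[BPF_REG_SIZE];
	} *stack;
};

int main(void)
{
	printf("old: %zu bytes per state\n", sizeof(struct old_state));
	printf("new: %zu bytes per state\n", sizeof(struct new_state));
	return 0;
}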

Cc: jakub.kicinski@netronome.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
[Backported to 4.14 by sblbir]
Signed-off-by: Balbir Singh <sblbir@amzn.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |    9
 include/linux/bpf_verifier.h                      |   16
 kernel/bpf/verifier.c                             |  433 ++++++++++++++--------
 3 files changed, 304 insertions(+), 154 deletions(-)

--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -76,9 +76,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_p
 nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
- const struct bpf_verifier_env *env)
+ struct bpf_verifier_env *env)
- const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+ const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0;
 if (nfp_prog->act == NN_ACT_XDP)
@@ -113,9 +113,10 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_
 nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
- const struct bpf_verifier_env *env, u8 reg)
+ struct bpf_verifier_env *env, u8 reg_no)
- if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+ const struct bpf_reg_state *reg = cur_regs(env) + reg_no;
+ if (reg->type != PTR_TO_CTX)
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -91,14 +91,19 @@ enum bpf_stack_slot_type {
 #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
+struct bpf_stack_state {
+ struct bpf_reg_state spilled_ptr;
+ u8 slot_type[BPF_REG_SIZE];
 /* state of the program:
 * type of all registers and stack info
 struct bpf_verifier_state {
 struct bpf_reg_state regs[MAX_BPF_REG];
- u8 stack_slot_type[MAX_BPF_STACK];
- struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
 struct bpf_verifier_state *parent;
+ int allocated_stack;
+ struct bpf_stack_state *stack;
 /* linked list of verifier states used to prune search */
@@ -133,7 +138,7 @@ struct bpf_verifier_env {
 struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
 int stack_size; /* number of states to be processed */
 bool strict_alignment; /* perform strict pointer alignment checks */
- struct bpf_verifier_state cur_state; /* current verifier state */
+ struct bpf_verifier_state *cur_state; /* current verifier state */
 struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
 const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
 void *analyzer_priv; /* pointer to external analyzer's private data */
@@ -145,6 +150,11 @@ struct bpf_verifier_env {
 struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+ return env->cur_state->regs;
 int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
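
The new cur_regs() helper is what lets the many call sites below change
mechanically. A toy model of the indirection, with simplified stand-in
types (not the kernel structs):

#include <stdio.h>

struct toy_reg_state { int type; };
struct toy_verifier_state { struct toy_reg_state regs[11]; };
struct toy_verifier_env { struct toy_verifier_state *cur_state; };

static inline struct toy_reg_state *cur_regs(struct toy_verifier_env *env)
{
	return env->cur_state->regs;   /* one extra hop, hidden here */
}

int main(void)
{
	struct toy_verifier_state st = { .regs[0].type = 1 };
	struct toy_verifier_env env = { .cur_state = &st };

	/* the old style would have been &env.cur_state.regs[0] */
	printf("reg0 type = %d\n", (cur_regs(&env) + 0)->type);
	return 0;
}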
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -265,10 +265,11 @@ static void print_verifier_state(struct
- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
- if (state->stack_slot_type[i] == STACK_SPILL)
- verbose(" fp%d=%s", -MAX_BPF_STACK + i,
- reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]);
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] == STACK_SPILL)
+ verbose(" fp%d=%s",
+ -MAX_BPF_STACK + i * BPF_REG_SIZE,
+ reg_type_str[state->stack[i].spilled_ptr.type]);
@@ -434,35 +435,123 @@ static void print_bpf_insn(const struct
-static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
+static int copy_stack_state(struct bpf_verifier_state *dst,
+ const struct bpf_verifier_state *src)
- struct bpf_verifier_stack_elem *elem;
+ if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) {
+ /* internal bug, make state invalid to reject the program */
+ memset(dst, 0, sizeof(*dst));
+ memcpy(dst->stack, src->stack,
+ sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE));
+/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
+ * make it consume minimal amount of memory. check_stack_write() access from
+ * the program calls into realloc_verifier_state() to grow the stack size.
+ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
+ * which this function copies over. It points to previous bpf_verifier_state
+ * which is never reallocated
+static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
+ u32 old_size = state->allocated_stack;
+ struct bpf_stack_state *new_stack;
+ int slot = size / BPF_REG_SIZE;
+ if (size <= old_size || !size) {
+ state->allocated_stack = slot * BPF_REG_SIZE;
+ if (!size && old_size) {
+ kfree(state->stack);
+ state->stack = NULL;
+ new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state),
+ memcpy(new_stack, state->stack,
+ sizeof(*new_stack) * (old_size / BPF_REG_SIZE));
+ memset(new_stack + old_size / BPF_REG_SIZE, 0,
+ sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE);
+ state->allocated_stack = slot * BPF_REG_SIZE;
+ kfree(state->stack);
+ state->stack = new_stack;
+static void free_verifier_state(struct bpf_verifier_state *state)
+ kfree(state->stack);
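
A userspace sketch of the grow-or-free policy visible in the fragment
above. It is simplified: the kernel version also takes a copy_old flag
and returns -ENOMEM via kmalloc_array(); both are elided here.

#include <stdlib.h>
#include <string.h>

#define BPF_REG_SIZE 8

struct sketch_state {
	int allocated_stack;              /* bytes */
	unsigned char *stack;
};

/* grow: allocate, copy old contents, zero the new tail;
 * shrink: keep the buffer, only lower the bookkeeping;
 * size 0: free the buffer entirely.
 */
static int sketch_realloc_stack(struct sketch_state *state, int size)
{
	int old_size = state->allocated_stack;
	int slot = size / BPF_REG_SIZE;   /* round down to whole slots */
	unsigned char *new_stack;

	if (size <= old_size || !size) {
		state->allocated_stack = slot * BPF_REG_SIZE;
		if (!size && old_size) {
			free(state->stack);
			state->stack = NULL;
		}
		return 0;
	}
	new_stack = malloc(slot * BPF_REG_SIZE);
	if (!new_stack)
		return -1;                /* kernel version: -ENOMEM */
	if (state->stack)
		memcpy(new_stack, state->stack, old_size);
	memset(new_stack + old_size, 0, slot * BPF_REG_SIZE - old_size);
	free(state->stack);
	state->stack = new_stack;
	state->allocated_stack = slot * BPF_REG_SIZE;
	return 0;
}

int main(void)
{
	struct sketch_state st = { 0, NULL };

	sketch_realloc_stack(&st, 24);    /* first deep spill grows the stack */
	sketch_realloc_stack(&st, 8);     /* logical shrink, buffer kept */
	sketch_realloc_stack(&st, 0);     /* full release */
	return 0;
}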
+/* copy verifier state from src to dst growing dst stack space
+ * when necessary to accommodate larger src stack
+static int copy_verifier_state(struct bpf_verifier_state *dst,
+ const struct bpf_verifier_state *src)
+ err = realloc_verifier_state(dst, src->allocated_stack, false);
+ memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
+ return copy_stack_state(dst, src);
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
+ struct bpf_verifier_state *cur = env->cur_state;
+ struct bpf_verifier_stack_elem *elem, *head = env->head;
 if (env->head == NULL)
- memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
- insn_idx = env->head->insn_idx;
+ err = copy_verifier_state(cur, &head->st);
+ *insn_idx = head->insn_idx;
- *prev_insn_idx = env->head->prev_insn_idx;
- elem = env->head->next;
+ *prev_insn_idx = head->prev_insn_idx;
 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 int insn_idx, int prev_insn_idx)
 struct bpf_verifier_stack_elem *elem;
+ struct bpf_verifier_state *cur = env->cur_state;
- elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
- memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
+ err = copy_verifier_state(&elem->st, cur);
 elem->insn_idx = insn_idx;
 elem->prev_insn_idx = prev_insn_idx;
 elem->next = env->head;
@@ -475,7 +564,7 @@ static struct bpf_verifier_state *push_s
 /* pop all elements and return */
- while (pop_stack(env, NULL) >= 0);
+ while (!pop_stack(env, NULL, NULL));
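
pop_stack() now reports status through its return value and hands the
indices back through nullable out-parameters, which is why the pop-all
loop above becomes "while (!pop_stack(env, NULL, NULL));". A small
userspace mock of that contract (types are stand-ins):

#include <errno.h>
#include <stdio.h>

struct elem { int insn_idx, prev_insn_idx; struct elem *next; };

static struct elem *head;    /* stands in for env->head */

static int pop_stack_mock(int *prev_insn_idx, int *insn_idx)
{
	struct elem *e = head;

	if (!e)
		return -ENOENT;          /* nothing left to explore */
	if (insn_idx)
		*insn_idx = e->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = e->prev_insn_idx;
	head = e->next;
	return 0;
}

int main(void)
{
	struct elem e = { .insn_idx = 5, .prev_insn_idx = 3 };
	int insn, prev;

	head = &e;
	while (!pop_stack_mock(&prev, &insn))   /* same loop shape as the patch */
		printf("popped insn %d (from %d)\n", insn, prev);
	return 0;
}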
@@ -671,7 +760,7 @@ static void mark_reg_read(const struct b
 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state->regs;
 if (regno >= MAX_BPF_REG) {
 verbose("R%d is invalid\n", regno);
@@ -684,7 +773,7 @@ static int check_reg_arg(struct bpf_veri
 verbose("R%d !read_ok\n", regno);
- mark_reg_read(&env->cur_state, regno);
+ mark_reg_read(env->cur_state, regno);
 /* check whether register used as dest operand can be written to */
 if (regno == BPF_REG_FP) {
@@ -721,10 +810,21 @@ static int check_stack_write(struct bpf_
 struct bpf_verifier_state *state, int off,
 int size, int value_regno, int insn_idx)
- int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
+ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
+ err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
 * so it's aligned access and [off, off + size) are within stack limits
+ if (!env->allow_ptr_leaks &&
+ state->stack[spi].slot_type[0] == STACK_SPILL &&
+ size != BPF_REG_SIZE) {
+ verbose("attempt to corrupt spilled pointer on stack\n");
 if (value_regno >= 0 &&
 is_spillable_regtype(state->regs[value_regno].type)) {
@@ -736,11 +836,11 @@ static int check_stack_write(struct bpf_
 /* save register state */
- state->spilled_regs[spi] = state->regs[value_regno];
- state->spilled_regs[spi].live |= REG_LIVE_WRITTEN;
+ state->stack[spi].spilled_ptr = state->regs[value_regno];
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 for (i = 0; i < BPF_REG_SIZE; i++) {
- if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
+ if (state->stack[spi].slot_type[i] == STACK_MISC &&
 !env->allow_ptr_leaks) {
 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
 int soff = (-spi - 1) * BPF_REG_SIZE;
@@ -763,14 +863,15 @@ static int check_stack_write(struct bpf_
- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+ state->stack[spi].slot_type[i] = STACK_SPILL;
 /* regular write of data into stack */
- state->spilled_regs[spi] = (struct bpf_reg_state) {};
+ state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
 for (i = 0; i < size; i++)
- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
+ state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
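
The new index arithmetic is easy to sanity-check in isolation. A small
demo, assuming aligned slot-sized accesses (which the verifier enforces
for spills): a negative frame-pointer offset maps to a byte index
counted down from fp-1, then to a bpf_stack_state entry, and the
(slot - i) % BPF_REG_SIZE expression walks the bytes of that one slot.

#include <stdio.h>

#define BPF_REG_SIZE 8

static int round_up_to(int x, int m) { return (x + m - 1) & ~(m - 1); }

int main(void)
{
	int offsets[] = { -8, -16, -24 };
	int k;

	for (k = 0; k < 3; k++) {
		int off = offsets[k];
		int slot = -off - 1;           /* byte at 'off', counted from fp-1 */
		int spi = slot / BPF_REG_SIZE; /* which bpf_stack_state entry */

		printf("off=%4d -> slot=%2d spi=%d needs allocated_stack >= %d\n",
		       off, slot, spi, round_up_to(slot + 1, BPF_REG_SIZE));
	}
	return 0;
}

For off = -8 this prints slot=7, spi=0 and a required allocated_stack of
8 bytes, matching the realloc_verifier_state() call above.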
@@ -781,10 +882,10 @@ static void mark_stack_slot_read(const s
 /* if read wasn't screened by an earlier write ... */
- if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN)
+ if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
 /* ... then we depend on parent's value */
- parent->spilled_regs[slot].live |= REG_LIVE_READ;
+ parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
 parent = state->parent;
@@ -793,34 +894,37 @@ static void mark_stack_slot_read(const s
 static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
+ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
- slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
+ if (state->allocated_stack <= slot) {
+ verbose("invalid read from stack off %d+0 size %d\n",
+ stype = state->stack[spi].slot_type;
- if (slot_type[0] == STACK_SPILL) {
+ if (stype[0] == STACK_SPILL) {
 if (size != BPF_REG_SIZE) {
 verbose("invalid size of register spill\n");
 for (i = 1; i < BPF_REG_SIZE; i++) {
- if (slot_type[i] != STACK_SPILL) {
+ if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
 verbose("corrupted spill memory\n");
- spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
 if (value_regno >= 0) {
 /* restore register state from stack */
- state->regs[value_regno] = state->spilled_regs[spi];
+ state->regs[value_regno] = state->stack[spi].spilled_ptr;
 mark_stack_slot_read(state, spi);
 for (i = 0; i < size; i++) {
- if (slot_type[i] != STACK_MISC) {
+ if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
 verbose("invalid read from stack off %d+%d size %d\n",
@@ -837,7 +941,8 @@ static int check_stack_read(struct bpf_v
 static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
- struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_map *map = regs[regno].map_ptr;
 if (off < 0 || size <= 0 || off + size > map->value_size) {
 verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
@@ -849,9 +954,9 @@ static int __check_map_access(struct bpf
 /* check read/write into a map element with possible variable offset */
 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
- struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = env->cur_state;
 struct bpf_reg_state *reg = &state->regs[regno];
@@ -924,7 +1029,7 @@ static bool may_access_direct_pkt_data(s
 static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = cur_regs(env);
 struct bpf_reg_state *reg = &regs[regno];
 if (off < 0 || size <= 0 || (u64)off + size > reg->range) {
@@ -938,7 +1043,7 @@ static int __check_packet_access(struct
 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = cur_regs(env);
 struct bpf_reg_state *reg = &regs[regno];
@@ -1008,19 +1113,19 @@ static bool __is_pointer_value(bool allo
 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
- return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+ return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
- const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
+ const struct bpf_reg_state *reg = cur_regs(env) + regno;
 return reg->type == PTR_TO_CTX;
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
- const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
+ const struct bpf_reg_state *reg = cur_regs(env) + regno;
 return reg->type == PTR_TO_PACKET;
@@ -1145,8 +1250,9 @@ static int check_mem_access(struct bpf_v
 int off, int bpf_size, enum bpf_access_type t,
 int value_regno, bool strict_alignment_once)
- struct bpf_verifier_state *state = &env->cur_state;
- struct bpf_reg_state *reg = &state->regs[regno];
+ struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = regs + regno;
 size = bpf_size_to_bytes(bpf_size);
@@ -1170,7 +1276,7 @@ static int check_mem_access(struct bpf_v
 err = check_map_access(env, regno, off, size);
 if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(regs, value_regno);
 } else if (reg->type == PTR_TO_CTX) {
 enum bpf_reg_type reg_type = SCALAR_VALUE;
@@ -1203,13 +1309,13 @@ static int check_mem_access(struct bpf_v
 * the offset is zero.
 if (reg_type == SCALAR_VALUE)
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(regs, value_regno);
- mark_reg_known_zero(state->regs, value_regno);
- state->regs[value_regno].id = 0;
- state->regs[value_regno].off = 0;
- state->regs[value_regno].range = 0;
- state->regs[value_regno].type = reg_type;
+ mark_reg_known_zero(regs, value_regno);
+ regs[value_regno].id = 0;
+ regs[value_regno].off = 0;
+ regs[value_regno].range = 0;
+ regs[value_regno].type = reg_type;
 } else if (reg->type == PTR_TO_STACK) {
@@ -1234,18 +1340,11 @@ static int check_mem_access(struct bpf_v
 if (env->prog->aux->stack_depth < -off)
 env->prog->aux->stack_depth = -off;
- if (t == BPF_WRITE) {
- if (!env->allow_ptr_leaks &&
- state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
- size != BPF_REG_SIZE) {
- verbose("attempt to corrupt spilled pointer on stack\n");
+ if (t == BPF_WRITE)
 err = check_stack_write(env, state, off, size,
 value_regno, insn_idx);
 err = check_stack_read(state, off, size, value_regno);
 } else if (reg->type == PTR_TO_PACKET) {
 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
 verbose("cannot write into packet\n");
@@ -1258,7 +1357,7 @@ static int check_mem_access(struct bpf_v
 err = check_packet_access(env, regno, off, size);
 if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(regs, value_regno);
 verbose("R%d invalid mem access '%s'\n",
 regno, reg_type_str[reg->type]);
@@ -1266,9 +1365,9 @@ static int check_mem_access(struct bpf_v
 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
- state->regs[value_regno].type == SCALAR_VALUE) {
+ regs[value_regno].type == SCALAR_VALUE) {
 /* b/h/w load zero-extends, mark upper bits as known 0 */
- coerce_reg_to_size(&state->regs[value_regno], size);
+ coerce_reg_to_size(&regs[value_regno], size);
@@ -1333,9 +1432,9 @@ static int check_stack_boundary(struct b
 int access_size, bool zero_size_allowed,
 struct bpf_call_arg_meta *meta)
- struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = env->cur_state;
 struct bpf_reg_state *regs = state->regs;
+ int off, i, slot, spi;
 if (regs[regno].type != PTR_TO_STACK) {
 /* Allow zero-byte read from NULL, regardless of pointer type */
@@ -1376,7 +1475,11 @@ static int check_stack_boundary(struct b
 for (i = 0; i < access_size; i++) {
- if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
+ slot = -(off + i) - 1;
+ spi = slot / BPF_REG_SIZE;
+ if (state->allocated_stack <= slot ||
+ state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
 verbose("invalid indirect read from stack off %d+%d size %d\n",
 off, i, access_size);
@@ -1389,7 +1492,7 @@ static int check_helper_mem_access(struc
 int access_size, bool zero_size_allowed,
 struct bpf_call_arg_meta *meta)
- struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
@@ -1406,7 +1509,7 @@ static int check_func_arg(struct bpf_ver
 enum bpf_arg_type arg_type,
 struct bpf_call_arg_meta *meta)
- struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 enum bpf_reg_type expected_type, type = reg->type;
@@ -1678,7 +1781,7 @@ static int check_raw_mode(const struct b
 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
- struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = env->cur_state;
 struct bpf_reg_state *regs = state->regs, *reg;
@@ -1687,10 +1790,10 @@ static void clear_all_pkt_pointers(struc
 regs[i].type == PTR_TO_PACKET_END)
 mark_reg_unknown(regs, i);
- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
- if (state->stack_slot_type[i] != STACK_SPILL)
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
- reg = &state->spilled_regs[i / BPF_REG_SIZE];
+ reg = &state->stack[i].spilled_ptr;
 if (reg->type != PTR_TO_PACKET &&
 reg->type != PTR_TO_PACKET_END)
@@ -1700,9 +1803,8 @@ static void clear_all_pkt_pointers(struc
 static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
- struct bpf_verifier_state *state = &env->cur_state;
 const struct bpf_func_proto *fn = NULL;
- struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *regs;
 struct bpf_call_arg_meta meta;
@@ -1776,6 +1878,7 @@ static int check_call(struct bpf_verifie
+ regs = cur_regs(env);
 /* reset caller saved regs */
 for (i = 0; i < CALLER_SAVED_REGS; i++) {
 mark_reg_not_init(regs, caller_saved[i]);
@@ -1890,7 +1993,7 @@ static int adjust_ptr_min_max_vals(struc
 const struct bpf_reg_state *ptr_reg,
 const struct bpf_reg_state *off_reg)
- struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+ struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
 bool known = tnum_is_const(off_reg->var_off);
 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
@@ -2097,7 +2200,7 @@ static int adjust_scalar_min_max_vals(st
 struct bpf_reg_state *dst_reg,
 struct bpf_reg_state src_reg)
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = cur_regs(env);
 u8 opcode = BPF_OP(insn->code);
 bool src_known, dst_known;
 s64 smin_val, smax_val;
@@ -2345,7 +2448,7 @@ static int adjust_scalar_min_max_vals(st
 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 struct bpf_insn *insn)
- struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg;
+ struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
 u8 opcode = BPF_OP(insn->code);
@@ -2419,12 +2522,12 @@ static int adjust_reg_min_max_vals(struc
 /* Got here implies adding two SCALAR_VALUEs */
 if (WARN_ON_ONCE(ptr_reg)) {
- print_verifier_state(&env->cur_state);
+ print_verifier_state(env->cur_state);
 verbose("verifier internal error: unexpected ptr_reg\n");
 if (WARN_ON(!src_reg)) {
- print_verifier_state(&env->cur_state);
+ print_verifier_state(env->cur_state);
 verbose("verifier internal error: no src_reg\n");
@@ -2434,7 +2537,7 @@ static int adjust_reg_min_max_vals(struc
 /* check validity of 32-bit and 64-bit arithmetic operations */
 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = cur_regs(env);
 u8 opcode = BPF_OP(insn->code);
@@ -2661,10 +2764,10 @@ static void find_good_pkt_pointers(struc
 /* keep the maximum range already checked */
 regs[i].range = max(regs[i].range, new_range);
- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
- if (state->stack_slot_type[i] != STACK_SPILL)
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
- reg = &state->spilled_regs[i / BPF_REG_SIZE];
+ reg = &state->stack[i].spilled_ptr;
 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
 reg->range = max(reg->range, new_range);
@@ -2914,17 +3017,17 @@ static void mark_map_regs(struct bpf_ver
 for (i = 0; i < MAX_BPF_REG; i++)
 mark_map_reg(regs, i, id, is_null);
- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
- if (state->stack_slot_type[i] != STACK_SPILL)
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
- mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null);
+ mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
 static int check_cond_jmp_op(struct bpf_verifier_env *env,
 struct bpf_insn *insn, int *insn_idx)
- struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+ struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
 struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
 u8 opcode = BPF_OP(insn->code);
@@ -3087,7 +3190,7 @@ static struct bpf_map *ld_imm64_to_map_p
 /* verify BPF_LD_IMM64 instruction */
 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = cur_regs(env);
 if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -3148,7 +3251,7 @@ static bool may_access_skb(enum bpf_prog
 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
- struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = cur_regs(env);
 u8 mode = BPF_MODE(insn->code);
@@ -3534,6 +3637,57 @@ static bool regsafe(struct bpf_reg_state
+static bool stacksafe(struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur,
+ struct idpair *idmap)
+ /* if explored stack has more populated slots than current stack
+ * such stacks are not equivalent
+ if (old->allocated_stack > cur->allocated_stack)
+ /* walk slots of the explored stack and ignore any additional
+ * slots in the current stack, since explored(safe) state
+ for (i = 0; i < old->allocated_stack; i++) {
+ spi = i / BPF_REG_SIZE;
+ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
+ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+ cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+ /* Ex: old explored (safe) state has STACK_SPILL in
+ * this stack slot, but current has STACK_MISC ->
+ * these verifier states are not equivalent,
+ * return false to continue verification of this path
+ if (i % BPF_REG_SIZE)
+ if (old->stack[spi].slot_type[0] != STACK_SPILL)
+ if (!regsafe(&old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr,
+ /* when explored and current stack slot are both storing
+ * spilled registers, check that stored pointers types
+ * are the same as well.
+ * Ex: explored safe path could have stored
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
+ * but current path has stored:
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
+ * such verifier states are not equivalent.
+ * return false to continue verification of this path
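
A compilable miniature of the stacksafe() walk above, with simplified
stand-in types and regsafe() stubbed out; it keeps the same three
checks (size, byte-wise slot_type match, spill contents once per slot):

#include <stdbool.h>
#include <stdio.h>

#define BPF_REG_SIZE 8
enum { STACK_INVALID = 0, STACK_SPILL, STACK_MISC };

struct mini_slot {
	int spilled_type;                      /* stand-in for spilled_ptr */
	unsigned char slot_type[BPF_REG_SIZE];
};

struct mini_state {
	int allocated_stack;                   /* bytes */
	struct mini_slot stack[8];
};

static bool regsafe_stub(const struct mini_slot *o, const struct mini_slot *c)
{
	return o->spilled_type == c->spilled_type;
}

static bool mini_stacksafe(const struct mini_state *old,
			   const struct mini_state *cur)
{
	int i, spi;

	if (old->allocated_stack > cur->allocated_stack)
		return false;    /* explored state used more stack */
	for (i = 0; i < old->allocated_stack; i++) {
		spi = i / BPF_REG_SIZE;
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
			continue;        /* explored state never used this byte */
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			return false;    /* e.g. SPILL here vs MISC there */
		if (i % BPF_REG_SIZE)
			continue;        /* compare spill contents once per slot */
		if (old->stack[spi].slot_type[0] == STACK_SPILL &&
		    !regsafe_stub(&old->stack[spi], &cur->stack[spi]))
			return false;
	}
	return true;
}

int main(void)
{
	struct mini_state a = { .allocated_stack = 8 };
	struct mini_state b = { .allocated_stack = 16 };

	/* extra slots in the current state are simply ignored */
	printf("%d\n", mini_stacksafe(&a, &b));   /* prints 1 */
	return 0;
}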
 /* compare two verifier states
 * all states stored in state_list are known to be valid, since
@@ -3578,37 +3732,8 @@ static bool states_equal(struct bpf_veri
- for (i = 0; i < MAX_BPF_STACK; i++) {
- if (old->stack_slot_type[i] == STACK_INVALID)
- if (old->stack_slot_type[i] != cur->stack_slot_type[i])
- /* Ex: old explored (safe) state has STACK_SPILL in
- * this stack slot, but current has has STACK_MISC ->
- * this verifier states are not equivalent,
- * return false to continue verification of this path
- if (i % BPF_REG_SIZE)
- if (old->stack_slot_type[i] != STACK_SPILL)
- if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE],
- &cur->spilled_regs[i / BPF_REG_SIZE],
- /* when explored and current stack slot are both storing
- * spilled registers, check that stored pointers types
- * are the same as well.
- * Ex: explored safe path could have stored
- * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
- * but current path has stored:
- * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
- * such verifier states are not equivalent.
- * return false to continue verification of this path
+ if (!stacksafe(old, cur, idmap))
@@ -3644,17 +3769,19 @@ static bool do_propagate_liveness(const
 /* ... and stack slots */
- for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) {
- if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
+ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
+ i < parent->allocated_stack / BPF_REG_SIZE; i++) {
+ if (parent->stack[i].slot_type[0] != STACK_SPILL)
- if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL)
+ if (state->stack[i].slot_type[0] != STACK_SPILL)
- if (parent->spilled_regs[i].live & REG_LIVE_READ)
+ if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
- if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN))
+ (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
- if (state->spilled_regs[i].live & REG_LIVE_READ) {
- parent->spilled_regs[i].live |= REG_LIVE_READ;
+ if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
+ parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
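
The screening rule above (a write in the child state hides the parent's
value from later reads, so the read mark must not propagate up) can be
modeled in a few lines. A toy with flags only, not the kernel types:

#include <stdio.h>

enum { REG_LIVE_NONE = 0, REG_LIVE_READ = 1, REG_LIVE_WRITTEN = 2 };

static int propagate(int parent_live, int child_live, int writes)
{
	if (parent_live & REG_LIVE_READ)
		return parent_live;                 /* already marked */
	if (writes && (child_live & REG_LIVE_WRITTEN))
		return parent_live;                 /* screened by child's write */
	if (child_live & REG_LIVE_READ)
		return parent_live | REG_LIVE_READ; /* child read reaches parent */
	return parent_live;
}

int main(void)
{
	printf("read-only child: parent live=%d\n",
	       propagate(REG_LIVE_NONE, REG_LIVE_READ, 1));                    /* 1 */
	printf("write-then-read: parent live=%d\n",
	       propagate(REG_LIVE_NONE, REG_LIVE_WRITTEN | REG_LIVE_READ, 1)); /* 0 */
	return 0;
}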
@@ -3684,6 +3811,7 @@ static int is_state_visited(struct bpf_v
 struct bpf_verifier_state_list *new_sl;
 struct bpf_verifier_state_list *sl;
+ struct bpf_verifier_state *cur = env->cur_state;
 sl = env->explored_states[insn_idx];
@@ -3694,7 +3822,7 @@ static int is_state_visited(struct bpf_v
 while (sl != STATE_LIST_MARK) {
- if (states_equal(env, &sl->state, &env->cur_state)) {
+ if (states_equal(env, &sl->state, cur)) {
 /* reached equivalent register/stack state,
 * Registers read by the continuation are read by us.
@@ -3705,7 +3833,7 @@ static int is_state_visited(struct bpf_v
 * they'll be immediately forgotten as we're pruning
 * this state and will pop a new one.
- propagate_liveness(&sl->state, &env->cur_state);
+ propagate_liveness(&sl->state, cur);
@@ -3717,16 +3845,16 @@ static int is_state_visited(struct bpf_v
 * it will be rejected. Since there are no loops, we won't be
 * seeing this 'insn_idx' instruction again on the way to bpf_exit
- new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
+ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
 /* add new state to the head of linked list */
- memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
+ copy_verifier_state(&new_sl->state, cur);
 new_sl->next = env->explored_states[insn_idx];
 env->explored_states[insn_idx] = new_sl;
 /* connect new state to parentage chain */
- env->cur_state.parent = &new_sl->state;
+ cur->parent = &new_sl->state;
 /* clear write marks in current state: the writes we did are not writes
 * our child did, so they don't screen off its reads from us.
 * (There are no read marks in current state, because reads always mark
@@ -3734,10 +3862,10 @@ static int is_state_visited(struct bpf_v
 * explored_states can get read marks.)
 for (i = 0; i < BPF_REG_FP; i++)
- env->cur_state.regs[i].live = REG_LIVE_NONE;
- for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++)
- if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL)
- env->cur_state.spilled_regs[i].live = REG_LIVE_NONE;
+ cur->regs[i].live = REG_LIVE_NONE;
+ for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
+ if (cur->stack[i].slot_type[0] == STACK_SPILL)
+ cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
@@ -3752,15 +3880,19 @@ static int ext_analyzer_insn_hook(struct
 static int do_check(struct bpf_verifier_env *env)
- struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state;
 struct bpf_insn *insns = env->prog->insnsi;
- struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *regs;
 int insn_cnt = env->prog->len;
 int insn_idx, prev_insn_idx = 0;
 int insn_processed = 0;
 bool do_print_state = false;
- init_reg_state(regs);
+ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
+ env->cur_state = state;
+ init_reg_state(state->regs);
 state->parent = NULL;
@@ -3807,7 +3939,7 @@ static int do_check(struct bpf_verifier_
 verbose("\nfrom %d to %d:",
 prev_insn_idx, insn_idx);
- print_verifier_state(&env->cur_state);
+ print_verifier_state(env->cur_state);
 do_print_state = false;
@@ -3820,6 +3952,7 @@ static int do_check(struct bpf_verifier_
+ regs = cur_regs(env);
 env->insn_aux_data[insn_idx].seen = true;
 if (class == BPF_ALU || class == BPF_ALU64) {
 err = check_alu_op(env, insn);
@@ -3991,8 +4124,10 @@ static int do_check(struct bpf_verifier_
- insn_idx = pop_stack(env, &prev_insn_idx);
- if (insn_idx < 0) {
+ err = pop_stack(env, &prev_insn_idx, &insn_idx);
+ if (err != -ENOENT)
 do_print_state = true;
@@ -4633,9 +4768,11 @@ int bpf_check(struct bpf_prog **prog, un
 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+ free_verifier_state(env->cur_state);
+ env->cur_state = NULL;
- while (pop_stack(env, NULL) >= 0);
+ while (!pop_stack(env, NULL, NULL));
@@ -4741,9 +4878,11 @@ int bpf_analyzer(struct bpf_prog *prog,
 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
 ret = do_check(env);
+ free_verifier_state(env->cur_state);
+ env->cur_state = NULL;
- while (pop_stack(env, NULL) >= 0);
+ while (!pop_stack(env, NULL, NULL));
 mutex_unlock(&bpf_verifier_lock);