From 50d080d4b52744ab53c9efefc11413e5c65211e1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 17 Apr 2019 22:00:21 +0200 Subject: [PATCH] 4.14-stable patches added patches: bpf-do-not-restore-dst_reg-when-cur_state-is-freed.patch bpf-enable-access-to-ax-register-also-from-verifier-rewrite.patch bpf-fix-check_map_access-smin_value-test-when-pointer-contains-offset.patch bpf-fix-inner-map-masking-to-prevent-oob-under-speculation.patch bpf-fix-sanitation-of-alu-op-with-pointer-scalar-type-from-different-paths.patch bpf-fix-sanitation-rewrite-in-case-of-non-pointers.patch bpf-fix-selftests-are-changes-for-cve-2019-7308.patch bpf-fix-stack-state-printing-in-verifier-log.patch bpf-fix-verifier-memory-leaks.patch bpf-fix-verifier-null-pointer-dereference.patch bpf-move-prev_-insn_idx-into-verifier-env.patch bpf-move-tmp-variable-into-ax-register-in-interpreter.patch bpf-prevent-out-of-bounds-speculation-on-pointer-arithmetic.patch bpf-reduce-verifier-memory-consumption.patch bpf-restrict-map-value-pointer-arithmetic-for-unprivileged.patch bpf-restrict-stack-pointer-arithmetic-for-unprivileged.patch bpf-restrict-unknown-scalars-of-mixed-signed-bounds-for-unprivileged.patch tools-include-adopt-linux-bits.h.patch --- ...tore-dst_reg-when-cur_state-is-freed.patch | 70 ++ ...-register-also-from-verifier-rewrite.patch | 78 ++ ...ue-test-when-pointer-contains-offset.patch | 54 + ...ing-to-prevent-oob-under-speculation.patch | 141 +++ ...ter-scalar-type-from-different-paths.patch | 150 +++ ...tion-rewrite-in-case-of-non-pointers.patch | 55 + ...ftests-are-changes-for-cve-2019-7308.patch | 61 + ...stack-state-printing-in-verifier-log.patch | 36 + .../bpf-fix-verifier-memory-leaks.patch | 115 ++ ...ix-verifier-null-pointer-dereference.patch | 95 ++ ...ove-prev_-insn_idx-into-verifier-env.patch | 245 ++++ ...able-into-ax-register-in-interpreter.patch | 116 ++ ...ds-speculation-on-pointer-arithmetic.patch | 598 ++++++++++ ...f-reduce-verifier-memory-consumption.patch | 1012 +++++++++++++++++ ...-pointer-arithmetic-for-unprivileged.patch | 50 + ...-pointer-arithmetic-for-unprivileged.patch | 117 ++ ...mixed-signed-bounds-for-unprivileged.patch | 56 + queue-4.14/series | 18 + .../tools-include-adopt-linux-bits.h.patch | 98 ++ 19 files changed, 3165 insertions(+) create mode 100644 queue-4.14/bpf-do-not-restore-dst_reg-when-cur_state-is-freed.patch create mode 100644 queue-4.14/bpf-enable-access-to-ax-register-also-from-verifier-rewrite.patch create mode 100644 queue-4.14/bpf-fix-check_map_access-smin_value-test-when-pointer-contains-offset.patch create mode 100644 queue-4.14/bpf-fix-inner-map-masking-to-prevent-oob-under-speculation.patch create mode 100644 queue-4.14/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-type-from-different-paths.patch create mode 100644 queue-4.14/bpf-fix-sanitation-rewrite-in-case-of-non-pointers.patch create mode 100644 queue-4.14/bpf-fix-selftests-are-changes-for-cve-2019-7308.patch create mode 100644 queue-4.14/bpf-fix-stack-state-printing-in-verifier-log.patch create mode 100644 queue-4.14/bpf-fix-verifier-memory-leaks.patch create mode 100644 queue-4.14/bpf-fix-verifier-null-pointer-dereference.patch create mode 100644 queue-4.14/bpf-move-prev_-insn_idx-into-verifier-env.patch create mode 100644 queue-4.14/bpf-move-tmp-variable-into-ax-register-in-interpreter.patch create mode 100644 queue-4.14/bpf-prevent-out-of-bounds-speculation-on-pointer-arithmetic.patch create mode 100644 queue-4.14/bpf-reduce-verifier-memory-consumption.patch create mode 100644 
queue-4.14/bpf-restrict-map-value-pointer-arithmetic-for-unprivileged.patch create mode 100644 queue-4.14/bpf-restrict-stack-pointer-arithmetic-for-unprivileged.patch create mode 100644 queue-4.14/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-for-unprivileged.patch create mode 100644 queue-4.14/tools-include-adopt-linux-bits.h.patch diff --git a/queue-4.14/bpf-do-not-restore-dst_reg-when-cur_state-is-freed.patch b/queue-4.14/bpf-do-not-restore-dst_reg-when-cur_state-is-freed.patch new file mode 100644 index 00000000000..81a50d1189f --- /dev/null +++ b/queue-4.14/bpf-do-not-restore-dst_reg-when-cur_state-is-freed.patch @@ -0,0 +1,70 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:15 +0000 +Subject: bpf: do not restore dst_reg when cur_state is freed +To: +Cc: , , , , Xu Yu +Message-ID: <20190403183917.13749-16-sblbir@amzn.com> + +From: Xu Yu + +commit 0803278b0b4d8eeb2b461fb698785df65a725d9e upstream. + +Syzkaller hit 'KASAN: use-after-free Write in sanitize_ptr_alu' bug. + +Call trace: + + dump_stack+0xbf/0x12e + print_address_description+0x6a/0x280 + kasan_report+0x237/0x360 + sanitize_ptr_alu+0x85a/0x8d0 + adjust_ptr_min_max_vals+0x8f2/0x1ca0 + adjust_reg_min_max_vals+0x8ed/0x22e0 + do_check+0x1ca6/0x5d00 + bpf_check+0x9ca/0x2570 + bpf_prog_load+0xc91/0x1030 + __se_sys_bpf+0x61e/0x1f00 + do_syscall_64+0xc8/0x550 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Fault injection trace: + +  kfree+0xea/0x290 +  free_func_state+0x4a/0x60 +  free_verifier_state+0x61/0xe0 +  push_stack+0x216/0x2f0 <- inject failslab +  sanitize_ptr_alu+0x2b1/0x8d0 +  adjust_ptr_min_max_vals+0x8f2/0x1ca0 +  adjust_reg_min_max_vals+0x8ed/0x22e0 +  do_check+0x1ca6/0x5d00 +  bpf_check+0x9ca/0x2570 +  bpf_prog_load+0xc91/0x1030 +  __se_sys_bpf+0x61e/0x1f00 +  do_syscall_64+0xc8/0x550 +  entry_SYSCALL_64_after_hwframe+0x49/0xbe + +When kzalloc() fails in push_stack(), free_verifier_state() will free +current verifier state. As push_stack() returns, dst_reg was restored +if ptr_is_dst_reg is false. However, as member of the cur_state, +dst_reg is also freed, and error occurs when dereferencing dst_reg. +Simply fix it by testing ret of push_stack() before restoring dst_reg. + +Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") +Signed-off-by: Xu Yu +Signed-off-by: Daniel Borkmann +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2118,7 +2118,7 @@ do_sim: + *dst_reg = *ptr_reg; + } + ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); +- if (!ptr_is_dst_reg) ++ if (!ptr_is_dst_reg && ret) + *dst_reg = tmp; + return !ret ? -EFAULT : 0; + } diff --git a/queue-4.14/bpf-enable-access-to-ax-register-also-from-verifier-rewrite.patch b/queue-4.14/bpf-enable-access-to-ax-register-also-from-verifier-rewrite.patch new file mode 100644 index 00000000000..8a239c6aa46 --- /dev/null +++ b/queue-4.14/bpf-enable-access-to-ax-register-also-from-verifier-rewrite.patch @@ -0,0 +1,78 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:07 +0000 +Subject: bpf: enable access to ax register also from verifier rewrite +To: +Cc: , , , , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-8-sblbir@amzn.com> + +From: Daniel Borkmann + +commit 9b73bfdd08e73231d6a90ae6db4b46b3fbf56c30 upstream. 
+ +Right now we are using BPF ax register in JIT for constant blinding as +well as in interpreter as temporary variable. Verifier will not be able +to use it simply because its use will get overridden from the former in +bpf_jit_blind_insn(). However, it can be made to work in that blinding +will be skipped if there is prior use in either source or destination +register on the instruction. Taking constraints of ax into account, the +verifier is then open to use it in rewrites under some constraints. Note, +ax register already has mappings in every eBPF JIT. + + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +[backported to 4.14 sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/filter.h | 7 +------ + kernel/bpf/core.c | 20 ++++++++++++++++++++ + 2 files changed, 21 insertions(+), 6 deletions(-) + +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -46,12 +46,7 @@ struct bpf_prog_aux; + #define BPF_REG_X BPF_REG_7 + #define BPF_REG_TMP BPF_REG_8 + +-/* Kernel hidden auxiliary/helper register for hardening step. +- * Only used by eBPF JITs. It's nothing more than a temporary +- * register that JITs use internally, only that here it's part +- * of eBPF instructions that have been rewritten for blinding +- * constants. See JIT pre-step in bpf_jit_blind_constants(). +- */ ++/* Kernel hidden auxiliary/helper register. */ + #define BPF_REG_AX MAX_BPF_REG + #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) + #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -553,6 +553,26 @@ static int bpf_jit_blind_insn(const stru + BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); + BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + ++ /* Constraints on AX register: ++ * ++ * AX register is inaccessible from user space. It is mapped in ++ * all JITs, and used here for constant blinding rewrites. It is ++ * typically "stateless" meaning its contents are only valid within ++ * the executed instruction, but not across several instructions. ++ * There are a few exceptions however which are further detailed ++ * below. ++ * ++ * Constant blinding is only used by JITs, not in the interpreter. ++ * The interpreter uses AX in some occasions as a local temporary ++ * register e.g. in DIV or MOD instructions. ++ * ++ * In restricted circumstances, the verifier can also use the AX ++ * register for rewrites as long as they do not interfere with ++ * the above cases! ++ */ ++ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) ++ goto out; ++ + if (from->imm == 0 && + (from->code == (BPF_ALU | BPF_MOV | BPF_K) || + from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { diff --git a/queue-4.14/bpf-fix-check_map_access-smin_value-test-when-pointer-contains-offset.patch b/queue-4.14/bpf-fix-check_map_access-smin_value-test-when-pointer-contains-offset.patch new file mode 100644 index 00000000000..8b7a2577d63 --- /dev/null +++ b/queue-4.14/bpf-fix-check_map_access-smin_value-test-when-pointer-contains-offset.patch @@ -0,0 +1,54 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:11 +0000 +Subject: bpf: fix check_map_access smin_value test when pointer contains offset +To: +Cc: , , , , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-12-sblbir@amzn.com> + +From: Daniel Borkmann + +commit b7137c4eab85c1cf3d46acdde90ce1163b28c873 upstream. 
+ +In check_map_access() we probe actual bounds through __check_map_access() +with offset of reg->smin_value + off for lower bound and offset of +reg->umax_value + off for the upper bound. However, even though the +reg->smin_value could have a negative value, the final result of the +sum with off could be positive when pointer arithmetic with known and +unknown scalars is combined. In this case we reject the program with +an error such as "R min value is negative, either use unsigned index +or do a if (index >=0) check." even though the access itself would be +fine. Therefore extend the check to probe whether the actual resulting +reg->smin_value + off is less than zero. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +[backported to 4.14 sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -994,13 +994,17 @@ static int check_map_access(struct bpf_v + */ + if (log_level) + print_verifier_state(state); ++ + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. If we are using signed variables for our + * index'es we need to make sure that whatever we use + * will have a set floor within our range. + */ +- if (reg->smin_value < 0) { ++ if (reg->smin_value < 0 && ++ (reg->smin_value == S64_MIN || ++ (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || ++ reg->smin_value + off < 0)) { + verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; diff --git a/queue-4.14/bpf-fix-inner-map-masking-to-prevent-oob-under-speculation.patch b/queue-4.14/bpf-fix-inner-map-masking-to-prevent-oob-under-speculation.patch new file mode 100644 index 00000000000..dba200c76b7 --- /dev/null +++ b/queue-4.14/bpf-fix-inner-map-masking-to-prevent-oob-under-speculation.patch @@ -0,0 +1,141 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:14 +0000 +Subject: bpf: fix inner map masking to prevent oob under speculation +To: +Cc: , , , , Alexei Starovoitov , "Vallish Vaidyeshwara" , Balbir Singh +Message-ID: <20190403183917.13749-15-sblbir@amzn.com> + +From: Daniel Borkmann + +commit 9d5564ddcf2a0f5ba3fa1c3a1f8a1b59ad309553 upstream. + +During review I noticed that inner meta map setup for map in +map is buggy in that it does not propagate all needed data +from the reference map which the verifier is later accessing. + +In particular one such case is index masking to prevent out of +bounds access under speculative execution due to missing the +map's unpriv_array/index_mask field propagation. Fix this such +that the verifier is generating the correct code for inlined +lookups in case of unpriviledged use. + +Before patch (test_verifier's 'map in map access' dump): + + # bpftool prog dump xla id 3 + 0: (62) *(u32 *)(r10 -4) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -4 + 3: (18) r1 = map[id:4] + 5: (07) r1 += 272 | + 6: (61) r0 = *(u32 *)(r2 +0) | + 7: (35) if r0 >= 0x1 goto pc+6 | Inlined map in map lookup + 8: (54) (u32) r0 &= (u32) 0 | with index masking for + 9: (67) r0 <<= 3 | map->unpriv_array. 
+ 10: (0f) r0 += r1 | + 11: (79) r0 = *(u64 *)(r0 +0) | + 12: (15) if r0 == 0x0 goto pc+1 | + 13: (05) goto pc+1 | + 14: (b7) r0 = 0 | + 15: (15) if r0 == 0x0 goto pc+11 + 16: (62) *(u32 *)(r10 -4) = 0 + 17: (bf) r2 = r10 + 18: (07) r2 += -4 + 19: (bf) r1 = r0 + 20: (07) r1 += 272 | + 21: (61) r0 = *(u32 *)(r2 +0) | Index masking missing (!) + 22: (35) if r0 >= 0x1 goto pc+3 | for inner map despite + 23: (67) r0 <<= 3 | map->unpriv_array set. + 24: (0f) r0 += r1 | + 25: (05) goto pc+1 | + 26: (b7) r0 = 0 | + 27: (b7) r0 = 0 + 28: (95) exit + +After patch: + + # bpftool prog dump xla id 1 + 0: (62) *(u32 *)(r10 -4) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -4 + 3: (18) r1 = map[id:2] + 5: (07) r1 += 272 | + 6: (61) r0 = *(u32 *)(r2 +0) | + 7: (35) if r0 >= 0x1 goto pc+6 | Same inlined map in map lookup + 8: (54) (u32) r0 &= (u32) 0 | with index masking due to + 9: (67) r0 <<= 3 | map->unpriv_array. + 10: (0f) r0 += r1 | + 11: (79) r0 = *(u64 *)(r0 +0) | + 12: (15) if r0 == 0x0 goto pc+1 | + 13: (05) goto pc+1 | + 14: (b7) r0 = 0 | + 15: (15) if r0 == 0x0 goto pc+12 + 16: (62) *(u32 *)(r10 -4) = 0 + 17: (bf) r2 = r10 + 18: (07) r2 += -4 + 19: (bf) r1 = r0 + 20: (07) r1 += 272 | + 21: (61) r0 = *(u32 *)(r2 +0) | + 22: (35) if r0 >= 0x1 goto pc+4 | Now fixed inlined inner map + 23: (54) (u32) r0 &= (u32) 0 | lookup with proper index masking + 24: (67) r0 <<= 3 | for map->unpriv_array. + 25: (0f) r0 += r1 | + 26: (05) goto pc+1 | + 27: (b7) r0 = 0 | + 28: (b7) r0 = 0 + 29: (95) exit + + +Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") +Signed-off-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Signed-off-by: Alexei Starovoitov +Signed-off-by: Vallish Vaidyeshwara +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/map_in_map.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +--- a/kernel/bpf/map_in_map.c ++++ b/kernel/bpf/map_in_map.c +@@ -12,6 +12,7 @@ + struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) + { + struct bpf_map *inner_map, *inner_map_meta; ++ u32 inner_map_meta_size; + struct fd f; + + f = fdget(inner_map_ufd); +@@ -34,7 +35,12 @@ struct bpf_map *bpf_map_meta_alloc(int i + return ERR_PTR(-EINVAL); + } + +- inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); ++ inner_map_meta_size = sizeof(*inner_map_meta); ++ /* In some cases verifier needs to access beyond just base map. */ ++ if (inner_map->ops == &array_map_ops) ++ inner_map_meta_size = sizeof(struct bpf_array); ++ ++ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); + if (!inner_map_meta) { + fdput(f); + return ERR_PTR(-ENOMEM); +@@ -44,9 +50,16 @@ struct bpf_map *bpf_map_meta_alloc(int i + inner_map_meta->key_size = inner_map->key_size; + inner_map_meta->value_size = inner_map->value_size; + inner_map_meta->map_flags = inner_map->map_flags; +- inner_map_meta->ops = inner_map->ops; + inner_map_meta->max_entries = inner_map->max_entries; + ++ /* Misc members not needed in bpf_map_meta_equal() check. 
*/ ++ inner_map_meta->ops = inner_map->ops; ++ if (inner_map->ops == &array_map_ops) { ++ inner_map_meta->unpriv_array = inner_map->unpriv_array; ++ container_of(inner_map_meta, struct bpf_array, map)->index_mask = ++ container_of(inner_map, struct bpf_array, map)->index_mask; ++ } ++ + fdput(f); + return inner_map_meta; + } diff --git a/queue-4.14/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-type-from-different-paths.patch b/queue-4.14/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-type-from-different-paths.patch new file mode 100644 index 00000000000..621e42bf562 --- /dev/null +++ b/queue-4.14/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-type-from-different-paths.patch @@ -0,0 +1,150 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:13 +0000 +Subject: bpf: fix sanitation of alu op with pointer / scalar type from different paths +To: +Cc: , , , , Alexei Starovoitov , "Vallish Vaidyeshwara" , Balbir Singh +Message-ID: <20190403183917.13749-14-sblbir@amzn.com> + +From: Daniel Borkmann + +commit d3bd7413e0ca40b60cf60d4003246d067cafdeda upstream. + +While 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer +arithmetic") took care of rejecting alu op on pointer when e.g. pointer +came from two different map values with different map properties such as +value size, Jann reported that a case was not covered yet when a given +alu op is used in both "ptr_reg += reg" and "numeric_reg += reg" from +different branches where we would incorrectly try to sanitize based +on the pointer's limit. Catch this corner case and reject the program +instead. + +Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") +Reported-by: Jann Horn +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Vallish Vaidyeshwara +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 61 +++++++++++++++++++++++++++++++++---------- + 2 files changed, 49 insertions(+), 13 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -117,6 +117,7 @@ struct bpf_verifier_state_list { + #define BPF_ALU_SANITIZE_SRC 1U + #define BPF_ALU_SANITIZE_DST 2U + #define BPF_ALU_NEG_VALUE (1U << 2) ++#define BPF_ALU_NON_POINTER (1U << 3) + #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ + BPF_ALU_SANITIZE_DST) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2037,6 +2037,40 @@ static int retrieve_ptr_limit(const stru + } + } + ++static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, ++ const struct bpf_insn *insn) ++{ ++ return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; ++} ++ ++static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, ++ u32 alu_state, u32 alu_limit) ++{ ++ /* If we arrived here from different branches with different ++ * state or limits to sanitize, then this won't work. ++ */ ++ if (aux->alu_state && ++ (aux->alu_state != alu_state || ++ aux->alu_limit != alu_limit)) ++ return -EACCES; ++ ++ /* Corresponding fixup done in fixup_bpf_calls(). 
*/ ++ aux->alu_state = alu_state; ++ aux->alu_limit = alu_limit; ++ return 0; ++} ++ ++static int sanitize_val_alu(struct bpf_verifier_env *env, ++ struct bpf_insn *insn) ++{ ++ struct bpf_insn_aux_data *aux = cur_aux(env); ++ ++ if (can_skip_alu_sanitation(env, insn)) ++ return 0; ++ ++ return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); ++} ++ + static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, +@@ -2051,7 +2085,7 @@ static int sanitize_ptr_alu(struct bpf_v + struct bpf_reg_state tmp; + bool ret; + +- if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) ++ if (can_skip_alu_sanitation(env, insn)) + return 0; + + /* We already marked aux for masking from non-speculative +@@ -2067,19 +2101,8 @@ static int sanitize_ptr_alu(struct bpf_v + + if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) + return 0; +- +- /* If we arrived here from different branches with different +- * limits to sanitize, then this won't work. +- */ +- if (aux->alu_state && +- (aux->alu_state != alu_state || +- aux->alu_limit != alu_limit)) ++ if (update_alu_sanitation_state(aux, alu_state, alu_limit)) + return -EACCES; +- +- /* Corresponding fixup done in fixup_bpf_calls(). */ +- aux->alu_state = alu_state; +- aux->alu_limit = alu_limit; +- + do_sim: + /* Simulate and find potential out-of-bounds access under + * speculative execution from truncation as a result of +@@ -2360,6 +2383,8 @@ static int adjust_scalar_min_max_vals(st + s64 smin_val, smax_val; + u64 umin_val, umax_val; + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; ++ u32 dst = insn->dst_reg; ++ int ret; + + if (insn_bitness == 32) { + /* Relevant for 32-bit RSH: Information can propagate towards +@@ -2394,6 +2419,11 @@ static int adjust_scalar_min_max_vals(st + + switch (opcode) { + case BPF_ADD: ++ ret = sanitize_val_alu(env, insn); ++ if (ret < 0) { ++ verbose("R%d tried to add from different pointers or scalars\n", dst); ++ return ret; ++ } + if (signed_add_overflows(dst_reg->smin_value, smin_val) || + signed_add_overflows(dst_reg->smax_value, smax_val)) { + dst_reg->smin_value = S64_MIN; +@@ -2413,6 +2443,11 @@ static int adjust_scalar_min_max_vals(st + dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); + break; + case BPF_SUB: ++ ret = sanitize_val_alu(env, insn); ++ if (ret < 0) { ++ verbose("R%d tried to sub from different pointers or scalars\n", dst); ++ return ret; ++ } + if (signed_sub_overflows(dst_reg->smin_value, smax_val) || + signed_sub_overflows(dst_reg->smax_value, smin_val)) { + /* Overflow possible, we know nothing */ diff --git a/queue-4.14/bpf-fix-sanitation-rewrite-in-case-of-non-pointers.patch b/queue-4.14/bpf-fix-sanitation-rewrite-in-case-of-non-pointers.patch new file mode 100644 index 00000000000..4d46043db04 --- /dev/null +++ b/queue-4.14/bpf-fix-sanitation-rewrite-in-case-of-non-pointers.patch @@ -0,0 +1,55 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:16 +0000 +Subject: bpf: fix sanitation rewrite in case of non-pointers +To: +Cc: , , , , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-17-sblbir@amzn.com> + +From: Daniel Borkmann + +commit 3612af783cf52c74a031a2f11b82247b2599d3cd upstream. + +Marek reported that he saw an issue with the below snippet in that +timing measurements where off when loaded as unpriv while results +were reasonable when loaded as privileged: + + [...] 
+ uint64_t a = bpf_ktime_get_ns(); + uint64_t b = bpf_ktime_get_ns(); + uint64_t delta = b - a; + if ((int64_t)delta > 0) { + [...] + +Turns out there is a bug where a corner case is missing in the fix +d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar +type from different paths"), namely fixup_bpf_calls() only checks +whether aux has a non-zero alu_state, but it also needs to test for +the case of BPF_ALU_NON_POINTER since in both occasions we need to +skip the masking rewrite (as there is nothing to mask). + +Fixes: d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar type from different paths") +Reported-by: Marek Majkowski +Reported-by: Arthur Fabre +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/netdev/CAJPywTJqP34cK20iLM5YmUMz9KXQOdu1-+BZrGMAGgLuBWz7fg@mail.gmail.com/T/ +Acked-by: Song Liu +Signed-off-by: Alexei Starovoitov +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4771,7 +4771,8 @@ static int fixup_bpf_calls(struct bpf_ve + u32 off_reg; + + aux = &env->insn_aux_data[i + delta]; +- if (!aux->alu_state) ++ if (!aux->alu_state || ++ aux->alu_state == BPF_ALU_NON_POINTER) + continue; + + isneg = aux->alu_state & BPF_ALU_NEG_VALUE; diff --git a/queue-4.14/bpf-fix-selftests-are-changes-for-cve-2019-7308.patch b/queue-4.14/bpf-fix-selftests-are-changes-for-cve-2019-7308.patch new file mode 100644 index 00000000000..6aec81af451 --- /dev/null +++ b/queue-4.14/bpf-fix-selftests-are-changes-for-cve-2019-7308.patch @@ -0,0 +1,61 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:17 +0000 +Subject: bpf: Fix selftests are changes for CVE 2019-7308 +To: +Cc: , , , , Balbir Singh +Message-ID: <20190403183917.13749-18-sblbir@amzn.com> + +From: Balbir Singh + +The error strings need to be updated, since they fail early. 
+ +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/test_verifier.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -1860,6 +1860,7 @@ static struct bpf_test tests[] = { + }, + .result = REJECT, + .errstr = "invalid stack off=-79992 size=8", ++ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + }, + { + "PTR_TO_STACK store/load - out of bounds high", +@@ -2243,6 +2244,8 @@ static struct bpf_test tests[] = { + BPF_EXIT_INSN(), + }, + .result = ACCEPT, ++ .result_unpriv = REJECT, ++ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + }, + { + "unpriv: cmp of stack pointer", +@@ -7013,6 +7016,7 @@ static struct bpf_test tests[] = { + }, + .fixup_map1 = { 3 }, + .errstr = "pointer offset 1073741822", ++ .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .result = REJECT + }, + { +@@ -7034,6 +7038,7 @@ static struct bpf_test tests[] = { + }, + .fixup_map1 = { 3 }, + .errstr = "pointer offset -1073741822", ++ .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .result = REJECT + }, + { +@@ -7203,6 +7208,7 @@ static struct bpf_test tests[] = { + BPF_EXIT_INSN() + }, + .errstr = "fp pointer offset 1073741822", ++ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result = REJECT + }, + { diff --git a/queue-4.14/bpf-fix-stack-state-printing-in-verifier-log.patch b/queue-4.14/bpf-fix-stack-state-printing-in-verifier-log.patch new file mode 100644 index 00000000000..d56fd3b4b98 --- /dev/null +++ b/queue-4.14/bpf-fix-stack-state-printing-in-verifier-log.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:04 +0000 +Subject: bpf: fix stack state printing in verifier log +To: +Cc: , , , , Alexei Starovoitov , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-5-sblbir@amzn.com> + +From: Alexei Starovoitov + +commit 12a3cc8424fe1237aaeb982dec4f0914ddd22f3e upstream. + +fix incorrect stack state prints in print_verifier_state() + +Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") +Signed-off-by: Alexei Starovoitov +Acked-by: John Fastabend +Acked-by: Daniel Borkmann +Signed-off-by: Daniel Borkmann +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -268,7 +268,7 @@ static void print_verifier_state(struct + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] == STACK_SPILL) + verbose(" fp%d=%s", +- -MAX_BPF_STACK + i * BPF_REG_SIZE, ++ (-i - 1) * BPF_REG_SIZE, + reg_type_str[state->stack[i].spilled_ptr.type]); + } + verbose("\n"); diff --git a/queue-4.14/bpf-fix-verifier-memory-leaks.patch b/queue-4.14/bpf-fix-verifier-memory-leaks.patch new file mode 100644 index 00000000000..db9171d423b --- /dev/null +++ b/queue-4.14/bpf-fix-verifier-memory-leaks.patch @@ -0,0 +1,115 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:02 +0000 +Subject: bpf: fix verifier memory leaks +To: +Cc: , , , , Alexei Starovoitov , "David S . Miller" , Balbir Singh +Message-ID: <20190403183917.13749-3-sblbir@amzn.com> + +From: Alexei Starovoitov + +commit 1969db47f8d0e800397abd4ee4e8d27d2b578587 upstream. 
+ +fix verifier memory leaks + +Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") +Signed-off-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 27 ++++++++++++++++++--------- + 1 file changed, 18 insertions(+), 9 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -491,10 +491,12 @@ static int realloc_verifier_state(struct + return 0; + } + +-static void free_verifier_state(struct bpf_verifier_state *state) ++static void free_verifier_state(struct bpf_verifier_state *state, ++ bool free_self) + { + kfree(state->stack); +- kfree(state); ++ if (free_self) ++ kfree(state); + } + + /* copy verifier state from src to dst growing dst stack space +@@ -532,6 +534,7 @@ static int pop_stack(struct bpf_verifier + if (prev_insn_idx) + *prev_insn_idx = head->prev_insn_idx; + elem = head->next; ++ free_verifier_state(&head->st, false); + kfree(head); + env->head = elem; + env->stack_size--; +@@ -549,14 +552,14 @@ static struct bpf_verifier_state *push_s + if (!elem) + goto err; + +- err = copy_verifier_state(&elem->st, cur); +- if (err) +- return NULL; + elem->insn_idx = insn_idx; + elem->prev_insn_idx = prev_insn_idx; + elem->next = env->head; + env->head = elem; + env->stack_size++; ++ err = copy_verifier_state(&elem->st, cur); ++ if (err) ++ goto err; + if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { + verbose("BPF program is too complex\n"); + goto err; +@@ -3812,7 +3815,7 @@ static int is_state_visited(struct bpf_v + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; + struct bpf_verifier_state *cur = env->cur_state; +- int i; ++ int i, err; + + sl = env->explored_states[insn_idx]; + if (!sl) +@@ -3850,7 +3853,12 @@ static int is_state_visited(struct bpf_v + return -ENOMEM; + + /* add new state to the head of linked list */ +- copy_verifier_state(&new_sl->state, cur); ++ err = copy_verifier_state(&new_sl->state, cur); ++ if (err) { ++ free_verifier_state(&new_sl->state, false); ++ kfree(new_sl); ++ return err; ++ } + new_sl->next = env->explored_states[insn_idx]; + env->explored_states[insn_idx] = new_sl; + /* connect new state to parentage chain */ +@@ -4692,6 +4700,7 @@ static void free_states(struct bpf_verif + if (sl) + while (sl != STATE_LIST_MARK) { + sln = sl->next; ++ free_verifier_state(&sl->state, false); + kfree(sl); + sl = sln; + } +@@ -4768,7 +4777,7 @@ int bpf_check(struct bpf_prog **prog, un + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); +- free_verifier_state(env->cur_state); ++ free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + + skip_full_check: +@@ -4878,7 +4887,7 @@ int bpf_analyzer(struct bpf_prog *prog, + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); +- free_verifier_state(env->cur_state); ++ free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + + skip_full_check: diff --git a/queue-4.14/bpf-fix-verifier-null-pointer-dereference.patch b/queue-4.14/bpf-fix-verifier-null-pointer-dereference.patch new file mode 100644 index 00000000000..e8e74758b28 --- /dev/null +++ b/queue-4.14/bpf-fix-verifier-null-pointer-dereference.patch @@ -0,0 +1,95 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:03 +0000 +Subject: bpf: fix verifier NULL pointer dereference +To: +Cc: , , , , Craig Gallek , "David S . 
Miller" , Balbir Singh +Message-ID: <20190403183917.13749-4-sblbir@amzn.com> + +From: Craig Gallek + +commit 8c01c4f896aa3404af948880dcb29a2d51c833dc upstream. + +do_check() can fail early without allocating env->cur_state under +memory pressure. Syzkaller found the stack below on the linux-next +tree because of this. + + kasan: CONFIG_KASAN_INLINE enabled + kasan: GPF could be caused by NULL-ptr deref or user memory access + general protection fault: 0000 [#1] SMP KASAN + Dumping ftrace buffer: + (ftrace buffer empty) + Modules linked in: + CPU: 1 PID: 27062 Comm: syz-executor5 Not tainted 4.14.0-rc7+ #106 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + task: ffff8801c2c74700 task.stack: ffff8801c3e28000 + RIP: 0010:free_verifier_state kernel/bpf/verifier.c:347 [inline] + RIP: 0010:bpf_check+0xcf4/0x19c0 kernel/bpf/verifier.c:4533 + RSP: 0018:ffff8801c3e2f5c8 EFLAGS: 00010202 + RAX: dffffc0000000000 RBX: 00000000fffffff4 RCX: 0000000000000000 + RDX: 0000000000000070 RSI: ffffffff817d5aa9 RDI: 0000000000000380 + RBP: ffff8801c3e2f668 R08: 0000000000000000 R09: 1ffff100387c5d9f + R10: 00000000218c4e80 R11: ffffffff85b34380 R12: ffff8801c4dc6a28 + R13: 0000000000000000 R14: ffff8801c4dc6a00 R15: ffff8801c4dc6a20 + FS: 00007f311079b700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00000000004d4a24 CR3: 00000001cbcd0000 CR4: 00000000001406e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + Call Trace: + bpf_prog_load+0xcbb/0x18e0 kernel/bpf/syscall.c:1166 + SYSC_bpf kernel/bpf/syscall.c:1690 [inline] + SyS_bpf+0xae9/0x4620 kernel/bpf/syscall.c:1652 + entry_SYSCALL_64_fastpath+0x1f/0xbe + RIP: 0033:0x452869 + RSP: 002b:00007f311079abe8 EFLAGS: 00000212 ORIG_RAX: 0000000000000141 + RAX: ffffffffffffffda RBX: 0000000000758020 RCX: 0000000000452869 + RDX: 0000000000000030 RSI: 0000000020168000 RDI: 0000000000000005 + RBP: 00007f311079aa20 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000212 R12: 00000000004b7550 + R13: 00007f311079ab58 R14: 00000000004b7560 R15: 0000000000000000 + Code: df 48 c1 ea 03 80 3c 02 00 0f 85 e6 0b 00 00 4d 8b 6e 20 48 b8 00 00 00 00 00 fc ff df 49 8d bd 80 03 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 b6 0b 00 00 49 8b bd 80 03 00 00 e8 d6 0c 26 + RIP: free_verifier_state kernel/bpf/verifier.c:347 [inline] RSP: ffff8801c3e2f5c8 + RIP: bpf_check+0xcf4/0x19c0 kernel/bpf/verifier.c:4533 RSP: ffff8801c3e2f5c8 + ---[ end trace c8d37f339dc64004 ]--- + +Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") +Fixes: 1969db47f8d0 ("bpf: fix verifier memory leaks") +Signed-off-by: Craig Gallek +Acked-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. 
Miller +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4777,8 +4777,10 @@ int bpf_check(struct bpf_prog **prog, un + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); +- free_verifier_state(env->cur_state, true); +- env->cur_state = NULL; ++ if (env->cur_state) { ++ free_verifier_state(env->cur_state, true); ++ env->cur_state = NULL; ++ } + + skip_full_check: + while (!pop_stack(env, NULL, NULL)); +@@ -4887,8 +4889,10 @@ int bpf_analyzer(struct bpf_prog *prog, + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); +- free_verifier_state(env->cur_state, true); +- env->cur_state = NULL; ++ if (env->cur_state) { ++ free_verifier_state(env->cur_state, true); ++ env->cur_state = NULL; ++ } + + skip_full_check: + while (!pop_stack(env, NULL, NULL)); diff --git a/queue-4.14/bpf-move-prev_-insn_idx-into-verifier-env.patch b/queue-4.14/bpf-move-prev_-insn_idx-into-verifier-env.patch new file mode 100644 index 00000000000..a9a97079033 --- /dev/null +++ b/queue-4.14/bpf-move-prev_-insn_idx-into-verifier-env.patch @@ -0,0 +1,245 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:05 +0000 +Subject: bpf: move {prev_,}insn_idx into verifier env +To: +Cc: , , , , Alexei Starovoitov , "Vallish Vaidyeshwara" , Balbir Singh +Message-ID: <20190403183917.13749-6-sblbir@amzn.com> + +From: Daniel Borkmann + +commit c08435ec7f2bc8f4109401f696fd55159b4b40cb upstream. + +Move prev_insn_idx and insn_idx from the do_check() function into +the verifier environment, so they can be read inside the various +helper functions for handling the instructions. It's easier to put +this into the environment rather than changing all call-sites only +to pass it along. insn_idx is useful in particular since this later +on allows to hold state in env->insn_aux_data[env->insn_idx]. 
+ +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Vallish Vaidyeshwara +[Backported to 4.14 by sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 2 + + kernel/bpf/verifier.c | 64 ++++++++++++++++++++----------------------- + 2 files changed, 33 insertions(+), 33 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -134,6 +134,8 @@ struct bpf_ext_analyzer_ops { + * one verifier_env per bpf_check() call + */ + struct bpf_verifier_env { ++ u32 insn_idx; ++ u32 prev_insn_idx; + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3892,7 +3892,6 @@ static int do_check(struct bpf_verifier_ + struct bpf_insn *insns = env->prog->insnsi; + struct bpf_reg_state *regs; + int insn_cnt = env->prog->len; +- int insn_idx, prev_insn_idx = 0; + int insn_processed = 0; + bool do_print_state = false; + +@@ -3902,19 +3901,18 @@ static int do_check(struct bpf_verifier_ + env->cur_state = state; + init_reg_state(state->regs); + state->parent = NULL; +- insn_idx = 0; + for (;;) { + struct bpf_insn *insn; + u8 class; + int err; + +- if (insn_idx >= insn_cnt) { ++ if (env->insn_idx >= insn_cnt) { + verbose("invalid insn idx %d insn_cnt %d\n", +- insn_idx, insn_cnt); ++ env->insn_idx, insn_cnt); + return -EFAULT; + } + +- insn = &insns[insn_idx]; ++ insn = &insns[env->insn_idx]; + class = BPF_CLASS(insn->code); + + if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { +@@ -3923,7 +3921,7 @@ static int do_check(struct bpf_verifier_ + return -E2BIG; + } + +- err = is_state_visited(env, insn_idx); ++ err = is_state_visited(env, env->insn_idx); + if (err < 0) + return err; + if (err == 1) { +@@ -3931,9 +3929,9 @@ static int do_check(struct bpf_verifier_ + if (log_level) { + if (do_print_state) + verbose("\nfrom %d to %d: safe\n", +- prev_insn_idx, insn_idx); ++ env->prev_insn_idx, env->insn_idx); + else +- verbose("%d: safe\n", insn_idx); ++ verbose("%d: safe\n", env->insn_idx); + } + goto process_bpf_exit; + } +@@ -3943,25 +3941,25 @@ static int do_check(struct bpf_verifier_ + + if (log_level > 1 || (log_level && do_print_state)) { + if (log_level > 1) +- verbose("%d:", insn_idx); ++ verbose("%d:", env->insn_idx); + else + verbose("\nfrom %d to %d:", +- prev_insn_idx, insn_idx); ++ env->prev_insn_idx, env->insn_idx); + print_verifier_state(env->cur_state); + do_print_state = false; + } + + if (log_level) { +- verbose("%d: ", insn_idx); ++ verbose("%d: ", env->insn_idx); + print_bpf_insn(env, insn); + } + +- err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); ++ err = ext_analyzer_insn_hook(env, env->insn_idx, env->prev_insn_idx); + if (err) + return err; + + regs = cur_regs(env); +- env->insn_aux_data[insn_idx].seen = true; ++ env->insn_aux_data[env->insn_idx].seen = true; + if (class == BPF_ALU || class == BPF_ALU64) { + err = check_alu_op(env, insn); + if (err) +@@ -3986,13 +3984,13 @@ static int do_check(struct bpf_verifier_ + /* check that memory (src_reg + off) is readable, + * the state of dst_reg will be updated by this func + */ +- err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, +- BPF_SIZE(insn->code), BPF_READ, +- insn->dst_reg, false); ++ err = check_mem_access(env, env->insn_idx, insn->src_reg, ++ insn->off, 
BPF_SIZE(insn->code), ++ BPF_READ, insn->dst_reg, false); + if (err) + return err; + +- prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; ++ prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { + /* saw a valid insn +@@ -4019,10 +4017,10 @@ static int do_check(struct bpf_verifier_ + enum bpf_reg_type *prev_dst_type, dst_reg_type; + + if (BPF_MODE(insn->code) == BPF_XADD) { +- err = check_xadd(env, insn_idx, insn); ++ err = check_xadd(env, env->insn_idx, insn); + if (err) + return err; +- insn_idx++; ++ env->insn_idx++; + continue; + } + +@@ -4038,13 +4036,13 @@ static int do_check(struct bpf_verifier_ + dst_reg_type = regs[insn->dst_reg].type; + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, +- insn->src_reg, false); ++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_WRITE, insn->src_reg, false); + if (err) + return err; + +- prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; ++ prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; +@@ -4073,9 +4071,9 @@ static int do_check(struct bpf_verifier_ + } + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, +- -1, false); ++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_WRITE, -1, false); + if (err) + return err; + +@@ -4091,7 +4089,7 @@ static int do_check(struct bpf_verifier_ + return -EINVAL; + } + +- err = check_call(env, insn->imm, insn_idx); ++ err = check_call(env, insn->imm, env->insn_idx); + if (err) + return err; + +@@ -4104,7 +4102,7 @@ static int do_check(struct bpf_verifier_ + return -EINVAL; + } + +- insn_idx += insn->off + 1; ++ env->insn_idx += insn->off + 1; + continue; + + } else if (opcode == BPF_EXIT) { +@@ -4132,7 +4130,7 @@ static int do_check(struct bpf_verifier_ + } + + process_bpf_exit: +- err = pop_stack(env, &prev_insn_idx, &insn_idx); ++ err = pop_stack(env, &env->prev_insn_idx, &env->insn_idx); + if (err < 0) { + if (err != -ENOENT) + return err; +@@ -4142,7 +4140,7 @@ process_bpf_exit: + continue; + } + } else { +- err = check_cond_jmp_op(env, insn, &insn_idx); ++ err = check_cond_jmp_op(env, insn, &env->insn_idx); + if (err) + return err; + } +@@ -4159,8 +4157,8 @@ process_bpf_exit: + if (err) + return err; + +- insn_idx++; +- env->insn_aux_data[insn_idx].seen = true; ++ env->insn_idx++; ++ env->insn_aux_data[env->insn_idx].seen = true; + } else { + verbose("invalid BPF_LD mode\n"); + return -EINVAL; +@@ -4170,7 +4168,7 @@ process_bpf_exit: + return -EINVAL; + } + +- insn_idx++; ++ env->insn_idx++; + } + + verbose("processed %d insns, stack depth %d\n", diff --git a/queue-4.14/bpf-move-tmp-variable-into-ax-register-in-interpreter.patch b/queue-4.14/bpf-move-tmp-variable-into-ax-register-in-interpreter.patch new file mode 100644 index 00000000000..247fa3a4757 --- /dev/null +++ b/queue-4.14/bpf-move-tmp-variable-into-ax-register-in-interpreter.patch @@ -0,0 +1,116 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:06 +0000 +Subject: bpf: move tmp variable into ax register in interpreter +To: +Cc: , , , , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-7-sblbir@amzn.com> + +From: Daniel Borkmann + 
+commit 144cd91c4c2bced6eb8a7e25e590f6618a11e854 upstream. + +This change moves the on-stack 64 bit tmp variable in ___bpf_prog_run() +into the hidden ax register. The latter is currently only used in JITs +for constant blinding as a temporary scratch register, meaning the BPF +interpreter will never see the use of ax. Therefore it is safe to use +it for the cases where tmp has been used earlier. This is needed to later +on allow restricted hidden use of ax in both interpreter and JITs. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +[backported to 4.14 sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/filter.h | 3 ++- + kernel/bpf/core.c | 31 ++++++++++++++++--------------- + 2 files changed, 18 insertions(+), 16 deletions(-) + +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -53,7 +53,8 @@ struct bpf_prog_aux; + * constants. See JIT pre-step in bpf_jit_blind_constants(). + */ + #define BPF_REG_AX MAX_BPF_REG +-#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) ++#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) ++#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG + + /* unused opcode to mark special call to bpf_tail_call() helper */ + #define BPF_TAIL_CALL 0xf0 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -51,6 +51,7 @@ + #define DST regs[insn->dst_reg] + #define SRC regs[insn->src_reg] + #define FP regs[BPF_REG_FP] ++#define AX regs[BPF_REG_AX] + #define ARG1 regs[BPF_REG_ARG1] + #define CTX regs[BPF_REG_CTX] + #define IMM insn->imm +@@ -939,22 +940,22 @@ select_insn: + ALU64_MOD_X: + if (unlikely(SRC == 0)) + return 0; +- div64_u64_rem(DST, SRC, &tmp); +- DST = tmp; ++ div64_u64_rem(DST, SRC, &AX); ++ DST = AX; + CONT; + ALU_MOD_X: + if (unlikely((u32)SRC == 0)) + return 0; +- tmp = (u32) DST; +- DST = do_div(tmp, (u32) SRC); ++ AX = (u32) DST; ++ DST = do_div(AX, (u32) SRC); + CONT; + ALU64_MOD_K: +- div64_u64_rem(DST, IMM, &tmp); +- DST = tmp; ++ div64_u64_rem(DST, IMM, &AX); ++ DST = AX; + CONT; + ALU_MOD_K: +- tmp = (u32) DST; +- DST = do_div(tmp, (u32) IMM); ++ AX = (u32) DST; ++ DST = do_div(AX, (u32) IMM); + CONT; + ALU64_DIV_X: + if (unlikely(SRC == 0)) +@@ -964,17 +965,17 @@ select_insn: + ALU_DIV_X: + if (unlikely((u32)SRC == 0)) + return 0; +- tmp = (u32) DST; +- do_div(tmp, (u32) SRC); +- DST = (u32) tmp; ++ AX = (u32) DST; ++ do_div(AX, (u32) SRC); ++ DST = (u32) AX; + CONT; + ALU64_DIV_K: + DST = div64_u64(DST, IMM); + CONT; + ALU_DIV_K: +- tmp = (u32) DST; +- do_div(tmp, (u32) IMM); +- DST = (u32) tmp; ++ AX = (u32) DST; ++ do_div(AX, (u32) IMM); ++ DST = (u32) AX; + CONT; + ALU_END_TO_BE: + switch (IMM) { +@@ -1278,7 +1279,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run + static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ + { \ + u64 stack[stack_size / sizeof(u64)]; \ +- u64 regs[MAX_BPF_REG]; \ ++ u64 regs[MAX_BPF_EXT_REG]; \ + \ + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ + ARG1 = (u64) (unsigned long) ctx; \ diff --git a/queue-4.14/bpf-prevent-out-of-bounds-speculation-on-pointer-arithmetic.patch b/queue-4.14/bpf-prevent-out-of-bounds-speculation-on-pointer-arithmetic.patch new file mode 100644 index 00000000000..19cbb1576a4 --- /dev/null +++ b/queue-4.14/bpf-prevent-out-of-bounds-speculation-on-pointer-arithmetic.patch @@ -0,0 +1,598 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:12 +0000 +Subject: bpf: prevent out of bounds speculation on pointer arithmetic +To: +Cc: , , , , 
Alexei Starovoitov , "Vallish Vaidyeshwara" , Balbir Singh +Message-ID: <20190403183917.13749-13-sblbir@amzn.com> + +From: Daniel Borkmann + +commit 979d63d50c0c0f7bc537bf821e056cc9fe5abd38 upstream. + +Jann reported that the original commit back in b2157399cc98 +("bpf: prevent out-of-bounds speculation") was not sufficient +to stop CPU from speculating out of bounds memory access: +While b2157399cc98 only focussed on masking array map access +for unprivileged users for tail calls and data access such +that the user provided index gets sanitized from BPF program +and syscall side, there is still a more generic form affected +from BPF programs that applies to most maps that hold user +data in relation to dynamic map access when dealing with +unknown scalars or "slow" known scalars as access offset, for +example: + + - Load a map value pointer into R6 + - Load an index into R7 + - Do a slow computation (e.g. with a memory dependency) that + loads a limit into R8 (e.g. load the limit from a map for + high latency, then mask it to make the verifier happy) + - Exit if R7 >= R8 (mispredicted branch) + - Load R0 = R6[R7] + - Load R0 = R6[R0] + +For unknown scalars there are two options in the BPF verifier +where we could derive knowledge from in order to guarantee +safe access to the memory: i) While /<=/>= variants won't +allow to derive any lower or upper bounds from the unknown +scalar where it would be safe to add it to the map value +pointer, it is possible through ==/!= test however. ii) another +option is to transform the unknown scalar into a known scalar, +for example, through ALU ops combination such as R &= +followed by R |= or any similar combination where the +original information from the unknown scalar would be destroyed +entirely leaving R with a constant. The initial slow load still +precedes the latter ALU ops on that register, so the CPU +executes speculatively from that point. Once we have the known +scalar, any compare operation would work then. A third option +only involving registers with known scalars could be crafted +as described in [0] where a CPU port (e.g. Slow Int unit) +would be filled with many dependent computations such that +the subsequent condition depending on its outcome has to wait +for evaluation on its execution port and thereby executing +speculatively if the speculated code can be scheduled on a +different execution port, or any other form of mistraining +as described in [1], for example. Given this is not limited +to only unknown scalars, not only map but also stack access +is affected since both is accessible for unprivileged users +and could potentially be used for out of bounds access under +speculation. + +In order to prevent any of these cases, the verifier is now +sanitizing pointer arithmetic on the offset such that any +out of bounds speculation would be masked in a way where the +pointer arithmetic result in the destination register will +stay unchanged, meaning offset masked into zero similar as +in array_index_nospec() case. With regards to implementation, +there are three options that were considered: i) new insn +for sanitation, ii) push/pop insn and sanitation as inlined +BPF, iii) reuse of ax register and sanitation as inlined BPF. + +Option i) has the downside that we end up using from reserved +bits in the opcode space, but also that we would require +each JIT to emit masking as native arch opcodes meaning +mitigation would have slow adoption till everyone implements +it eventually which is counter-productive. 
Option ii) and iii) +have both in common that a temporary register is needed in +order to implement the sanitation as inlined BPF since we +are not allowed to modify the source register. While a push / +pop insn in ii) would be useful to have in any case, it +requires once again that every JIT needs to implement it +first. While possible, amount of changes needed would also +be unsuitable for a -stable patch. Therefore, the path which +has fewer changes, less BPF instructions for the mitigation +and does not require anything to be changed in the JITs is +option iii) which this work is pursuing. The ax register is +already mapped to a register in all JITs (modulo arm32 where +it's mapped to stack as various other BPF registers there) +and used in constant blinding for JITs-only so far. It can +be reused for verifier rewrites under certain constraints. +The interpreter's tmp "register" has therefore been remapped +into extending the register set with hidden ax register and +reusing that for a number of instructions that needed the +prior temporary variable internally (e.g. div, mod). This +allows for zero increase in stack space usage in the interpreter, +and enables (restricted) generic use in rewrites otherwise as +long as such a patchlet does not make use of these instructions. +The sanitation mask is dynamic and relative to the offset the +map value or stack pointer currently holds. + +There are various cases that need to be taken under consideration +for the masking, e.g. such operation could look as follows: +ptr += val or val += ptr or ptr -= val. Thus, the value to be +sanitized could reside either in source or in destination +register, and the limit is different depending on whether +the ALU op is addition or subtraction and depending on the +current known and bounded offset. The limit is derived as +follows: limit := max_value_size - (smin_value + off). For +subtraction: limit := umax_value + off. This holds because +we do not allow any pointer arithmetic that would +temporarily go out of bounds or would have an unknown +value with mixed signed bounds where it is unclear at +verification time whether the actual runtime value would +be either negative or positive. For example, we have a +derived map pointer value with constant offset and bounded +one, so limit based on smin_value works because the verifier +requires that statically analyzed arithmetic on the pointer +must be in bounds, and thus it checks if resulting +smin_value + off and umax_value + off is still within map +value bounds at time of arithmetic in addition to time of +access. Similarly, for the case of stack access we derive +the limit as follows: MAX_BPF_STACK + off for subtraction +and -off for the case of addition where off := ptr_reg->off + +ptr_reg->var_off.value. Subtraction is a special case for +the masking which can be in form of ptr += -val, ptr -= -val, +or ptr -= val. In the first two cases where we know that +the value is negative, we need to temporarily negate the +value in order to do the sanitation on a positive value +where we later swap the ALU op, and restore original source +register if the value was in source. + +The sanitation of pointer arithmetic alone is still not fully +sufficient as is, since a scenario like the following could +happen ... + + PTR += 0x1000 (e.g. K-based imm) + PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON + PTR += 0x1000 + PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON + [...] + +... which under speculation could end up as ... 
+ + PTR += 0x1000 + PTR -= 0 [ truncated by mitigation ] + PTR += 0x1000 + PTR -= 0 [ truncated by mitigation ] + [...] + +... and therefore still access out of bounds. To prevent such +case, the verifier is also analyzing safety for potential out +of bounds access under speculative execution. Meaning, it is +also simulating pointer access under truncation. We therefore +"branch off" and push the current verification state after the +ALU operation with known 0 to the verification stack for later +analysis. Given the current path analysis succeeded it is +likely that the one under speculation can be pruned. In any +case, it is also subject to existing complexity limits and +therefore anything beyond this point will be rejected. In +terms of pruning, it needs to be ensured that the verification +state from speculative execution simulation must never prune +a non-speculative execution path, therefore, we mark verifier +state accordingly at the time of push_stack(). If verifier +detects out of bounds access under speculative execution from +one of the possible paths that includes a truncation, it will +reject such program. + +Given we mask every reg-based pointer arithmetic for +unprivileged programs, we've been looking into how it could +affect real-world programs in terms of size increase. As the +majority of programs are targeted for privileged-only use +case, we've unconditionally enabled masking (with its alu +restrictions on top of it) for privileged programs for the +sake of testing in order to check i) whether they get rejected +in its current form, and ii) by how much the number of +instructions and size will increase. We've tested this by +using Katran, Cilium and test_l4lb from the kernel selftests. +For Katran we've evaluated balancer_kern.o, Cilium bpf_lxc.o +and an older test object bpf_lxc_opt_-DUNKNOWN.o and l4lb +we've used test_l4lb.o as well as test_l4lb_noinline.o. We +found that none of the programs got rejected by the verifier +with this change, and that impact is rather minimal to none. +balancer_kern.o had 13,904 bytes (1,738 insns) xlated and +7,797 bytes JITed before and after the change. Most complex +program in bpf_lxc.o had 30,544 bytes (3,817 insns) xlated +and 18,538 bytes JITed before and after and none of the other +tail call programs in bpf_lxc.o had any changes either. For +the older bpf_lxc_opt_-DUNKNOWN.o object we found a small +increase from 20,616 bytes (2,576 insns) and 12,536 bytes JITed +before to 20,664 bytes (2,582 insns) and 12,558 bytes JITed +after the change. Other programs from that object file had +similar small increase. Both test_l4lb.o had no change and +remained at 6,544 bytes (817 insns) xlated and 3,401 bytes +JITed and for test_l4lb_noinline.o constant at 5,080 bytes +(634 insns) xlated and 3,313 bytes JITed. This can be explained +in that LLVM typically optimizes stack based pointer arithmetic +by using K-based operations and that use of dynamic map access +is not overly frequent. However, in future we may decide to +optimize the algorithm further under known guarantees from +branch and value speculation. Latter seems also unclear in +terms of prediction heuristics that today's CPUs apply as well +as whether there could be collisions in e.g. the predictor's +Value History/Pattern Table for triggering out of bounds access, +thus masking is performed unconditionally at this point but could +be subject to relaxation later on. 
We were generally also +brainstorming various other approaches for mitigation, but the +blocker was always lack of available registers at runtime and/or +overhead for runtime tracking of limits belonging to a specific +pointer. Thus, we found this to be minimally intrusive under +given constraints. + +With that in place, a simple example with sanitized access on +unprivileged load at post-verification time looks as follows: + + # bpftool prog dump xlated id 282 + [...] + 28: (79) r1 = *(u64 *)(r7 +0) + 29: (79) r2 = *(u64 *)(r7 +8) + 30: (57) r1 &= 15 + 31: (79) r3 = *(u64 *)(r0 +4608) + 32: (57) r3 &= 1 + 33: (47) r3 |= 1 + 34: (2d) if r2 > r3 goto pc+19 + 35: (b4) (u32) r11 = (u32) 20479 | + 36: (1f) r11 -= r2 | Dynamic sanitation for pointer + 37: (4f) r11 |= r2 | arithmetic with registers + 38: (87) r11 = -r11 | containing bounded or known + 39: (c7) r11 s>>= 63 | scalars in order to prevent + 40: (5f) r11 &= r2 | out of bounds speculation. + 41: (0f) r4 += r11 | + 42: (71) r4 = *(u8 *)(r4 +0) + 43: (6f) r4 <<= r1 + [...] + +For the case where the scalar sits in the destination register +as opposed to the source register, the following code is emitted +for the above example: + + [...] + 16: (b4) (u32) r11 = (u32) 20479 + 17: (1f) r11 -= r2 + 18: (4f) r11 |= r2 + 19: (87) r11 = -r11 + 20: (c7) r11 s>>= 63 + 21: (5f) r2 &= r11 + 22: (0f) r2 += r0 + 23: (61) r0 = *(u32 *)(r2 +0) + [...] + +JIT blinding example with non-conflicting use of r10: + + [...] + d5: je 0x0000000000000106 _ + d7: mov 0x0(%rax),%edi | + da: mov $0xf153246,%r10d | Index load from map value and + e0: xor $0xf153259,%r10 | (const blinded) mask with 0x1f. + e7: and %r10,%rdi |_ + ea: mov $0x2f,%r10d | + f0: sub %rdi,%r10 | Sanitized addition. Both use r10 + f3: or %rdi,%r10 | but do not interfere with each + f6: neg %r10 | other. (Neither do these instructions + f9: sar $0x3f,%r10 | interfere with the use of ax as temp + fd: and %r10,%rdi | in interpreter.) + 100: add %rax,%rdi |_ + 103: mov 0x0(%rdi),%eax + [...] + +Tested that it fixes Jann's reproducer, and also checked that test_verifier +and test_progs suite with interpreter, JIT and JIT with hardening enabled +on x86-64 and arm64 runs successfully. 
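+
+A minimal C sketch of what the emitted r11 sequence computes at runtime
+(illustrative only; the helper name is made up and "limit" stands for
+the verifier-derived aux->alu_limit), mirroring the ADD case from the
+xlated dump above:
+
+  #include <stdint.h>
+
+  /* Keeps off when it lies within [0, limit - 1], otherwise truncates
+   * it to 0, matching the MOV32, SUB, OR, NEG, ARSH 63, AND steps.
+   */
+  static uint64_t sanitize_off(uint64_t off, uint32_t limit)
+  {
+          uint64_t ax = (uint32_t)(limit - 1);  /* BPF_MOV32_IMM             */
+
+          ax -= off;                            /* negative if off > limit-1 */
+          ax |= off;                            /* also catches huge off     */
+          ax = -ax;
+          ax = (uint64_t)((int64_t)ax >> 63);   /* all-ones or all-zeroes    */
+          return off & ax;                      /* off or 0                  */
+  }
+
+The same shape is used for the subtraction and destination-register
+variants, only the masked operand and the limit derivation differ.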
+ + [0] Speculose: Analyzing the Security Implications of Speculative + Execution in CPUs, Giorgi Maisuradze and Christian Rossow, + https://arxiv.org/pdf/1801.04084.pdf + + [1] A Systematic Evaluation of Transient Execution Attacks and + Defenses, Claudio Canella, Jo Van Bulck, Michael Schwarz, + Moritz Lipp, Benjamin von Berg, Philipp Ortner, Frank Piessens, + Dmitry Evtyushkin, Daniel Gruss, + https://arxiv.org/pdf/1811.05441.pdf + +Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") +Reported-by: Jann Horn +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Vallish Vaidyeshwara +[some checkpatch cleanups and backported to 4.14 by sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 10 ++ + kernel/bpf/verifier.c | 184 +++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 188 insertions(+), 6 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -104,6 +104,7 @@ struct bpf_verifier_state { + struct bpf_verifier_state *parent; + int allocated_stack; + struct bpf_stack_state *stack; ++ bool speculative; + }; + + /* linked list of verifier states used to prune search */ +@@ -112,14 +113,23 @@ struct bpf_verifier_state_list { + struct bpf_verifier_state_list *next; + }; + ++/* Possible states for alu_state member. */ ++#define BPF_ALU_SANITIZE_SRC 1U ++#define BPF_ALU_SANITIZE_DST 2U ++#define BPF_ALU_NEG_VALUE (1U << 2) ++#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ ++ BPF_ALU_SANITIZE_DST) ++ + struct bpf_insn_aux_data { + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ ++ u32 alu_limit; /* limit for add/sub register with pointer */ + }; + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int sanitize_stack_off; /* stack slot to be cleared */ + bool seen; /* this insn was processed by the verifier */ ++ u8 alu_state; /* used in combination with alu_limit */ + }; + + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -542,7 +542,8 @@ static int pop_stack(struct bpf_verifier + } + + static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, +- int insn_idx, int prev_insn_idx) ++ int insn_idx, int prev_insn_idx, ++ bool speculative) + { + struct bpf_verifier_stack_elem *elem; + struct bpf_verifier_state *cur = env->cur_state; +@@ -555,6 +556,7 @@ static struct bpf_verifier_state *push_s + elem->insn_idx = insn_idx; + elem->prev_insn_idx = prev_insn_idx; + elem->next = env->head; ++ elem->st.speculative |= speculative; + env->head = elem; + env->stack_size++; + err = copy_verifier_state(&elem->st, cur); +@@ -2002,6 +2004,102 @@ static bool check_reg_sane_offset(struct + return true; + } + ++static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) ++{ ++ return &env->insn_aux_data[env->insn_idx]; ++} ++ ++static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, ++ u32 *ptr_limit, u8 opcode, bool off_is_neg) ++{ ++ bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || ++ (opcode == BPF_SUB && !off_is_neg); ++ u32 off; ++ ++ switch (ptr_reg->type) { ++ case PTR_TO_STACK: ++ off = ptr_reg->off + ptr_reg->var_off.value; ++ if (mask_to_left) ++ *ptr_limit = MAX_BPF_STACK + off; ++ else ++ *ptr_limit = -off; ++ return 0; ++ case PTR_TO_MAP_VALUE: ++ if (mask_to_left) { ++ 
*ptr_limit = ptr_reg->umax_value + ptr_reg->off; ++ } else { ++ off = ptr_reg->smin_value + ptr_reg->off; ++ *ptr_limit = ptr_reg->map_ptr->value_size - off; ++ } ++ return 0; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int sanitize_ptr_alu(struct bpf_verifier_env *env, ++ struct bpf_insn *insn, ++ const struct bpf_reg_state *ptr_reg, ++ struct bpf_reg_state *dst_reg, ++ bool off_is_neg) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_insn_aux_data *aux = cur_aux(env); ++ bool ptr_is_dst_reg = ptr_reg == dst_reg; ++ u8 opcode = BPF_OP(insn->code); ++ u32 alu_state, alu_limit; ++ struct bpf_reg_state tmp; ++ bool ret; ++ ++ if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) ++ return 0; ++ ++ /* We already marked aux for masking from non-speculative ++ * paths, thus we got here in the first place. We only care ++ * to explore bad access from here. ++ */ ++ if (vstate->speculative) ++ goto do_sim; ++ ++ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; ++ alu_state |= ptr_is_dst_reg ? ++ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; ++ ++ if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) ++ return 0; ++ ++ /* If we arrived here from different branches with different ++ * limits to sanitize, then this won't work. ++ */ ++ if (aux->alu_state && ++ (aux->alu_state != alu_state || ++ aux->alu_limit != alu_limit)) ++ return -EACCES; ++ ++ /* Corresponding fixup done in fixup_bpf_calls(). */ ++ aux->alu_state = alu_state; ++ aux->alu_limit = alu_limit; ++ ++do_sim: ++ /* Simulate and find potential out-of-bounds access under ++ * speculative execution from truncation as a result of ++ * masking when off was not within expected range. If off ++ * sits in dst, then we temporarily need to move ptr there ++ * to simulate dst (== 0) +/-= ptr. Needed, for example, ++ * for cases where we use K-based arithmetic in one direction ++ * and truncated reg-based in the other in order to explore ++ * bad access. ++ */ ++ if (!ptr_is_dst_reg) { ++ tmp = *dst_reg; ++ *dst_reg = *ptr_reg; ++ } ++ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); ++ if (!ptr_is_dst_reg) ++ *dst_reg = tmp; ++ return !ret ? -EFAULT : 0; ++} ++ + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. + * Caller should also handle BPF_MOV case separately. + * If we return -EACCES, caller may want to try again treating pointer as a +@@ -2020,6 +2118,7 @@ static int adjust_ptr_min_max_vals(struc + umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + u32 dst = insn->dst_reg, src = insn->src_reg; + u8 opcode = BPF_OP(insn->code); ++ int ret; + + dst_reg = ®s[dst]; + +@@ -2071,6 +2170,11 @@ static int adjust_ptr_min_max_vals(struc + + switch (opcode) { + case BPF_ADD: ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); ++ if (ret < 0) { ++ verbose("R%d tried to add from different maps or paths\n", dst); ++ return ret; ++ } + /* We can take a fixed offset as long as it doesn't overflow + * the s32 'off' field + */ +@@ -2121,6 +2225,11 @@ static int adjust_ptr_min_max_vals(struc + } + break; + case BPF_SUB: ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); ++ if (ret < 0) { ++ verbose("R%d tried to sub from different maps or paths\n", dst); ++ return ret; ++ } + if (dst_reg == off_reg) { + /* scalar -= pointer. 
Creates an unknown scalar */ + if (!env->allow_ptr_leaks) +@@ -3132,7 +3241,8 @@ static int check_cond_jmp_op(struct bpf_ + } + } + +- other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); ++ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, ++ false); + if (!other_branch) + return -EFAULT; + +@@ -3767,6 +3877,12 @@ static bool states_equal(struct bpf_veri + bool ret = false; + int i; + ++ /* Verification state from speculative execution simulation ++ * must never prune a non-speculative execution one. ++ */ ++ if (old->speculative && !cur->speculative) ++ return false; ++ + idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); + /* If we failed to allocate the idmap, just say it's not safe */ + if (!idmap) +@@ -3970,8 +4086,10 @@ static int do_check(struct bpf_verifier_ + /* found equivalent state, can prune the search */ + if (log_level) { + if (do_print_state) +- verbose("\nfrom %d to %d: safe\n", +- env->prev_insn_idx, env->insn_idx); ++ verbose("\nfrom %d to %d%s: safe\n", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? ++ " (speculative execution)" : ""); + else + verbose("%d: safe\n", env->insn_idx); + } +@@ -3985,8 +4103,10 @@ static int do_check(struct bpf_verifier_ + if (log_level > 1) + verbose("%d:", env->insn_idx); + else +- verbose("\nfrom %d to %d:", +- env->prev_insn_idx, env->insn_idx); ++ verbose("\nfrom %d to %d%s:", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? ++ " (speculative execution)" : ""); + print_verifier_state(env->cur_state); + do_print_state = false; + } +@@ -4585,6 +4705,7 @@ static int fixup_bpf_calls(struct bpf_ve + struct bpf_prog *new_prog; + struct bpf_map *map_ptr; + int i, cnt, delta = 0; ++ struct bpf_insn_aux_data *aux; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || +@@ -4598,6 +4719,57 @@ static int fixup_bpf_calls(struct bpf_ve + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ ++ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || ++ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { ++ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; ++ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; ++ struct bpf_insn insn_buf[16]; ++ struct bpf_insn *patch = &insn_buf[0]; ++ bool issrc, isneg; ++ u32 off_reg; ++ ++ aux = &env->insn_aux_data[i + delta]; ++ if (!aux->alu_state) ++ continue; ++ ++ isneg = aux->alu_state & BPF_ALU_NEG_VALUE; ++ issrc = (aux->alu_state & BPF_ALU_SANITIZE) == ++ BPF_ALU_SANITIZE_SRC; ++ ++ off_reg = issrc ? insn->src_reg : insn->dst_reg; ++ if (isneg) ++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); ++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); ++ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); ++ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); ++ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); ++ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); ++ if (issrc) { ++ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, ++ off_reg); ++ insn->src_reg = BPF_REG_AX; ++ } else { ++ *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, ++ BPF_REG_AX); ++ } ++ if (isneg) ++ insn->code = insn->code == code_add ? 
++ code_sub : code_add; ++ *patch++ = *insn; ++ if (issrc && isneg) ++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); ++ cnt = patch - insn_buf; ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; diff --git a/queue-4.14/bpf-reduce-verifier-memory-consumption.patch b/queue-4.14/bpf-reduce-verifier-memory-consumption.patch new file mode 100644 index 00000000000..9a0b9a08090 --- /dev/null +++ b/queue-4.14/bpf-reduce-verifier-memory-consumption.patch @@ -0,0 +1,1012 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:01 +0000 +Subject: bpf: reduce verifier memory consumption +To: +Cc: , , , , Alexei Starovoitov , , Alexei Starovoitov , "David S . Miller" , Balbir Singh +Message-ID: <20190403183917.13749-2-sblbir@amzn.com> + +From: Alexei Starovoitov + +commit 638f5b90d46016372a8e3e0a434f199cc5e12b8c upstream. + +the verifier got progressively smarter over time and size of its internal +state grew as well. Time to reduce the memory consumption. + +Before: +sizeof(struct bpf_verifier_state) = 6520 +After: +sizeof(struct bpf_verifier_state) = 896 + +It's done by observing that majority of BPF programs use little to +no stack whereas verifier kept all of 512 stack slots ready always. +Instead dynamically reallocate struct verifier state when stack +access is detected. +Runtime difference before vs after is within a noise. +The number of processed instructions stays the same. + +Cc: jakub.kicinski@netronome.com + +Signed-off-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Signed-off-by: David S. Miller +[Backported to 4.14 by sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 9 + include/linux/bpf_verifier.h | 16 + kernel/bpf/verifier.c | 433 ++++++++++++++-------- + 3 files changed, 304 insertions(+), 154 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c ++++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +@@ -76,9 +76,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_p + + static int + nfp_bpf_check_exit(struct nfp_prog *nfp_prog, +- const struct bpf_verifier_env *env) ++ struct bpf_verifier_env *env) + { +- const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; ++ const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; + u64 imm; + + if (nfp_prog->act == NN_ACT_XDP) +@@ -113,9 +113,10 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_ + + static int + nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, +- const struct bpf_verifier_env *env, u8 reg) ++ struct bpf_verifier_env *env, u8 reg_no) + { +- if (env->cur_state.regs[reg].type != PTR_TO_CTX) ++ const struct bpf_reg_state *reg = cur_regs(env) + reg_no; ++ if (reg->type != PTR_TO_CTX) + return -EINVAL; + + return 0; +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -91,14 +91,19 @@ enum bpf_stack_slot_type { + + #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + ++struct bpf_stack_state { ++ struct bpf_reg_state spilled_ptr; ++ u8 slot_type[BPF_REG_SIZE]; ++}; ++ + /* state of the program: + * type of all registers and stack info + */ + struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; +- u8 stack_slot_type[MAX_BPF_STACK]; +- struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; + struct bpf_verifier_state *parent; ++ int allocated_stack; ++ struct bpf_stack_state *stack; + }; + + /* linked list of verifier states used to 
prune search */ +@@ -133,7 +138,7 @@ struct bpf_verifier_env { + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + bool strict_alignment; /* perform strict pointer alignment checks */ +- struct bpf_verifier_state cur_state; /* current verifier state */ ++ struct bpf_verifier_state *cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ +@@ -145,6 +150,11 @@ struct bpf_verifier_env { + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ + }; + ++static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) ++{ ++ return env->cur_state->regs; ++} ++ + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -265,10 +265,11 @@ static void print_verifier_state(struct + verbose(")"); + } + } +- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +- if (state->stack_slot_type[i] == STACK_SPILL) +- verbose(" fp%d=%s", -MAX_BPF_STACK + i, +- reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]); ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ if (state->stack[i].slot_type[0] == STACK_SPILL) ++ verbose(" fp%d=%s", ++ -MAX_BPF_STACK + i * BPF_REG_SIZE, ++ reg_type_str[state->stack[i].spilled_ptr.type]); + } + verbose("\n"); + } +@@ -434,35 +435,123 @@ static void print_bpf_insn(const struct + } + } + +-static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) ++static int copy_stack_state(struct bpf_verifier_state *dst, ++ const struct bpf_verifier_state *src) + { +- struct bpf_verifier_stack_elem *elem; +- int insn_idx; ++ if (!src->stack) ++ return 0; ++ if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) { ++ /* internal bug, make state invalid to reject the program */ ++ memset(dst, 0, sizeof(*dst)); ++ return -EFAULT; ++ } ++ memcpy(dst->stack, src->stack, ++ sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE)); ++ return 0; ++} ++ ++/* do_check() starts with zero-sized stack in struct bpf_verifier_state to ++ * make it consume minimal amount of memory. check_stack_write() access from ++ * the program calls into realloc_verifier_state() to grow the stack size. ++ * Note there is a non-zero 'parent' pointer inside bpf_verifier_state ++ * which this function copies over. 
It points to previous bpf_verifier_state ++ * which is never reallocated ++ */ ++static int realloc_verifier_state(struct bpf_verifier_state *state, int size, ++ bool copy_old) ++{ ++ u32 old_size = state->allocated_stack; ++ struct bpf_stack_state *new_stack; ++ int slot = size / BPF_REG_SIZE; ++ ++ if (size <= old_size || !size) { ++ if (copy_old) ++ return 0; ++ state->allocated_stack = slot * BPF_REG_SIZE; ++ if (!size && old_size) { ++ kfree(state->stack); ++ state->stack = NULL; ++ } ++ return 0; ++ } ++ new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state), ++ GFP_KERNEL); ++ if (!new_stack) ++ return -ENOMEM; ++ if (copy_old) { ++ if (state->stack) ++ memcpy(new_stack, state->stack, ++ sizeof(*new_stack) * (old_size / BPF_REG_SIZE)); ++ memset(new_stack + old_size / BPF_REG_SIZE, 0, ++ sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE); ++ } ++ state->allocated_stack = slot * BPF_REG_SIZE; ++ kfree(state->stack); ++ state->stack = new_stack; ++ return 0; ++} ++ ++static void free_verifier_state(struct bpf_verifier_state *state) ++{ ++ kfree(state->stack); ++ kfree(state); ++} ++ ++/* copy verifier state from src to dst growing dst stack space ++ * when necessary to accommodate larger src stack ++ */ ++static int copy_verifier_state(struct bpf_verifier_state *dst, ++ const struct bpf_verifier_state *src) ++{ ++ int err; ++ ++ err = realloc_verifier_state(dst, src->allocated_stack, false); ++ if (err) ++ return err; ++ memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack)); ++ return copy_stack_state(dst, src); ++} ++ ++static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, ++ int *insn_idx) ++{ ++ struct bpf_verifier_state *cur = env->cur_state; ++ struct bpf_verifier_stack_elem *elem, *head = env->head; ++ int err; + + if (env->head == NULL) +- return -1; ++ return -ENOENT; + +- memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state)); +- insn_idx = env->head->insn_idx; ++ if (cur) { ++ err = copy_verifier_state(cur, &head->st); ++ if (err) ++ return err; ++ } ++ if (insn_idx) ++ *insn_idx = head->insn_idx; + if (prev_insn_idx) +- *prev_insn_idx = env->head->prev_insn_idx; +- elem = env->head->next; +- kfree(env->head); ++ *prev_insn_idx = head->prev_insn_idx; ++ elem = head->next; ++ kfree(head); + env->head = elem; + env->stack_size--; +- return insn_idx; ++ return 0; + } + + static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) + { + struct bpf_verifier_stack_elem *elem; ++ struct bpf_verifier_state *cur = env->cur_state; ++ int err; + +- elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); ++ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + if (!elem) + goto err; + +- memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state)); ++ err = copy_verifier_state(&elem->st, cur); ++ if (err) ++ return NULL; + elem->insn_idx = insn_idx; + elem->prev_insn_idx = prev_insn_idx; + elem->next = env->head; +@@ -475,7 +564,7 @@ static struct bpf_verifier_state *push_s + return &elem->st; + err: + /* pop all elements and return */ +- while (pop_stack(env, NULL) >= 0); ++ while (!pop_stack(env, NULL, NULL)); + return NULL; + } + +@@ -671,7 +760,7 @@ static void mark_reg_read(const struct b + static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, + enum reg_arg_type t) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = env->cur_state->regs; + + if (regno >= MAX_BPF_REG) { + verbose("R%d is invalid\n", 
regno); +@@ -684,7 +773,7 @@ static int check_reg_arg(struct bpf_veri + verbose("R%d !read_ok\n", regno); + return -EACCES; + } +- mark_reg_read(&env->cur_state, regno); ++ mark_reg_read(env->cur_state, regno); + } else { + /* check whether register used as dest operand can be written to */ + if (regno == BPF_REG_FP) { +@@ -721,10 +810,21 @@ static int check_stack_write(struct bpf_ + struct bpf_verifier_state *state, int off, + int size, int value_regno, int insn_idx) + { +- int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; ++ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; ++ ++ err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE), ++ true); ++ if (err) ++ return err; + /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, + * so it's aligned access and [off, off + size) are within stack limits + */ ++ if (!env->allow_ptr_leaks && ++ state->stack[spi].slot_type[0] == STACK_SPILL && ++ size != BPF_REG_SIZE) { ++ verbose("attempt to corrupt spilled pointer on stack\n"); ++ return -EACCES; ++ } + + if (value_regno >= 0 && + is_spillable_regtype(state->regs[value_regno].type)) { +@@ -736,11 +836,11 @@ static int check_stack_write(struct bpf_ + } + + /* save register state */ +- state->spilled_regs[spi] = state->regs[value_regno]; +- state->spilled_regs[spi].live |= REG_LIVE_WRITTEN; ++ state->stack[spi].spilled_ptr = state->regs[value_regno]; ++ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + + for (i = 0; i < BPF_REG_SIZE; i++) { +- if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && ++ if (state->stack[spi].slot_type[i] == STACK_MISC && + !env->allow_ptr_leaks) { + int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; + int soff = (-spi - 1) * BPF_REG_SIZE; +@@ -763,14 +863,15 @@ static int check_stack_write(struct bpf_ + } + *poff = soff; + } +- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; ++ state->stack[spi].slot_type[i] = STACK_SPILL; + } + } else { + /* regular write of data into stack */ +- state->spilled_regs[spi] = (struct bpf_reg_state) {}; ++ state->stack[spi].spilled_ptr = (struct bpf_reg_state) {}; + + for (i = 0; i < size; i++) +- state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; ++ state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = ++ STACK_MISC; + } + return 0; + } +@@ -781,10 +882,10 @@ static void mark_stack_slot_read(const s + + while (parent) { + /* if read wasn't screened by an earlier write ... */ +- if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN) ++ if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) + break; + /* ... 
then we depend on parent's value */ +- parent->spilled_regs[slot].live |= REG_LIVE_READ; ++ parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ; + state = parent; + parent = state->parent; + } +@@ -793,34 +894,37 @@ static void mark_stack_slot_read(const s + static int check_stack_read(struct bpf_verifier_state *state, int off, int size, + int value_regno) + { +- u8 *slot_type; +- int i, spi; ++ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; ++ u8 *stype; + +- slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; ++ if (state->allocated_stack <= slot) { ++ verbose("invalid read from stack off %d+0 size %d\n", ++ off, size); ++ return -EACCES; ++ } ++ stype = state->stack[spi].slot_type; + +- if (slot_type[0] == STACK_SPILL) { ++ if (stype[0] == STACK_SPILL) { + if (size != BPF_REG_SIZE) { + verbose("invalid size of register spill\n"); + return -EACCES; + } + for (i = 1; i < BPF_REG_SIZE; i++) { +- if (slot_type[i] != STACK_SPILL) { ++ if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { + verbose("corrupted spill memory\n"); + return -EACCES; + } + } + +- spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; +- + if (value_regno >= 0) { + /* restore register state from stack */ +- state->regs[value_regno] = state->spilled_regs[spi]; ++ state->regs[value_regno] = state->stack[spi].spilled_ptr; + mark_stack_slot_read(state, spi); + } + return 0; + } else { + for (i = 0; i < size; i++) { +- if (slot_type[i] != STACK_MISC) { ++ if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) { + verbose("invalid read from stack off %d+%d size %d\n", + off, i, size); + return -EACCES; +@@ -837,7 +941,8 @@ static int check_stack_read(struct bpf_v + static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, + int size) + { +- struct bpf_map *map = env->cur_state.regs[regno].map_ptr; ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_map *map = regs[regno].map_ptr; + + if (off < 0 || size <= 0 || off + size > map->value_size) { + verbose("invalid access to map value, value_size=%d off=%d size=%d\n", +@@ -849,9 +954,9 @@ static int __check_map_access(struct bpf + + /* check read/write into a map element with possible variable offset */ + static int check_map_access(struct bpf_verifier_env *env, u32 regno, +- int off, int size) ++ int off, int size) + { +- struct bpf_verifier_state *state = &env->cur_state; ++ struct bpf_verifier_state *state = env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; + int err; + +@@ -924,7 +1029,7 @@ static bool may_access_direct_pkt_data(s + static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, + int off, int size) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + + if (off < 0 || size <= 0 || (u64)off + size > reg->range) { +@@ -938,7 +1043,7 @@ static int __check_packet_access(struct + static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, + int size) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = ®s[regno]; + int err; + +@@ -1008,19 +1113,19 @@ static bool __is_pointer_value(bool allo + + static bool is_pointer_value(struct bpf_verifier_env *env, int regno) + { +- return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); ++ return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); + } + + static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) + { +- const struct 
bpf_reg_state *reg = &env->cur_state.regs[regno]; ++ const struct bpf_reg_state *reg = cur_regs(env) + regno; + + return reg->type == PTR_TO_CTX; + } + + static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) + { +- const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; ++ const struct bpf_reg_state *reg = cur_regs(env) + regno; + + return reg->type == PTR_TO_PACKET; + } +@@ -1145,8 +1250,9 @@ static int check_mem_access(struct bpf_v + int off, int bpf_size, enum bpf_access_type t, + int value_regno, bool strict_alignment_once) + { +- struct bpf_verifier_state *state = &env->cur_state; +- struct bpf_reg_state *reg = &state->regs[regno]; ++ struct bpf_verifier_state *state = env->cur_state; ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_reg_state *reg = regs + regno; + int size, err = 0; + + size = bpf_size_to_bytes(bpf_size); +@@ -1170,7 +1276,7 @@ static int check_mem_access(struct bpf_v + + err = check_map_access(env, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) +- mark_reg_unknown(state->regs, value_regno); ++ mark_reg_unknown(regs, value_regno); + + } else if (reg->type == PTR_TO_CTX) { + enum bpf_reg_type reg_type = SCALAR_VALUE; +@@ -1203,13 +1309,13 @@ static int check_mem_access(struct bpf_v + * the offset is zero. + */ + if (reg_type == SCALAR_VALUE) +- mark_reg_unknown(state->regs, value_regno); ++ mark_reg_unknown(regs, value_regno); + else +- mark_reg_known_zero(state->regs, value_regno); +- state->regs[value_regno].id = 0; +- state->regs[value_regno].off = 0; +- state->regs[value_regno].range = 0; +- state->regs[value_regno].type = reg_type; ++ mark_reg_known_zero(regs, value_regno); ++ regs[value_regno].id = 0; ++ regs[value_regno].off = 0; ++ regs[value_regno].range = 0; ++ regs[value_regno].type = reg_type; + } + + } else if (reg->type == PTR_TO_STACK) { +@@ -1234,18 +1340,11 @@ static int check_mem_access(struct bpf_v + if (env->prog->aux->stack_depth < -off) + env->prog->aux->stack_depth = -off; + +- if (t == BPF_WRITE) { +- if (!env->allow_ptr_leaks && +- state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && +- size != BPF_REG_SIZE) { +- verbose("attempt to corrupt spilled pointer on stack\n"); +- return -EACCES; +- } ++ if (t == BPF_WRITE) + err = check_stack_write(env, state, off, size, + value_regno, insn_idx); +- } else { ++ else + err = check_stack_read(state, off, size, value_regno); +- } + } else if (reg->type == PTR_TO_PACKET) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { + verbose("cannot write into packet\n"); +@@ -1258,7 +1357,7 @@ static int check_mem_access(struct bpf_v + } + err = check_packet_access(env, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) +- mark_reg_unknown(state->regs, value_regno); ++ mark_reg_unknown(regs, value_regno); + } else { + verbose("R%d invalid mem access '%s'\n", + regno, reg_type_str[reg->type]); +@@ -1266,9 +1365,9 @@ static int check_mem_access(struct bpf_v + } + + if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && +- state->regs[value_regno].type == SCALAR_VALUE) { ++ regs[value_regno].type == SCALAR_VALUE) { + /* b/h/w load zero-extends, mark upper bits as known 0 */ +- coerce_reg_to_size(&state->regs[value_regno], size); ++ coerce_reg_to_size(®s[value_regno], size); + } + return err; + } +@@ -1333,9 +1432,9 @@ static int check_stack_boundary(struct b + int access_size, bool zero_size_allowed, + struct bpf_call_arg_meta *meta) + { +- struct bpf_verifier_state *state = &env->cur_state; ++ struct 
bpf_verifier_state *state = env->cur_state; + struct bpf_reg_state *regs = state->regs; +- int off, i; ++ int off, i, slot, spi; + + if (regs[regno].type != PTR_TO_STACK) { + /* Allow zero-byte read from NULL, regardless of pointer type */ +@@ -1376,7 +1475,11 @@ static int check_stack_boundary(struct b + } + + for (i = 0; i < access_size; i++) { +- if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { ++ slot = -(off + i) - 1; ++ spi = slot / BPF_REG_SIZE; ++ if (state->allocated_stack <= slot || ++ state->stack[spi].slot_type[slot % BPF_REG_SIZE] != ++ STACK_MISC) { + verbose("invalid indirect read from stack off %d+%d size %d\n", + off, i, access_size); + return -EACCES; +@@ -1389,7 +1492,7 @@ static int check_helper_mem_access(struc + int access_size, bool zero_size_allowed, + struct bpf_call_arg_meta *meta) + { +- struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; ++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + + switch (reg->type) { + case PTR_TO_PACKET: +@@ -1406,7 +1509,7 @@ static int check_func_arg(struct bpf_ver + enum bpf_arg_type arg_type, + struct bpf_call_arg_meta *meta) + { +- struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; ++ struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + enum bpf_reg_type expected_type, type = reg->type; + int err = 0; + +@@ -1678,7 +1781,7 @@ static int check_raw_mode(const struct b + */ + static void clear_all_pkt_pointers(struct bpf_verifier_env *env) + { +- struct bpf_verifier_state *state = &env->cur_state; ++ struct bpf_verifier_state *state = env->cur_state; + struct bpf_reg_state *regs = state->regs, *reg; + int i; + +@@ -1687,10 +1790,10 @@ static void clear_all_pkt_pointers(struc + regs[i].type == PTR_TO_PACKET_END) + mark_reg_unknown(regs, i); + +- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +- if (state->stack_slot_type[i] != STACK_SPILL) ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; +- reg = &state->spilled_regs[i / BPF_REG_SIZE]; ++ reg = &state->stack[i].spilled_ptr; + if (reg->type != PTR_TO_PACKET && + reg->type != PTR_TO_PACKET_END) + continue; +@@ -1700,9 +1803,8 @@ static void clear_all_pkt_pointers(struc + + static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + { +- struct bpf_verifier_state *state = &env->cur_state; + const struct bpf_func_proto *fn = NULL; +- struct bpf_reg_state *regs = state->regs; ++ struct bpf_reg_state *regs; + struct bpf_call_arg_meta meta; + bool changes_data; + int i, err; +@@ -1776,6 +1878,7 @@ static int check_call(struct bpf_verifie + return err; + } + ++ regs = cur_regs(env); + /* reset caller saved regs */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { + mark_reg_not_init(regs, caller_saved[i]); +@@ -1890,7 +1993,7 @@ static int adjust_ptr_min_max_vals(struc + const struct bpf_reg_state *ptr_reg, + const struct bpf_reg_state *off_reg) + { +- struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; ++ struct bpf_reg_state *regs = cur_regs(env), *dst_reg; + bool known = tnum_is_const(off_reg->var_off); + s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, + smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; +@@ -2097,7 +2200,7 @@ static int adjust_scalar_min_max_vals(st + struct bpf_reg_state *dst_reg, + struct bpf_reg_state src_reg) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + u8 opcode = BPF_OP(insn->code); + 
bool src_known, dst_known; + s64 smin_val, smax_val; +@@ -2345,7 +2448,7 @@ static int adjust_scalar_min_max_vals(st + static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, + struct bpf_insn *insn) + { +- struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg; ++ struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; + struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; + u8 opcode = BPF_OP(insn->code); + int rc; +@@ -2419,12 +2522,12 @@ static int adjust_reg_min_max_vals(struc + + /* Got here implies adding two SCALAR_VALUEs */ + if (WARN_ON_ONCE(ptr_reg)) { +- print_verifier_state(&env->cur_state); ++ print_verifier_state(env->cur_state); + verbose("verifier internal error: unexpected ptr_reg\n"); + return -EINVAL; + } + if (WARN_ON(!src_reg)) { +- print_verifier_state(&env->cur_state); ++ print_verifier_state(env->cur_state); + verbose("verifier internal error: no src_reg\n"); + return -EINVAL; + } +@@ -2434,7 +2537,7 @@ static int adjust_reg_min_max_vals(struc + /* check validity of 32-bit and 64-bit arithmetic operations */ + static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + u8 opcode = BPF_OP(insn->code); + int err; + +@@ -2661,10 +2764,10 @@ static void find_good_pkt_pointers(struc + /* keep the maximum range already checked */ + regs[i].range = max(regs[i].range, new_range); + +- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +- if (state->stack_slot_type[i] != STACK_SPILL) ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; +- reg = &state->spilled_regs[i / BPF_REG_SIZE]; ++ reg = &state->stack[i].spilled_ptr; + if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) + reg->range = max(reg->range, new_range); + } +@@ -2914,17 +3017,17 @@ static void mark_map_regs(struct bpf_ver + for (i = 0; i < MAX_BPF_REG; i++) + mark_map_reg(regs, i, id, is_null); + +- for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { +- if (state->stack_slot_type[i] != STACK_SPILL) ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { ++ if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; +- mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null); ++ mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); + } + } + + static int check_cond_jmp_op(struct bpf_verifier_env *env, + struct bpf_insn *insn, int *insn_idx) + { +- struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; ++ struct bpf_verifier_state *other_branch, *this_branch = env->cur_state; + struct bpf_reg_state *regs = this_branch->regs, *dst_reg; + u8 opcode = BPF_OP(insn->code); + int err; +@@ -3087,7 +3190,7 @@ static struct bpf_map *ld_imm64_to_map_p + /* verify BPF_LD_IMM64 instruction */ + static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + int err; + + if (BPF_SIZE(insn->code) != BPF_DW) { +@@ -3148,7 +3251,7 @@ static bool may_access_skb(enum bpf_prog + */ + static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) + { +- struct bpf_reg_state *regs = env->cur_state.regs; ++ struct bpf_reg_state *regs = cur_regs(env); + u8 mode = BPF_MODE(insn->code); + int i, err; + +@@ -3534,6 +3637,57 @@ static bool regsafe(struct bpf_reg_state + return false; + } + ++static bool stacksafe(struct 
bpf_verifier_state *old, ++ struct bpf_verifier_state *cur, ++ struct idpair *idmap) ++{ ++ int i, spi; ++ ++ /* if explored stack has more populated slots than current stack ++ * such stacks are not equivalent ++ */ ++ if (old->allocated_stack > cur->allocated_stack) ++ return false; ++ ++ /* walk slots of the explored stack and ignore any additional ++ * slots in the current stack, since explored(safe) state ++ * didn't use them ++ */ ++ for (i = 0; i < old->allocated_stack; i++) { ++ spi = i / BPF_REG_SIZE; ++ ++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) ++ continue; ++ if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != ++ cur->stack[spi].slot_type[i % BPF_REG_SIZE]) ++ /* Ex: old explored (safe) state has STACK_SPILL in ++ * this stack slot, but current has has STACK_MISC -> ++ * this verifier states are not equivalent, ++ * return false to continue verification of this path ++ */ ++ return false; ++ if (i % BPF_REG_SIZE) ++ continue; ++ if (old->stack[spi].slot_type[0] != STACK_SPILL) ++ continue; ++ if (!regsafe(&old->stack[spi].spilled_ptr, ++ &cur->stack[spi].spilled_ptr, ++ idmap)) ++ /* when explored and current stack slot are both storing ++ * spilled registers, check that stored pointers types ++ * are the same as well. ++ * Ex: explored safe path could have stored ++ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} ++ * but current path has stored: ++ * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} ++ * such verifier states are not equivalent. ++ * return false to continue verification of this path ++ */ ++ return false; ++ } ++ return true; ++} ++ + /* compare two verifier states + * + * all states stored in state_list are known to be valid, since +@@ -3578,37 +3732,8 @@ static bool states_equal(struct bpf_veri + goto out_free; + } + +- for (i = 0; i < MAX_BPF_STACK; i++) { +- if (old->stack_slot_type[i] == STACK_INVALID) +- continue; +- if (old->stack_slot_type[i] != cur->stack_slot_type[i]) +- /* Ex: old explored (safe) state has STACK_SPILL in +- * this stack slot, but current has has STACK_MISC -> +- * this verifier states are not equivalent, +- * return false to continue verification of this path +- */ +- goto out_free; +- if (i % BPF_REG_SIZE) +- continue; +- if (old->stack_slot_type[i] != STACK_SPILL) +- continue; +- if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE], +- &cur->spilled_regs[i / BPF_REG_SIZE], +- idmap)) +- /* when explored and current stack slot are both storing +- * spilled registers, check that stored pointers types +- * are the same as well. +- * Ex: explored safe path could have stored +- * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} +- * but current path has stored: +- * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} +- * such verifier states are not equivalent. +- * return false to continue verification of this path +- */ +- goto out_free; +- else +- continue; +- } ++ if (!stacksafe(old, cur, idmap)) ++ goto out_free; + ret = true; + out_free: + kfree(idmap); +@@ -3644,17 +3769,19 @@ static bool do_propagate_liveness(const + } + } + /* ... 
and stack slots */ +- for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) { +- if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL) ++ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && ++ i < parent->allocated_stack / BPF_REG_SIZE; i++) { ++ if (parent->stack[i].slot_type[0] != STACK_SPILL) + continue; +- if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL) ++ if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; +- if (parent->spilled_regs[i].live & REG_LIVE_READ) ++ if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) + continue; +- if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN)) ++ if (writes && ++ (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN)) + continue; +- if (state->spilled_regs[i].live & REG_LIVE_READ) { +- parent->spilled_regs[i].live |= REG_LIVE_READ; ++ if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) { ++ parent->stack[i].spilled_ptr.live |= REG_LIVE_READ; + touched = true; + } + } +@@ -3684,6 +3811,7 @@ static int is_state_visited(struct bpf_v + { + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; ++ struct bpf_verifier_state *cur = env->cur_state; + int i; + + sl = env->explored_states[insn_idx]; +@@ -3694,7 +3822,7 @@ static int is_state_visited(struct bpf_v + return 0; + + while (sl != STATE_LIST_MARK) { +- if (states_equal(env, &sl->state, &env->cur_state)) { ++ if (states_equal(env, &sl->state, cur)) { + /* reached equivalent register/stack state, + * prune the search. + * Registers read by the continuation are read by us. +@@ -3705,7 +3833,7 @@ static int is_state_visited(struct bpf_v + * they'll be immediately forgotten as we're pruning + * this state and will pop a new one. + */ +- propagate_liveness(&sl->state, &env->cur_state); ++ propagate_liveness(&sl->state, cur); + return 1; + } + sl = sl->next; +@@ -3717,16 +3845,16 @@ static int is_state_visited(struct bpf_v + * it will be rejected. Since there are no loops, we won't be + * seeing this 'insn_idx' instruction again on the way to bpf_exit + */ +- new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); ++ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); + if (!new_sl) + return -ENOMEM; + + /* add new state to the head of linked list */ +- memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state)); ++ copy_verifier_state(&new_sl->state, cur); + new_sl->next = env->explored_states[insn_idx]; + env->explored_states[insn_idx] = new_sl; + /* connect new state to parentage chain */ +- env->cur_state.parent = &new_sl->state; ++ cur->parent = &new_sl->state; + /* clear write marks in current state: the writes we did are not writes + * our child did, so they don't screen off its reads from us. + * (There are no read marks in current state, because reads always mark +@@ -3734,10 +3862,10 @@ static int is_state_visited(struct bpf_v + * explored_states can get read marks.) 
+ */ + for (i = 0; i < BPF_REG_FP; i++) +- env->cur_state.regs[i].live = REG_LIVE_NONE; +- for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) +- if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL) +- env->cur_state.spilled_regs[i].live = REG_LIVE_NONE; ++ cur->regs[i].live = REG_LIVE_NONE; ++ for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++) ++ if (cur->stack[i].slot_type[0] == STACK_SPILL) ++ cur->stack[i].spilled_ptr.live = REG_LIVE_NONE; + return 0; + } + +@@ -3752,15 +3880,19 @@ static int ext_analyzer_insn_hook(struct + + static int do_check(struct bpf_verifier_env *env) + { +- struct bpf_verifier_state *state = &env->cur_state; ++ struct bpf_verifier_state *state; + struct bpf_insn *insns = env->prog->insnsi; +- struct bpf_reg_state *regs = state->regs; ++ struct bpf_reg_state *regs; + int insn_cnt = env->prog->len; + int insn_idx, prev_insn_idx = 0; + int insn_processed = 0; + bool do_print_state = false; + +- init_reg_state(regs); ++ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); ++ if (!state) ++ return -ENOMEM; ++ env->cur_state = state; ++ init_reg_state(state->regs); + state->parent = NULL; + insn_idx = 0; + for (;;) { +@@ -3807,7 +3939,7 @@ static int do_check(struct bpf_verifier_ + else + verbose("\nfrom %d to %d:", + prev_insn_idx, insn_idx); +- print_verifier_state(&env->cur_state); ++ print_verifier_state(env->cur_state); + do_print_state = false; + } + +@@ -3820,6 +3952,7 @@ static int do_check(struct bpf_verifier_ + if (err) + return err; + ++ regs = cur_regs(env); + env->insn_aux_data[insn_idx].seen = true; + if (class == BPF_ALU || class == BPF_ALU64) { + err = check_alu_op(env, insn); +@@ -3991,8 +4124,10 @@ static int do_check(struct bpf_verifier_ + } + + process_bpf_exit: +- insn_idx = pop_stack(env, &prev_insn_idx); +- if (insn_idx < 0) { ++ err = pop_stack(env, &prev_insn_idx, &insn_idx); ++ if (err < 0) { ++ if (err != -ENOENT) ++ return err; + break; + } else { + do_print_state = true; +@@ -4633,9 +4768,11 @@ int bpf_check(struct bpf_prog **prog, un + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); ++ free_verifier_state(env->cur_state); ++ env->cur_state = NULL; + + skip_full_check: +- while (pop_stack(env, NULL) >= 0); ++ while (!pop_stack(env, NULL, NULL)); + free_states(env); + + if (ret == 0) +@@ -4741,9 +4878,11 @@ int bpf_analyzer(struct bpf_prog *prog, + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); ++ free_verifier_state(env->cur_state); ++ env->cur_state = NULL; + + skip_full_check: +- while (pop_stack(env, NULL) >= 0); ++ while (!pop_stack(env, NULL, NULL)); + free_states(env); + + mutex_unlock(&bpf_verifier_lock); diff --git a/queue-4.14/bpf-restrict-map-value-pointer-arithmetic-for-unprivileged.patch b/queue-4.14/bpf-restrict-map-value-pointer-arithmetic-for-unprivileged.patch new file mode 100644 index 00000000000..d7c49ad3b6e --- /dev/null +++ b/queue-4.14/bpf-restrict-map-value-pointer-arithmetic-for-unprivileged.patch @@ -0,0 +1,50 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:08 +0000 +Subject: bpf: restrict map value pointer arithmetic for unprivileged +To: +Cc: , , , , Alexei Starovoitov +Message-ID: <20190403183917.13749-9-sblbir@amzn.com> + +From: Daniel Borkmann + +commit 0d6303db7970e6f56ae700fa07e11eb510cda125 upstream. + +Restrict map value pointer arithmetic for unprivileged users in that +arithmetic itself must not go out of bounds as opposed to the actual +access later on. 
Therefore after each adjust_ptr_min_max_vals() with a +map value pointer as a destination it will simulate a check_map_access() +of 1 byte on the destination and once that fails the program is rejected +for unprivileged program loads. We use this later on for masking any +pointer arithmetic with the remainder of the map value space. The +likelihood of breaking any existing real-world unprivileged eBPF +program is very small for this corner case. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2191,6 +2191,17 @@ static int adjust_ptr_min_max_vals(struc + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); ++ ++ /* For unprivileged we require that resulting offset must be in bounds ++ * in order to be able to sanitize access later on. ++ */ ++ if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && ++ check_map_access(env, dst, dst_reg->off, 1, false)) { ++ verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", ++ dst); ++ return -EACCES; ++ } ++ + return 0; + } + diff --git a/queue-4.14/bpf-restrict-stack-pointer-arithmetic-for-unprivileged.patch b/queue-4.14/bpf-restrict-stack-pointer-arithmetic-for-unprivileged.patch new file mode 100644 index 00000000000..fdc0347446a --- /dev/null +++ b/queue-4.14/bpf-restrict-stack-pointer-arithmetic-for-unprivileged.patch @@ -0,0 +1,117 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:09 +0000 +Subject: bpf: restrict stack pointer arithmetic for unprivileged +To: +Cc: , , , , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-10-sblbir@amzn.com> + +From: Daniel Borkmann + +commit e4298d25830a866cc0f427d4bccb858e76715859 upstream. + +Restrict stack pointer arithmetic for unprivileged users in that +arithmetic itself must not go out of bounds as opposed to the actual +access later on. Therefore after each adjust_ptr_min_max_vals() with +a stack pointer as a destination we simulate a check_stack_access() +of 1 byte on the destination and once that fails the program is +rejected for unprivileged program loads. This is analog to map +value pointer arithmetic and needed for masking later on. + + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +[backported to 4.14 sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++++++----------------- + 1 file changed, 41 insertions(+), 21 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -940,6 +940,31 @@ static int check_stack_read(struct bpf_v + } + } + ++static int check_stack_access(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ int off, int size) ++{ ++ /* Stack accesses must be at a fixed offset, so that we ++ * can determine what type of data were returned. See ++ * check_stack_read(). 
++ */ ++ if (!tnum_is_const(reg->var_off)) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose("variable stack access var_off=%s off=%d size=%d", ++ tn_buf, off, size); ++ return -EACCES; ++ } ++ ++ if (off >= 0 || off < -MAX_BPF_STACK) { ++ verbose("invalid stack off=%d size=%d\n", off, size); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ + /* check read/write into map element returned by bpf_map_lookup_elem() */ + static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, + int size) +@@ -1322,23 +1347,10 @@ static int check_mem_access(struct bpf_v + } + + } else if (reg->type == PTR_TO_STACK) { +- /* stack accesses must be at a fixed offset, so that we can +- * determine what type of data were returned. +- * See check_stack_read(). +- */ +- if (!tnum_is_const(reg->var_off)) { +- char tn_buf[48]; +- +- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +- verbose("variable stack access var_off=%s off=%d size=%d", +- tn_buf, off, size); +- return -EACCES; +- } + off += reg->var_off.value; +- if (off >= 0 || off < -MAX_BPF_STACK) { +- verbose("invalid stack off=%d size=%d\n", off, size); +- return -EACCES; +- } ++ err = check_stack_access(env, reg, off, size); ++ if (err) ++ return err; + + if (env->prog->aux->stack_depth < -off) + env->prog->aux->stack_depth = -off; +@@ -2195,11 +2207,19 @@ static int adjust_ptr_min_max_vals(struc + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ +- if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && +- check_map_access(env, dst, dst_reg->off, 1, false)) { +- verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", +- dst); +- return -EACCES; ++ if (!env->allow_ptr_leaks) { ++ if (dst_reg->type == PTR_TO_MAP_VALUE && ++ check_map_access(env, dst, dst_reg->off, 1)) { ++ verbose("R%d pointer arithmetic of map value goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } else if (dst_reg->type == PTR_TO_STACK && ++ check_stack_access(env, dst_reg, dst_reg->off + ++ dst_reg->var_off.value, 1)) { ++ verbose("R%d stack pointer arithmetic goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } + } + + return 0; diff --git a/queue-4.14/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-for-unprivileged.patch b/queue-4.14/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-for-unprivileged.patch new file mode 100644 index 00000000000..aac7705b410 --- /dev/null +++ b/queue-4.14/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-for-unprivileged.patch @@ -0,0 +1,56 @@ +From foo@baz Wed Apr 17 20:59:12 CEST 2019 +From: Balbir Singh +Date: Wed, 3 Apr 2019 18:39:10 +0000 +Subject: bpf: restrict unknown scalars of mixed signed bounds for unprivileged +To: +Cc: , , , , Alexei Starovoitov , Balbir Singh +Message-ID: <20190403183917.13749-11-sblbir@amzn.com> + +From: Daniel Borkmann + +commit 9d7eceede769f90b66cfa06ad5b357140d5141ed upstream. + +For unknown scalars of mixed signed bounds, meaning their smin_value is +negative and their smax_value is positive, we need to reject arithmetic +with pointer to map value. For unprivileged the goal is to mask every +map pointer arithmetic and this cannot reliably be done when it is +unknown at verification time whether the scalar value is negative or +positive. Given this is a corner case, the likelihood of breaking should +be very small. 
+ +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +[backported to 4.14 sblbir] +Signed-off-by: Balbir Singh +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2014,8 +2014,8 @@ static int adjust_ptr_min_max_vals(struc + smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; + u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, + umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; ++ u32 dst = insn->dst_reg, src = insn->src_reg; + u8 opcode = BPF_OP(insn->code); +- u32 dst = insn->dst_reg; + + dst_reg = ®s[dst]; + +@@ -2189,6 +2189,13 @@ static int adjust_ptr_min_max_vals(struc + verbose("R%d bitwise operator %s on pointer prohibited\n", + dst, bpf_alu_string[opcode >> 4]); + return -EACCES; ++ case PTR_TO_MAP_VALUE: ++ if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { ++ verbose("R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", ++ off_reg == dst_reg ? dst : src); ++ return -EACCES; ++ } ++ /* fall-through */ + default: + /* other operators (e.g. MUL,LSH) produce non-pointer results */ + if (!env->allow_ptr_leaks) diff --git a/queue-4.14/series b/queue-4.14/series index 716f6e2c77e..5810cf55ed8 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -70,3 +70,21 @@ lib-div64.c-off-by-one-in-shift.patch include-linux-swap.h-use-offsetof-instead-of-custom-.patch bpf-fix-use-after-free-in-bpf_evict_inode.patch dm-disable-crypto_tfm_req_may_sleep-to-fix-a-gfp_ker.patch +bpf-reduce-verifier-memory-consumption.patch +bpf-fix-verifier-memory-leaks.patch +bpf-fix-verifier-null-pointer-dereference.patch +bpf-fix-stack-state-printing-in-verifier-log.patch +bpf-move-prev_-insn_idx-into-verifier-env.patch +bpf-move-tmp-variable-into-ax-register-in-interpreter.patch +bpf-enable-access-to-ax-register-also-from-verifier-rewrite.patch +bpf-restrict-map-value-pointer-arithmetic-for-unprivileged.patch +bpf-restrict-stack-pointer-arithmetic-for-unprivileged.patch +bpf-restrict-unknown-scalars-of-mixed-signed-bounds-for-unprivileged.patch +bpf-fix-check_map_access-smin_value-test-when-pointer-contains-offset.patch +bpf-prevent-out-of-bounds-speculation-on-pointer-arithmetic.patch +bpf-fix-sanitation-of-alu-op-with-pointer-scalar-type-from-different-paths.patch +bpf-fix-inner-map-masking-to-prevent-oob-under-speculation.patch +bpf-do-not-restore-dst_reg-when-cur_state-is-freed.patch +bpf-fix-sanitation-rewrite-in-case-of-non-pointers.patch +bpf-fix-selftests-are-changes-for-cve-2019-7308.patch +tools-include-adopt-linux-bits.h.patch diff --git a/queue-4.14/tools-include-adopt-linux-bits.h.patch b/queue-4.14/tools-include-adopt-linux-bits.h.patch new file mode 100644 index 00000000000..5ffff560f6b --- /dev/null +++ b/queue-4.14/tools-include-adopt-linux-bits.h.patch @@ -0,0 +1,98 @@ +From ba4aa02b417f08a0bee5e7b8ed70cac788a7c854 Mon Sep 17 00:00:00 2001 +From: Arnaldo Carvalho de Melo +Date: Tue, 25 Sep 2018 10:55:59 -0300 +Subject: tools include: Adopt linux/bits.h + +From: Arnaldo Carvalho de Melo + +commit ba4aa02b417f08a0bee5e7b8ed70cac788a7c854 upstream. + +So that we reduce the difference of tools/include/linux/bitops.h to the +original kernel file, include/linux/bitops.h, trying to remove the need +to define BITS_PER_LONG, to avoid clashes with asm/bitsperlong.h. 
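+
+A hedged usage sketch of the macros the copied header provides (the
+register field names are made up; only BIT()/GENMASK*() come from the
+header):
+
+  #include <linux/bits.h>
+
+  #define REG_MODE_MASK  GENMASK(7, 4)        /* bits 7..4 -> 0xf0  */
+  #define REG_ENABLE     BIT(0)               /* bit 0     -> 0x1   */
+  #define DMA_ADDR_MASK  GENMASK_ULL(39, 21)  /* 0x000000ffffe00000 */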
+ +And the things removed from tools/include/linux/bitops.h are really in +linux/bits.h, so that we can have a copy and then +tools/perf/check_headers.sh will tell us when new stuff gets added to +linux/bits.h so that we can check if it is useful and if any adjustment +needs to be done to the tools/{include,arch}/ copies. + +Cc: Adrian Hunter +Cc: Alexander Sverdlin +Cc: David Ahern +Cc: Jiri Olsa +Cc: Namhyung Kim +Cc: Wang Nan +Link: https://lkml.kernel.org/n/tip-y1sqyydvfzo0bjjoj4zsl562@git.kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Greg Kroah-Hartman + +--- + tools/include/linux/bitops.h | 7 ++----- + tools/include/linux/bits.h | 26 ++++++++++++++++++++++++++ + tools/perf/check-headers.sh | 1 + + 3 files changed, 29 insertions(+), 5 deletions(-) + +--- a/tools/include/linux/bitops.h ++++ b/tools/include/linux/bitops.h +@@ -3,8 +3,6 @@ + #define _TOOLS_LINUX_BITOPS_H_ + + #include +-#include +- + #ifndef __WORDSIZE + #define __WORDSIZE (__SIZEOF_LONG__ * 8) + #endif +@@ -12,10 +10,9 @@ + #ifndef BITS_PER_LONG + # define BITS_PER_LONG __WORDSIZE + #endif ++#include ++#include + +-#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +-#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +-#define BITS_PER_BYTE 8 + #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) + #define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) + #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) +--- /dev/null ++++ b/tools/include/linux/bits.h +@@ -0,0 +1,26 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __LINUX_BITS_H ++#define __LINUX_BITS_H ++#include ++ ++#define BIT(nr) (1UL << (nr)) ++#define BIT_ULL(nr) (1ULL << (nr)) ++#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) ++#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) ++#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) ++#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) ++#define BITS_PER_BYTE 8 ++ ++/* ++ * Create a contiguous bitmask starting at bit position @l and ending at ++ * position @h. For example ++ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. ++ */ ++#define GENMASK(h, l) \ ++ (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) ++ ++#define GENMASK_ULL(h, l) \ ++ (((~0ULL) - (1ULL << (l)) + 1) & \ ++ (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) ++ ++#endif /* __LINUX_BITS_H */ +--- a/tools/perf/check-headers.sh ++++ b/tools/perf/check-headers.sh +@@ -11,6 +11,7 @@ include/uapi/linux/sched.h + include/uapi/linux/stat.h + include/uapi/linux/vhost.h + include/uapi/sound/asound.h ++include/linux/bits.h + include/linux/hash.h + include/uapi/linux/hw_breakpoint.h + arch/x86/include/asm/disabled-features.h -- 2.47.2