From: Sasha Levin Date: Mon, 28 Jan 2019 21:27:05 +0000 (-0500) Subject: patches for 4.19 X-Git-Tag: v4.9.154~27 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=96784b53d019c0dc4286b446ec7f242aeb15e1bf;p=thirdparty%2Fkernel%2Fstable-queue.git patches for 4.19 Signed-off-by: Sasha Levin --- diff --git a/queue-4.19/bpf-add-per-insn-complexity-limit.patch b/queue-4.19/bpf-add-per-insn-complexity-limit.patch new file mode 100644 index 00000000000..8a26489018c --- /dev/null +++ b/queue-4.19/bpf-add-per-insn-complexity-limit.patch @@ -0,0 +1,77 @@ +From 693c3a15893e2cc211828e9009eae0fef82b926f Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov +Date: Mon, 28 Jan 2019 21:28:20 +0100 +Subject: bpf: add per-insn complexity limit + +[ commit ceefbc96fa5c5b975d87bf8e89ba8416f6b764d9 upstream ] + +malicious bpf program may try to force the verifier to remember +a lot of distinct verifier states. +Put a limit to number of per-insn 'struct bpf_verifier_state'. +Note that hitting the limit doesn't reject the program. +It potentially makes the verifier do more steps to analyze the program. +It means that malicious programs will hit BPF_COMPLEXITY_LIMIT_INSNS sooner +instead of spending cpu time walking long link list. + +The limit of BPF_COMPLEXITY_LIMIT_STATES==64 affects cilium progs +with slight increase in number of "steps" it takes to successfully verify +the programs: + before after +bpf_lb-DLB_L3.o 1940 1940 +bpf_lb-DLB_L4.o 3089 3089 +bpf_lb-DUNKNOWN.o 1065 1065 +bpf_lxc-DDROP_ALL.o 28052 | 28162 +bpf_lxc-DUNKNOWN.o 35487 | 35541 +bpf_netdev.o 10864 10864 +bpf_overlay.o 6643 6643 +bpf_lcx_jit.o 38437 38437 + +But it also makes malicious program to be rejected in 0.4 seconds vs 6.5 +Hence apply this limit to unprivileged programs only. + +Signed-off-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Acked-by: Edward Cree +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 3d093003c723..2bbb98535b70 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -156,6 +156,7 @@ struct bpf_verifier_stack_elem { + + #define BPF_COMPLEXITY_LIMIT_INSNS 131072 + #define BPF_COMPLEXITY_LIMIT_STACK 1024 ++#define BPF_COMPLEXITY_LIMIT_STATES 64 + + #define BPF_MAP_PTR_UNPRIV 1UL + #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ +@@ -4735,7 +4736,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; + struct bpf_verifier_state *cur = env->cur_state; +- int i, j, err; ++ int i, j, err, states_cnt = 0; + + sl = env->explored_states[insn_idx]; + if (!sl) +@@ -4762,8 +4763,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) + return 1; + } + sl = sl->next; ++ states_cnt++; + } + ++ if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) ++ return 0; ++ + /* there were no equivalent states, remember current one. 
+ * technically the current state is not proven to be safe yet, + * but it will either reach outer most bpf_exit (which means it's safe) +-- +2.19.1 + diff --git a/queue-4.19/bpf-enable-access-to-ax-register-also-from-verifier-.patch b/queue-4.19/bpf-enable-access-to-ax-register-also-from-verifier-.patch new file mode 100644 index 00000000000..79c159a514c --- /dev/null +++ b/queue-4.19/bpf-enable-access-to-ax-register-also-from-verifier-.patch @@ -0,0 +1,78 @@ +From 761ea22a5fe3fc3991b56c94f24f1e3edbc6fade Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:23 +0100 +Subject: bpf: enable access to ax register also from verifier rewrite + +[ commit 9b73bfdd08e73231d6a90ae6db4b46b3fbf56c30 upstream ] + +Right now we are using BPF ax register in JIT for constant blinding as +well as in interpreter as temporary variable. Verifier will not be able +to use it simply because its use will get overridden from the former in +bpf_jit_blind_insn(). However, it can be made to work in that blinding +will be skipped if there is prior use in either source or destination +register on the instruction. Taking constraints of ax into account, the +verifier is then open to use it in rewrites under some constraints. Note, +ax register already has mappings in every eBPF JIT. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + include/linux/filter.h | 7 +------ + kernel/bpf/core.c | 20 ++++++++++++++++++++ + 2 files changed, 21 insertions(+), 6 deletions(-) + +diff --git a/include/linux/filter.h b/include/linux/filter.h +index 81420a0efdbe..1a39d57eb88f 100644 +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -53,12 +53,7 @@ struct sock_reuseport; + #define BPF_REG_D BPF_REG_8 /* data, callee-saved */ + #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */ + +-/* Kernel hidden auxiliary/helper register for hardening step. +- * Only used by eBPF JITs. It's nothing more than a temporary +- * register that JITs use internally, only that here it's part +- * of eBPF instructions that have been rewritten for blinding +- * constants. See JIT pre-step in bpf_jit_blind_constants(). +- */ ++/* Kernel hidden auxiliary/helper register. */ + #define BPF_REG_AX MAX_BPF_REG + #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) + #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index aefc62ae4a1e..474525e3a9db 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -643,6 +643,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, + BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); + BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + ++ /* Constraints on AX register: ++ * ++ * AX register is inaccessible from user space. It is mapped in ++ * all JITs, and used here for constant blinding rewrites. It is ++ * typically "stateless" meaning its contents are only valid within ++ * the executed instruction, but not across several instructions. ++ * There are a few exceptions however which are further detailed ++ * below. ++ * ++ * Constant blinding is only used by JITs, not in the interpreter. ++ * The interpreter uses AX in some occasions as a local temporary ++ * register e.g. in DIV or MOD instructions. ++ * ++ * In restricted circumstances, the verifier can also use the AX ++ * register for rewrites as long as they do not interfere with ++ * the above cases! 
++ */ ++ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) ++ goto out; ++ + if (from->imm == 0 && + (from->code == (BPF_ALU | BPF_MOV | BPF_K) || + from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { +-- +2.19.1 + diff --git a/queue-4.19/bpf-fix-check_map_access-smin_value-test-when-pointe.patch b/queue-4.19/bpf-fix-check_map_access-smin_value-test-when-pointe.patch new file mode 100644 index 00000000000..1d50fef27b1 --- /dev/null +++ b/queue-4.19/bpf-fix-check_map_access-smin_value-test-when-pointe.patch @@ -0,0 +1,54 @@ +From e6cba8acf0f7f47cc1b8c5884dac40b451de9c58 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:27 +0100 +Subject: bpf: fix check_map_access smin_value test when pointer contains + offset + +[ commit b7137c4eab85c1cf3d46acdde90ce1163b28c873 upstream ] + +In check_map_access() we probe actual bounds through __check_map_access() +with offset of reg->smin_value + off for lower bound and offset of +reg->umax_value + off for the upper bound. However, even though the +reg->smin_value could have a negative value, the final result of the +sum with off could be positive when pointer arithmetic with known and +unknown scalars is combined. In this case we reject the program with +an error such as "R min value is negative, either use unsigned index +or do a if (index >=0) check." even though the access itself would be +fine. Therefore extend the check to probe whether the actual resulting +reg->smin_value + off is less than zero. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index fbaa3b9e1d71..f9d5aea4891d 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1294,13 +1294,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, + */ + if (env->log.level) + print_verifier_state(env, state); ++ + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. If we are using signed variables for our + * index'es we need to make sure that whatever we use + * will have a set floor within our range. + */ +- if (reg->smin_value < 0) { ++ if (reg->smin_value < 0 && ++ (reg->smin_value == S64_MIN || ++ (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || ++ reg->smin_value + off < 0)) { + verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; +-- +2.19.1 + diff --git a/queue-4.19/bpf-fix-inner-map-masking-to-prevent-oob-under-specu.patch b/queue-4.19/bpf-fix-inner-map-masking-to-prevent-oob-under-specu.patch new file mode 100644 index 00000000000..3c0174e1203 --- /dev/null +++ b/queue-4.19/bpf-fix-inner-map-masking-to-prevent-oob-under-specu.patch @@ -0,0 +1,139 @@ +From 3bfc7522973691f132ca4c855a82aee985b94751 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:30 +0100 +Subject: bpf: fix inner map masking to prevent oob under speculation + +[ commit 9d5564ddcf2a0f5ba3fa1c3a1f8a1b59ad309553 upstream ] + +During review I noticed that inner meta map setup for map in +map is buggy in that it does not propagate all needed data +from the reference map which the verifier is later accessing. 
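
As an aside for readers who have not used it, the map-in-map pattern whose inlined lookups appear in the dumps below looks roughly like the following in BPF C. This is an illustrative sketch, not part of the patch: it uses today's libbpf BTF-defined map syntax and invented names; on a 4.19-era toolchain the same shape is expressed with struct bpf_map_def plus an inner-map fd supplied from user space at load time.

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* Inner map: a plain array, so its lookup gets inlined by the verifier. */
    struct inner_map {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, __u64);
    } inner SEC(".maps");

    /* Outer map holding the inner one. */
    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
            __uint(max_entries, 1);
            __uint(key_size, sizeof(__u32));
            __array(values, struct inner_map);
    } outer SEC(".maps") = {
            .values = { [0] = &inner },
    };

    SEC("socket")
    int map_in_map_lookup(void *ctx)
    {
            __u32 key = 0;
            void *im;

            im = bpf_map_lookup_elem(&outer, &key);        /* outer lookup, inlined */
            if (!im)
                    return 0;
            return bpf_map_lookup_elem(im, &key) ? 1 : 0;  /* inner lookup          */
    }

    char _license[] SEC("license") = "GPL";

Both lookups get inlined for array maps, which is what the xlated dumps below show; the inner one is the lookup that was left without index masking.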
+ +In particular one such case is index masking to prevent out of +bounds access under speculative execution due to missing the +map's unpriv_array/index_mask field propagation. Fix this such +that the verifier is generating the correct code for inlined +lookups in case of unpriviledged use. + +Before patch (test_verifier's 'map in map access' dump): + + # bpftool prog dump xla id 3 + 0: (62) *(u32 *)(r10 -4) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -4 + 3: (18) r1 = map[id:4] + 5: (07) r1 += 272 | + 6: (61) r0 = *(u32 *)(r2 +0) | + 7: (35) if r0 >= 0x1 goto pc+6 | Inlined map in map lookup + 8: (54) (u32) r0 &= (u32) 0 | with index masking for + 9: (67) r0 <<= 3 | map->unpriv_array. + 10: (0f) r0 += r1 | + 11: (79) r0 = *(u64 *)(r0 +0) | + 12: (15) if r0 == 0x0 goto pc+1 | + 13: (05) goto pc+1 | + 14: (b7) r0 = 0 | + 15: (15) if r0 == 0x0 goto pc+11 + 16: (62) *(u32 *)(r10 -4) = 0 + 17: (bf) r2 = r10 + 18: (07) r2 += -4 + 19: (bf) r1 = r0 + 20: (07) r1 += 272 | + 21: (61) r0 = *(u32 *)(r2 +0) | Index masking missing (!) + 22: (35) if r0 >= 0x1 goto pc+3 | for inner map despite + 23: (67) r0 <<= 3 | map->unpriv_array set. + 24: (0f) r0 += r1 | + 25: (05) goto pc+1 | + 26: (b7) r0 = 0 | + 27: (b7) r0 = 0 + 28: (95) exit + +After patch: + + # bpftool prog dump xla id 1 + 0: (62) *(u32 *)(r10 -4) = 0 + 1: (bf) r2 = r10 + 2: (07) r2 += -4 + 3: (18) r1 = map[id:2] + 5: (07) r1 += 272 | + 6: (61) r0 = *(u32 *)(r2 +0) | + 7: (35) if r0 >= 0x1 goto pc+6 | Same inlined map in map lookup + 8: (54) (u32) r0 &= (u32) 0 | with index masking due to + 9: (67) r0 <<= 3 | map->unpriv_array. + 10: (0f) r0 += r1 | + 11: (79) r0 = *(u64 *)(r0 +0) | + 12: (15) if r0 == 0x0 goto pc+1 | + 13: (05) goto pc+1 | + 14: (b7) r0 = 0 | + 15: (15) if r0 == 0x0 goto pc+12 + 16: (62) *(u32 *)(r10 -4) = 0 + 17: (bf) r2 = r10 + 18: (07) r2 += -4 + 19: (bf) r1 = r0 + 20: (07) r1 += 272 | + 21: (61) r0 = *(u32 *)(r2 +0) | + 22: (35) if r0 >= 0x1 goto pc+4 | Now fixed inlined inner map + 23: (54) (u32) r0 &= (u32) 0 | lookup with proper index masking + 24: (67) r0 <<= 3 | for map->unpriv_array. + 25: (0f) r0 += r1 | + 26: (05) goto pc+1 | + 27: (b7) r0 = 0 | + 28: (b7) r0 = 0 + 29: (95) exit + +Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") +Signed-off-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/map_in_map.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c +index 3bfbf4464416..9670ee5ee74e 100644 +--- a/kernel/bpf/map_in_map.c ++++ b/kernel/bpf/map_in_map.c +@@ -12,6 +12,7 @@ + struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) + { + struct bpf_map *inner_map, *inner_map_meta; ++ u32 inner_map_meta_size; + struct fd f; + + f = fdget(inner_map_ufd); +@@ -35,7 +36,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) + return ERR_PTR(-EINVAL); + } + +- inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); ++ inner_map_meta_size = sizeof(*inner_map_meta); ++ /* In some cases verifier needs to access beyond just base map. 
*/ ++ if (inner_map->ops == &array_map_ops) ++ inner_map_meta_size = sizeof(struct bpf_array); ++ ++ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); + if (!inner_map_meta) { + fdput(f); + return ERR_PTR(-ENOMEM); +@@ -45,9 +51,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) + inner_map_meta->key_size = inner_map->key_size; + inner_map_meta->value_size = inner_map->value_size; + inner_map_meta->map_flags = inner_map->map_flags; +- inner_map_meta->ops = inner_map->ops; + inner_map_meta->max_entries = inner_map->max_entries; + ++ /* Misc members not needed in bpf_map_meta_equal() check. */ ++ inner_map_meta->ops = inner_map->ops; ++ if (inner_map->ops == &array_map_ops) { ++ inner_map_meta->unpriv_array = inner_map->unpriv_array; ++ container_of(inner_map_meta, struct bpf_array, map)->index_mask = ++ container_of(inner_map, struct bpf_array, map)->index_mask; ++ } ++ + fdput(f); + return inner_map_meta; + } +-- +2.19.1 + diff --git a/queue-4.19/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-typ.patch b/queue-4.19/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-typ.patch new file mode 100644 index 00000000000..5e92700f67b --- /dev/null +++ b/queue-4.19/bpf-fix-sanitation-of-alu-op-with-pointer-scalar-typ.patch @@ -0,0 +1,152 @@ +From e467ff9122eb2f03e7844189882a40a2ac3726f4 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:29 +0100 +Subject: bpf: fix sanitation of alu op with pointer / scalar type from + different paths + +[ commit d3bd7413e0ca40b60cf60d4003246d067cafdeda upstream ] + +While 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer +arithmetic") took care of rejecting alu op on pointer when e.g. pointer +came from two different map values with different map properties such as +value size, Jann reported that a case was not covered yet when a given +alu op is used in both "ptr_reg += reg" and "numeric_reg += reg" from +different branches where we would incorrectly try to sanitize based +on the pointer's limit. Catch this corner case and reject the program +instead. 
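
Restated outside the diff context, the rule being enforced can be sketched in plain C as follows (names are invented here for illustration): every ALU instruction that may need masking carries at most one (state, limit) pair in its auxiliary data; a pointer path records a sanitize state with a pointer-derived limit, while a scalar path records a non-pointer marker, so an instruction reached both ways can never agree on a single rewrite and the program is rejected.

    /* Illustrative sketch of the per-instruction bookkeeping, not kernel code. */
    enum alu_path_state {
            ALU_UNSEEN = 0,
            ALU_NON_POINTER,        /* reached with scalars only            */
            ALU_SANITIZE_SRC,       /* pointer in dst, scalar offset in src */
            ALU_SANITIZE_DST,       /* pointer in src, scalar offset in dst */
    };

    struct insn_alu_aux {
            enum alu_path_state state;
            unsigned int        limit;   /* masking limit, 0 for non-pointer */
    };

    /* Called once per verification path that walks this instruction. */
    static int record_alu_path(struct insn_alu_aux *aux,
                               enum alu_path_state state, unsigned int limit)
    {
            if (aux->state != ALU_UNSEEN &&
                (aux->state != state || aux->limit != limit))
                    return -1;   /* paths disagree: no single static rewrite fits */
            aux->state = state;
            aux->limit = limit;
            return 0;
    }

This mirrors update_alu_sanitation_state() in the hunk below; the patch's addition is that scalar-only paths now record the non-pointer marker too, instead of leaving the instruction's aux state untouched.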
+ +Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") +Reported-by: Jann Horn +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + include/linux/bpf_verifier.h | 1 + + kernel/bpf/verifier.c | 61 ++++++++++++++++++++++++++++-------- + 2 files changed, 49 insertions(+), 13 deletions(-) + +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index 2716dcbfb2f1..91393724e933 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -147,6 +147,7 @@ struct bpf_verifier_state_list { + #define BPF_ALU_SANITIZE_SRC 1U + #define BPF_ALU_SANITIZE_DST 2U + #define BPF_ALU_NEG_VALUE (1U << 2) ++#define BPF_ALU_NON_POINTER (1U << 3) + #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ + BPF_ALU_SANITIZE_DST) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 9ab0adedb174..4d81be2d0739 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2734,6 +2734,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, + } + } + ++static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, ++ const struct bpf_insn *insn) ++{ ++ return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; ++} ++ ++static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, ++ u32 alu_state, u32 alu_limit) ++{ ++ /* If we arrived here from different branches with different ++ * state or limits to sanitize, then this won't work. ++ */ ++ if (aux->alu_state && ++ (aux->alu_state != alu_state || ++ aux->alu_limit != alu_limit)) ++ return -EACCES; ++ ++ /* Corresponding fixup done in fixup_bpf_calls(). */ ++ aux->alu_state = alu_state; ++ aux->alu_limit = alu_limit; ++ return 0; ++} ++ ++static int sanitize_val_alu(struct bpf_verifier_env *env, ++ struct bpf_insn *insn) ++{ ++ struct bpf_insn_aux_data *aux = cur_aux(env); ++ ++ if (can_skip_alu_sanitation(env, insn)) ++ return 0; ++ ++ return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); ++} ++ + static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, +@@ -2748,7 +2782,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_reg_state tmp; + bool ret; + +- if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) ++ if (can_skip_alu_sanitation(env, insn)) + return 0; + + /* We already marked aux for masking from non-speculative +@@ -2764,19 +2798,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, + + if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) + return 0; +- +- /* If we arrived here from different branches with different +- * limits to sanitize, then this won't work. +- */ +- if (aux->alu_state && +- (aux->alu_state != alu_state || +- aux->alu_limit != alu_limit)) ++ if (update_alu_sanitation_state(aux, alu_state, alu_limit)) + return -EACCES; +- +- /* Corresponding fixup done in fixup_bpf_calls(). */ +- aux->alu_state = alu_state; +- aux->alu_limit = alu_limit; +- + do_sim: + /* Simulate and find potential out-of-bounds access under + * speculative execution from truncation as a result of +@@ -3048,6 +3071,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, + s64 smin_val, smax_val; + u64 umin_val, umax_val; + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 
64 : 32; ++ u32 dst = insn->dst_reg; ++ int ret; + + if (insn_bitness == 32) { + /* Relevant for 32-bit RSH: Information can propagate towards +@@ -3082,6 +3107,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, + + switch (opcode) { + case BPF_ADD: ++ ret = sanitize_val_alu(env, insn); ++ if (ret < 0) { ++ verbose(env, "R%d tried to add from different pointers or scalars\n", dst); ++ return ret; ++ } + if (signed_add_overflows(dst_reg->smin_value, smin_val) || + signed_add_overflows(dst_reg->smax_value, smax_val)) { + dst_reg->smin_value = S64_MIN; +@@ -3101,6 +3131,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, + dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); + break; + case BPF_SUB: ++ ret = sanitize_val_alu(env, insn); ++ if (ret < 0) { ++ verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); ++ return ret; ++ } + if (signed_sub_overflows(dst_reg->smin_value, smax_val) || + signed_sub_overflows(dst_reg->smax_value, smin_val)) { + /* Overflow possible, we know nothing */ +-- +2.19.1 + diff --git a/queue-4.19/bpf-improve-verifier-branch-analysis.patch b/queue-4.19/bpf-improve-verifier-branch-analysis.patch new file mode 100644 index 00000000000..50425975808 --- /dev/null +++ b/queue-4.19/bpf-improve-verifier-branch-analysis.patch @@ -0,0 +1,163 @@ +From 7125bbd1caa7f8875350606add501e2948c16b95 Mon Sep 17 00:00:00 2001 +From: Alexei Starovoitov +Date: Mon, 28 Jan 2019 21:28:19 +0100 +Subject: bpf: improve verifier branch analysis + +[ commit 4f7b3e82589e0de723780198ec7983e427144c0a upstream ] + +pathological bpf programs may try to force verifier to explode in +the number of branch states: + 20: (d5) if r1 s<= 0x24000028 goto pc+0 + 21: (b5) if r0 <= 0xe1fa20 goto pc+2 + 22: (d5) if r1 s<= 0x7e goto pc+0 + 23: (b5) if r0 <= 0xe880e000 goto pc+0 + 24: (c5) if r0 s< 0x2100ecf4 goto pc+0 + 25: (d5) if r1 s<= 0xe880e000 goto pc+1 + 26: (c5) if r0 s< 0xf4041810 goto pc+0 + 27: (d5) if r1 s<= 0x1e007e goto pc+0 + 28: (b5) if r0 <= 0xe86be000 goto pc+0 + 29: (07) r0 += 16614 + 30: (c5) if r0 s< 0x6d0020da goto pc+0 + 31: (35) if r0 >= 0x2100ecf4 goto pc+0 + +Teach verifier to recognize always taken and always not taken branches. +This analysis is already done for == and != comparison. +Expand it to all other branches. + +It also helps real bpf programs to be verified faster: + before after +bpf_lb-DLB_L3.o 2003 1940 +bpf_lb-DLB_L4.o 3173 3089 +bpf_lb-DUNKNOWN.o 1080 1065 +bpf_lxc-DDROP_ALL.o 29584 28052 +bpf_lxc-DUNKNOWN.o 36916 35487 +bpf_netdev.o 11188 10864 +bpf_overlay.o 6679 6643 +bpf_lcx_jit.o 39555 38437 + +Reported-by: Anatoly Trosinenko +Signed-off-by: Alexei Starovoitov +Acked-by: Daniel Borkmann +Acked-by: Edward Cree +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 93 +++++++++++++++++++++++++++++++++++++------ + 1 file changed, 80 insertions(+), 13 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 341806668f03..3d093003c723 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3475,6 +3475,79 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, + } + } + ++/* compute branch direction of the expression "if (reg opcode val) goto target;" ++ * and return: ++ * 1 - branch will be taken and "goto target" will be executed ++ * 0 - branch will not be taken and fall-through to next insn ++ * -1 - unknown. 
Example: "if (reg < 5)" is unknown when register value range [0,10] ++ */ ++static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) ++{ ++ if (__is_pointer_value(false, reg)) ++ return -1; ++ ++ switch (opcode) { ++ case BPF_JEQ: ++ if (tnum_is_const(reg->var_off)) ++ return !!tnum_equals_const(reg->var_off, val); ++ break; ++ case BPF_JNE: ++ if (tnum_is_const(reg->var_off)) ++ return !tnum_equals_const(reg->var_off, val); ++ break; ++ case BPF_JGT: ++ if (reg->umin_value > val) ++ return 1; ++ else if (reg->umax_value <= val) ++ return 0; ++ break; ++ case BPF_JSGT: ++ if (reg->smin_value > (s64)val) ++ return 1; ++ else if (reg->smax_value < (s64)val) ++ return 0; ++ break; ++ case BPF_JLT: ++ if (reg->umax_value < val) ++ return 1; ++ else if (reg->umin_value >= val) ++ return 0; ++ break; ++ case BPF_JSLT: ++ if (reg->smax_value < (s64)val) ++ return 1; ++ else if (reg->smin_value >= (s64)val) ++ return 0; ++ break; ++ case BPF_JGE: ++ if (reg->umin_value >= val) ++ return 1; ++ else if (reg->umax_value < val) ++ return 0; ++ break; ++ case BPF_JSGE: ++ if (reg->smin_value >= (s64)val) ++ return 1; ++ else if (reg->smax_value < (s64)val) ++ return 0; ++ break; ++ case BPF_JLE: ++ if (reg->umax_value <= val) ++ return 1; ++ else if (reg->umin_value > val) ++ return 0; ++ break; ++ case BPF_JSLE: ++ if (reg->smax_value <= (s64)val) ++ return 1; ++ else if (reg->smin_value > (s64)val) ++ return 0; ++ break; ++ } ++ ++ return -1; ++} ++ + /* Adjusts the register min/max values in the case that the dst_reg is the + * variable register that we are working on, and src_reg is a constant or we're + * simply doing a BPF_K check. +@@ -3868,21 +3941,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, + + dst_reg = ®s[insn->dst_reg]; + +- /* detect if R == 0 where R was initialized to zero earlier */ +- if (BPF_SRC(insn->code) == BPF_K && +- (opcode == BPF_JEQ || opcode == BPF_JNE) && +- dst_reg->type == SCALAR_VALUE && +- tnum_is_const(dst_reg->var_off)) { +- if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) || +- (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) { +- /* if (imm == imm) goto pc+off; +- * only follow the goto, ignore fall-through +- */ ++ if (BPF_SRC(insn->code) == BPF_K) { ++ int pred = is_branch_taken(dst_reg, insn->imm, opcode); ++ ++ if (pred == 1) { ++ /* only follow the goto, ignore fall-through */ + *insn_idx += insn->off; + return 0; +- } else { +- /* if (imm != imm) goto pc+off; +- * only follow fall-through branch, since ++ } else if (pred == 0) { ++ /* only follow fall-through branch, since + * that's where the program will go + */ + return 0; +-- +2.19.1 + diff --git a/queue-4.19/bpf-move-prev_-insn_idx-into-verifier-env.patch b/queue-4.19/bpf-move-prev_-insn_idx-into-verifier-env.patch new file mode 100644 index 00000000000..f672843e1ca --- /dev/null +++ b/queue-4.19/bpf-move-prev_-insn_idx-into-verifier-env.patch @@ -0,0 +1,268 @@ +From 94ad22febaf47fcf129727187fd62feac7919bdf Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:21 +0100 +Subject: bpf: move {prev_,}insn_idx into verifier env + +[ commit c08435ec7f2bc8f4109401f696fd55159b4b40cb upstream ] + +Move prev_insn_idx and insn_idx from the do_check() function into +the verifier environment, so they can be read inside the various +helper functions for handling the instructions. It's easier to put +this into the environment rather than changing all call-sites only +to pass it along. 
insn_idx is useful in particular since this later +on allows to hold state in env->insn_aux_data[env->insn_idx]. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + include/linux/bpf_verifier.h | 2 + + kernel/bpf/verifier.c | 75 ++++++++++++++++++------------------ + 2 files changed, 40 insertions(+), 37 deletions(-) + +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index 1fd6fa822d2c..b1587ffdea7b 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -186,6 +186,8 @@ struct bpf_subprog_info { + * one verifier_env per bpf_check() call + */ + struct bpf_verifier_env { ++ u32 insn_idx; ++ u32 prev_insn_idx; + struct bpf_prog *prog; /* eBPF program being verified */ + const struct bpf_verifier_ops *ops; + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 2bbb98535b70..6d11320998c6 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4816,7 +4816,6 @@ static int do_check(struct bpf_verifier_env *env) + struct bpf_insn *insns = env->prog->insnsi; + struct bpf_reg_state *regs; + int insn_cnt = env->prog->len, i; +- int insn_idx, prev_insn_idx = 0; + int insn_processed = 0; + bool do_print_state = false; + +@@ -4835,19 +4834,19 @@ static int do_check(struct bpf_verifier_env *env) + BPF_MAIN_FUNC /* callsite */, + 0 /* frameno */, + 0 /* subprogno, zero == main subprog */); +- insn_idx = 0; ++ + for (;;) { + struct bpf_insn *insn; + u8 class; + int err; + +- if (insn_idx >= insn_cnt) { ++ if (env->insn_idx >= insn_cnt) { + verbose(env, "invalid insn idx %d insn_cnt %d\n", +- insn_idx, insn_cnt); ++ env->insn_idx, insn_cnt); + return -EFAULT; + } + +- insn = &insns[insn_idx]; ++ insn = &insns[env->insn_idx]; + class = BPF_CLASS(insn->code); + + if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { +@@ -4857,7 +4856,7 @@ static int do_check(struct bpf_verifier_env *env) + return -E2BIG; + } + +- err = is_state_visited(env, insn_idx); ++ err = is_state_visited(env, env->insn_idx); + if (err < 0) + return err; + if (err == 1) { +@@ -4865,9 +4864,9 @@ static int do_check(struct bpf_verifier_env *env) + if (env->log.level) { + if (do_print_state) + verbose(env, "\nfrom %d to %d: safe\n", +- prev_insn_idx, insn_idx); ++ env->prev_insn_idx, env->insn_idx); + else +- verbose(env, "%d: safe\n", insn_idx); ++ verbose(env, "%d: safe\n", env->insn_idx); + } + goto process_bpf_exit; + } +@@ -4880,10 +4879,10 @@ static int do_check(struct bpf_verifier_env *env) + + if (env->log.level > 1 || (env->log.level && do_print_state)) { + if (env->log.level > 1) +- verbose(env, "%d:", insn_idx); ++ verbose(env, "%d:", env->insn_idx); + else + verbose(env, "\nfrom %d to %d:", +- prev_insn_idx, insn_idx); ++ env->prev_insn_idx, env->insn_idx); + print_verifier_state(env, state->frame[state->curframe]); + do_print_state = false; + } +@@ -4894,19 +4893,20 @@ static int do_check(struct bpf_verifier_env *env) + .private_data = env, + }; + +- verbose(env, "%d: ", insn_idx); ++ verbose(env, "%d: ", env->insn_idx); + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); + } + + if (bpf_prog_is_dev_bound(env->prog->aux)) { +- err = bpf_prog_offload_verify_insn(env, insn_idx, +- prev_insn_idx); ++ err = bpf_prog_offload_verify_insn(env, env->insn_idx, ++ env->prev_insn_idx); + if (err) + return err; + } + + regs = cur_regs(env); +- 
env->insn_aux_data[insn_idx].seen = true; ++ env->insn_aux_data[env->insn_idx].seen = true; ++ + if (class == BPF_ALU || class == BPF_ALU64) { + err = check_alu_op(env, insn); + if (err) +@@ -4931,13 +4931,13 @@ static int do_check(struct bpf_verifier_env *env) + /* check that memory (src_reg + off) is readable, + * the state of dst_reg will be updated by this func + */ +- err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, +- BPF_SIZE(insn->code), BPF_READ, +- insn->dst_reg, false); ++ err = check_mem_access(env, env->insn_idx, insn->src_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_READ, insn->dst_reg, false); + if (err) + return err; + +- prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; ++ prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { + /* saw a valid insn +@@ -4964,10 +4964,10 @@ static int do_check(struct bpf_verifier_env *env) + enum bpf_reg_type *prev_dst_type, dst_reg_type; + + if (BPF_MODE(insn->code) == BPF_XADD) { +- err = check_xadd(env, insn_idx, insn); ++ err = check_xadd(env, env->insn_idx, insn); + if (err) + return err; +- insn_idx++; ++ env->insn_idx++; + continue; + } + +@@ -4983,13 +4983,13 @@ static int do_check(struct bpf_verifier_env *env) + dst_reg_type = regs[insn->dst_reg].type; + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, +- insn->src_reg, false); ++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_WRITE, insn->src_reg, false); + if (err) + return err; + +- prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; ++ prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; +@@ -5018,9 +5018,9 @@ static int do_check(struct bpf_verifier_env *env) + } + + /* check that memory (dst_reg + off) is writeable */ +- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, +- BPF_SIZE(insn->code), BPF_WRITE, +- -1, false); ++ err = check_mem_access(env, env->insn_idx, insn->dst_reg, ++ insn->off, BPF_SIZE(insn->code), ++ BPF_WRITE, -1, false); + if (err) + return err; + +@@ -5038,9 +5038,9 @@ static int do_check(struct bpf_verifier_env *env) + } + + if (insn->src_reg == BPF_PSEUDO_CALL) +- err = check_func_call(env, insn, &insn_idx); ++ err = check_func_call(env, insn, &env->insn_idx); + else +- err = check_helper_call(env, insn->imm, insn_idx); ++ err = check_helper_call(env, insn->imm, env->insn_idx); + if (err) + return err; + +@@ -5053,7 +5053,7 @@ static int do_check(struct bpf_verifier_env *env) + return -EINVAL; + } + +- insn_idx += insn->off + 1; ++ env->insn_idx += insn->off + 1; + continue; + + } else if (opcode == BPF_EXIT) { +@@ -5067,8 +5067,8 @@ static int do_check(struct bpf_verifier_env *env) + + if (state->curframe) { + /* exit from nested function */ +- prev_insn_idx = insn_idx; +- err = prepare_func_exit(env, &insn_idx); ++ env->prev_insn_idx = env->insn_idx; ++ err = prepare_func_exit(env, &env->insn_idx); + if (err) + return err; + do_print_state = true; +@@ -5094,7 +5094,8 @@ static int do_check(struct bpf_verifier_env *env) + if (err) + return err; + process_bpf_exit: +- err = pop_stack(env, &prev_insn_idx, &insn_idx); ++ err = pop_stack(env, &env->prev_insn_idx, ++ &env->insn_idx); + if (err < 0) { + if (err != -ENOENT) + return err; +@@ -5104,7 +5105,7 @@ static int do_check(struct bpf_verifier_env *env) + continue; + } + } else { +- 
err = check_cond_jmp_op(env, insn, &insn_idx); ++ err = check_cond_jmp_op(env, insn, &env->insn_idx); + if (err) + return err; + } +@@ -5121,8 +5122,8 @@ static int do_check(struct bpf_verifier_env *env) + if (err) + return err; + +- insn_idx++; +- env->insn_aux_data[insn_idx].seen = true; ++ env->insn_idx++; ++ env->insn_aux_data[env->insn_idx].seen = true; + } else { + verbose(env, "invalid BPF_LD mode\n"); + return -EINVAL; +@@ -5132,7 +5133,7 @@ static int do_check(struct bpf_verifier_env *env) + return -EINVAL; + } + +- insn_idx++; ++ env->insn_idx++; + } + + verbose(env, "processed %d insns (limit %d), stack depth ", +-- +2.19.1 + diff --git a/queue-4.19/bpf-move-tmp-variable-into-ax-register-in-interprete.patch b/queue-4.19/bpf-move-tmp-variable-into-ax-register-in-interprete.patch new file mode 100644 index 00000000000..32aab3f7c78 --- /dev/null +++ b/queue-4.19/bpf-move-tmp-variable-into-ax-register-in-interprete.patch @@ -0,0 +1,130 @@ +From b9750ff1cc957ae81cde2758812e14497b209a3c Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:22 +0100 +Subject: bpf: move tmp variable into ax register in interpreter + +[ commit 144cd91c4c2bced6eb8a7e25e590f6618a11e854 upstream ] + +This change moves the on-stack 64 bit tmp variable in ___bpf_prog_run() +into the hidden ax register. The latter is currently only used in JITs +for constant blinding as a temporary scratch register, meaning the BPF +interpreter will never see the use of ax. Therefore it is safe to use +it for the cases where tmp has been used earlier. This is needed to later +on allow restricted hidden use of ax in both interpreter and JITs. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + include/linux/filter.h | 3 ++- + kernel/bpf/core.c | 34 +++++++++++++++++----------------- + 2 files changed, 19 insertions(+), 18 deletions(-) + +diff --git a/include/linux/filter.h b/include/linux/filter.h +index ec90d5255cf7..81420a0efdbe 100644 +--- a/include/linux/filter.h ++++ b/include/linux/filter.h +@@ -60,7 +60,8 @@ struct sock_reuseport; + * constants. See JIT pre-step in bpf_jit_blind_constants(). 
+ */ + #define BPF_REG_AX MAX_BPF_REG +-#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) ++#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) ++#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG + + /* unused opcode to mark special call to bpf_tail_call() helper */ + #define BPF_TAIL_CALL 0xf0 +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 3f5bf1af0826..aefc62ae4a1e 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -52,6 +52,7 @@ + #define DST regs[insn->dst_reg] + #define SRC regs[insn->src_reg] + #define FP regs[BPF_REG_FP] ++#define AX regs[BPF_REG_AX] + #define ARG1 regs[BPF_REG_ARG1] + #define CTX regs[BPF_REG_CTX] + #define IMM insn->imm +@@ -971,7 +972,6 @@ bool bpf_opcode_in_insntable(u8 code) + */ + static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) + { +- u64 tmp; + #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y + #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z + static const void *jumptable[256] = { +@@ -1045,36 +1045,36 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) + (*(s64 *) &DST) >>= IMM; + CONT; + ALU64_MOD_X: +- div64_u64_rem(DST, SRC, &tmp); +- DST = tmp; ++ div64_u64_rem(DST, SRC, &AX); ++ DST = AX; + CONT; + ALU_MOD_X: +- tmp = (u32) DST; +- DST = do_div(tmp, (u32) SRC); ++ AX = (u32) DST; ++ DST = do_div(AX, (u32) SRC); + CONT; + ALU64_MOD_K: +- div64_u64_rem(DST, IMM, &tmp); +- DST = tmp; ++ div64_u64_rem(DST, IMM, &AX); ++ DST = AX; + CONT; + ALU_MOD_K: +- tmp = (u32) DST; +- DST = do_div(tmp, (u32) IMM); ++ AX = (u32) DST; ++ DST = do_div(AX, (u32) IMM); + CONT; + ALU64_DIV_X: + DST = div64_u64(DST, SRC); + CONT; + ALU_DIV_X: +- tmp = (u32) DST; +- do_div(tmp, (u32) SRC); +- DST = (u32) tmp; ++ AX = (u32) DST; ++ do_div(AX, (u32) SRC); ++ DST = (u32) AX; + CONT; + ALU64_DIV_K: + DST = div64_u64(DST, IMM); + CONT; + ALU_DIV_K: +- tmp = (u32) DST; +- do_div(tmp, (u32) IMM); +- DST = (u32) tmp; ++ AX = (u32) DST; ++ do_div(AX, (u32) IMM); ++ DST = (u32) AX; + CONT; + ALU_END_TO_BE: + switch (IMM) { +@@ -1330,7 +1330,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ + static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ + { \ + u64 stack[stack_size / sizeof(u64)]; \ +- u64 regs[MAX_BPF_REG]; \ ++ u64 regs[MAX_BPF_EXT_REG]; \ + \ + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ + ARG1 = (u64) (unsigned long) ctx; \ +@@ -1343,7 +1343,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \ + const struct bpf_insn *insn) \ + { \ + u64 stack[stack_size / sizeof(u64)]; \ +- u64 regs[MAX_BPF_REG]; \ ++ u64 regs[MAX_BPF_EXT_REG]; \ + \ + FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ + BPF_R1 = r1; \ +-- +2.19.1 + diff --git a/queue-4.19/bpf-prevent-out-of-bounds-speculation-on-pointer-ari.patch b/queue-4.19/bpf-prevent-out-of-bounds-speculation-on-pointer-ari.patch new file mode 100644 index 00000000000..f1f2d6a90a3 --- /dev/null +++ b/queue-4.19/bpf-prevent-out-of-bounds-speculation-on-pointer-ari.patch @@ -0,0 +1,608 @@ +From 054668490b98b1cffa6d01681969bcfc102e10d4 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:28 +0100 +Subject: bpf: prevent out of bounds speculation on pointer arithmetic + +[ commit 979d63d50c0c0f7bc537bf821e056cc9fe5abd38 upstream ] + +Jann reported that the original commit back in b2157399cc98 +("bpf: prevent out-of-bounds speculation") was not sufficient +to stop CPU from speculating out of bounds memory access: +While 
b2157399cc98 only focussed on masking array map access +for unprivileged users for tail calls and data access such +that the user provided index gets sanitized from BPF program +and syscall side, there is still a more generic form affected +from BPF programs that applies to most maps that hold user +data in relation to dynamic map access when dealing with +unknown scalars or "slow" known scalars as access offset, for +example: + + - Load a map value pointer into R6 + - Load an index into R7 + - Do a slow computation (e.g. with a memory dependency) that + loads a limit into R8 (e.g. load the limit from a map for + high latency, then mask it to make the verifier happy) + - Exit if R7 >= R8 (mispredicted branch) + - Load R0 = R6[R7] + - Load R0 = R6[R0] + +For unknown scalars there are two options in the BPF verifier +where we could derive knowledge from in order to guarantee +safe access to the memory: i) While /<=/>= variants won't +allow to derive any lower or upper bounds from the unknown +scalar where it would be safe to add it to the map value +pointer, it is possible through ==/!= test however. ii) another +option is to transform the unknown scalar into a known scalar, +for example, through ALU ops combination such as R &= +followed by R |= or any similar combination where the +original information from the unknown scalar would be destroyed +entirely leaving R with a constant. The initial slow load still +precedes the latter ALU ops on that register, so the CPU +executes speculatively from that point. Once we have the known +scalar, any compare operation would work then. A third option +only involving registers with known scalars could be crafted +as described in [0] where a CPU port (e.g. Slow Int unit) +would be filled with many dependent computations such that +the subsequent condition depending on its outcome has to wait +for evaluation on its execution port and thereby executing +speculatively if the speculated code can be scheduled on a +different execution port, or any other form of mistraining +as described in [1], for example. Given this is not limited +to only unknown scalars, not only map but also stack access +is affected since both is accessible for unprivileged users +and could potentially be used for out of bounds access under +speculation. + +In order to prevent any of these cases, the verifier is now +sanitizing pointer arithmetic on the offset such that any +out of bounds speculation would be masked in a way where the +pointer arithmetic result in the destination register will +stay unchanged, meaning offset masked into zero similar as +in array_index_nospec() case. With regards to implementation, +there are three options that were considered: i) new insn +for sanitation, ii) push/pop insn and sanitation as inlined +BPF, iii) reuse of ax register and sanitation as inlined BPF. + +Option i) has the downside that we end up using from reserved +bits in the opcode space, but also that we would require +each JIT to emit masking as native arch opcodes meaning +mitigation would have slow adoption till everyone implements +it eventually which is counter-productive. Option ii) and iii) +have both in common that a temporary register is needed in +order to implement the sanitation as inlined BPF since we +are not allowed to modify the source register. While a push / +pop insn in ii) would be useful to have in any case, it +requires once again that every JIT needs to implement it +first. While possible, amount of changes needed would also +be unsuitable for a -stable patch. 
Therefore, the path which +has fewer changes, less BPF instructions for the mitigation +and does not require anything to be changed in the JITs is +option iii) which this work is pursuing. The ax register is +already mapped to a register in all JITs (modulo arm32 where +it's mapped to stack as various other BPF registers there) +and used in constant blinding for JITs-only so far. It can +be reused for verifier rewrites under certain constraints. +The interpreter's tmp "register" has therefore been remapped +into extending the register set with hidden ax register and +reusing that for a number of instructions that needed the +prior temporary variable internally (e.g. div, mod). This +allows for zero increase in stack space usage in the interpreter, +and enables (restricted) generic use in rewrites otherwise as +long as such a patchlet does not make use of these instructions. +The sanitation mask is dynamic and relative to the offset the +map value or stack pointer currently holds. + +There are various cases that need to be taken under consideration +for the masking, e.g. such operation could look as follows: +ptr += val or val += ptr or ptr -= val. Thus, the value to be +sanitized could reside either in source or in destination +register, and the limit is different depending on whether +the ALU op is addition or subtraction and depending on the +current known and bounded offset. The limit is derived as +follows: limit := max_value_size - (smin_value + off). For +subtraction: limit := umax_value + off. This holds because +we do not allow any pointer arithmetic that would +temporarily go out of bounds or would have an unknown +value with mixed signed bounds where it is unclear at +verification time whether the actual runtime value would +be either negative or positive. For example, we have a +derived map pointer value with constant offset and bounded +one, so limit based on smin_value works because the verifier +requires that statically analyzed arithmetic on the pointer +must be in bounds, and thus it checks if resulting +smin_value + off and umax_value + off is still within map +value bounds at time of arithmetic in addition to time of +access. Similarly, for the case of stack access we derive +the limit as follows: MAX_BPF_STACK + off for subtraction +and -off for the case of addition where off := ptr_reg->off + +ptr_reg->var_off.value. Subtraction is a special case for +the masking which can be in form of ptr += -val, ptr -= -val, +or ptr -= val. In the first two cases where we know that +the value is negative, we need to temporarily negate the +value in order to do the sanitation on a positive value +where we later swap the ALU op, and restore original source +register if the value was in source. + +The sanitation of pointer arithmetic alone is still not fully +sufficient as is, since a scenario like the following could +happen ... + + PTR += 0x1000 (e.g. K-based imm) + PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON + PTR += 0x1000 + PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON + [...] + +... which under speculation could end up as ... + + PTR += 0x1000 + PTR -= 0 [ truncated by mitigation ] + PTR += 0x1000 + PTR -= 0 [ truncated by mitigation ] + [...] + +... and therefore still access out of bounds. To prevent such +case, the verifier is also analyzing safety for potential out +of bounds access under speculative execution. Meaning, it is +also simulating pointer access under truncation. 
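
For reference, the clamp that this masking performs on the offset register, emitted as the r11/AX sequence visible in the xlated dumps further below, corresponds to the following plain-C sketch (illustrative, not kernel code; limit corresponds to aux->alu_limit and is assumed to be a non-zero value that fits in 32 bits, as it does in the verifier):

    #include <stdint.h>

    /* Branchless: returns off unchanged when 0 <= off < limit and 0 otherwise,
     * so a mispredicted bounds check cannot make the pointer arithmetic step
     * outside the object under speculation.
     */
    static uint64_t clamp_offset(uint64_t off, uint64_t limit)
    {
            uint64_t ax;

            ax = (limit - 1) - off;              /* top bit set when off is just past the limit */
            ax |= off;                           /* top bit also set when off itself has it     */
            ax = -ax;
            ax = (uint64_t)((int64_t)ax >> 63);  /* broadcast the top bit: all ones or zero     */
            return off & ax;
    }

With limit = 20480 this is what instructions 35 to 40 of the first dump below compute into r11, and the rewritten addition at 41 then consumes the clamped value; whether the final AND lands in the source or the destination register, and whether a negation is wrapped around the sequence, depends on where the scalar sits and on its sign, as handled in fixup_bpf_calls().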
We therefore +"branch off" and push the current verification state after the +ALU operation with known 0 to the verification stack for later +analysis. Given the current path analysis succeeded it is +likely that the one under speculation can be pruned. In any +case, it is also subject to existing complexity limits and +therefore anything beyond this point will be rejected. In +terms of pruning, it needs to be ensured that the verification +state from speculative execution simulation must never prune +a non-speculative execution path, therefore, we mark verifier +state accordingly at the time of push_stack(). If verifier +detects out of bounds access under speculative execution from +one of the possible paths that includes a truncation, it will +reject such program. + +Given we mask every reg-based pointer arithmetic for +unprivileged programs, we've been looking into how it could +affect real-world programs in terms of size increase. As the +majority of programs are targeted for privileged-only use +case, we've unconditionally enabled masking (with its alu +restrictions on top of it) for privileged programs for the +sake of testing in order to check i) whether they get rejected +in its current form, and ii) by how much the number of +instructions and size will increase. We've tested this by +using Katran, Cilium and test_l4lb from the kernel selftests. +For Katran we've evaluated balancer_kern.o, Cilium bpf_lxc.o +and an older test object bpf_lxc_opt_-DUNKNOWN.o and l4lb +we've used test_l4lb.o as well as test_l4lb_noinline.o. We +found that none of the programs got rejected by the verifier +with this change, and that impact is rather minimal to none. +balancer_kern.o had 13,904 bytes (1,738 insns) xlated and +7,797 bytes JITed before and after the change. Most complex +program in bpf_lxc.o had 30,544 bytes (3,817 insns) xlated +and 18,538 bytes JITed before and after and none of the other +tail call programs in bpf_lxc.o had any changes either. For +the older bpf_lxc_opt_-DUNKNOWN.o object we found a small +increase from 20,616 bytes (2,576 insns) and 12,536 bytes JITed +before to 20,664 bytes (2,582 insns) and 12,558 bytes JITed +after the change. Other programs from that object file had +similar small increase. Both test_l4lb.o had no change and +remained at 6,544 bytes (817 insns) xlated and 3,401 bytes +JITed and for test_l4lb_noinline.o constant at 5,080 bytes +(634 insns) xlated and 3,313 bytes JITed. This can be explained +in that LLVM typically optimizes stack based pointer arithmetic +by using K-based operations and that use of dynamic map access +is not overly frequent. However, in future we may decide to +optimize the algorithm further under known guarantees from +branch and value speculation. Latter seems also unclear in +terms of prediction heuristics that today's CPUs apply as well +as whether there could be collisions in e.g. the predictor's +Value History/Pattern Table for triggering out of bounds access, +thus masking is performed unconditionally at this point but could +be subject to relaxation later on. We were generally also +brainstorming various other approaches for mitigation, but the +blocker was always lack of available registers at runtime and/or +overhead for runtime tracking of limits belonging to a specific +pointer. Thus, we found this to be minimally intrusive under +given constraints. + +With that in place, a simple example with sanitized access on +unprivileged load at post-verification time looks as follows: + + # bpftool prog dump xlated id 282 + [...] 
+ 28: (79) r1 = *(u64 *)(r7 +0) + 29: (79) r2 = *(u64 *)(r7 +8) + 30: (57) r1 &= 15 + 31: (79) r3 = *(u64 *)(r0 +4608) + 32: (57) r3 &= 1 + 33: (47) r3 |= 1 + 34: (2d) if r2 > r3 goto pc+19 + 35: (b4) (u32) r11 = (u32) 20479 | + 36: (1f) r11 -= r2 | Dynamic sanitation for pointer + 37: (4f) r11 |= r2 | arithmetic with registers + 38: (87) r11 = -r11 | containing bounded or known + 39: (c7) r11 s>>= 63 | scalars in order to prevent + 40: (5f) r11 &= r2 | out of bounds speculation. + 41: (0f) r4 += r11 | + 42: (71) r4 = *(u8 *)(r4 +0) + 43: (6f) r4 <<= r1 + [...] + +For the case where the scalar sits in the destination register +as opposed to the source register, the following code is emitted +for the above example: + + [...] + 16: (b4) (u32) r11 = (u32) 20479 + 17: (1f) r11 -= r2 + 18: (4f) r11 |= r2 + 19: (87) r11 = -r11 + 20: (c7) r11 s>>= 63 + 21: (5f) r2 &= r11 + 22: (0f) r2 += r0 + 23: (61) r0 = *(u32 *)(r2 +0) + [...] + +JIT blinding example with non-conflicting use of r10: + + [...] + d5: je 0x0000000000000106 _ + d7: mov 0x0(%rax),%edi | + da: mov $0xf153246,%r10d | Index load from map value and + e0: xor $0xf153259,%r10 | (const blinded) mask with 0x1f. + e7: and %r10,%rdi |_ + ea: mov $0x2f,%r10d | + f0: sub %rdi,%r10 | Sanitized addition. Both use r10 + f3: or %rdi,%r10 | but do not interfere with each + f6: neg %r10 | other. (Neither do these instructions + f9: sar $0x3f,%r10 | interfere with the use of ax as temp + fd: and %r10,%rdi | in interpreter.) + 100: add %rax,%rdi |_ + 103: mov 0x0(%rdi),%eax + [...] + +Tested that it fixes Jann's reproducer, and also checked that test_verifier +and test_progs suite with interpreter, JIT and JIT with hardening enabled +on x86-64 and arm64 runs successfully. + + [0] Speculose: Analyzing the Security Implications of Speculative + Execution in CPUs, Giorgi Maisuradze and Christian Rossow, + https://arxiv.org/pdf/1801.04084.pdf + + [1] A Systematic Evaluation of Transient Execution Attacks and + Defenses, Claudio Canella, Jo Van Bulck, Michael Schwarz, + Moritz Lipp, Benjamin von Berg, Philipp Ortner, Frank Piessens, + Dmitry Evtyushkin, Daniel Gruss, + https://arxiv.org/pdf/1811.05441.pdf + +Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") +Reported-by: Jann Horn +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + include/linux/bpf_verifier.h | 10 ++ + kernel/bpf/verifier.c | 186 +++++++++++++++++++++++++++++++++-- + 2 files changed, 189 insertions(+), 7 deletions(-) + +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index b1587ffdea7b..2716dcbfb2f1 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -134,6 +134,7 @@ struct bpf_verifier_state { + struct bpf_func_state *frame[MAX_CALL_FRAMES]; + struct bpf_verifier_state *parent; + u32 curframe; ++ bool speculative; + }; + + /* linked list of verifier states used to prune search */ +@@ -142,15 +143,24 @@ struct bpf_verifier_state_list { + struct bpf_verifier_state_list *next; + }; + ++/* Possible states for alu_state member. 
*/ ++#define BPF_ALU_SANITIZE_SRC 1U ++#define BPF_ALU_SANITIZE_DST 2U ++#define BPF_ALU_NEG_VALUE (1U << 2) ++#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ ++ BPF_ALU_SANITIZE_DST) ++ + struct bpf_insn_aux_data { + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + unsigned long map_state; /* pointer/poison value for maps */ + s32 call_imm; /* saved imm field of call insn */ ++ u32 alu_limit; /* limit for add/sub register with pointer */ + }; + int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int sanitize_stack_off; /* stack slot to be cleared */ + bool seen; /* this insn was processed by the verifier */ ++ u8 alu_state; /* used in combination with alu_limit */ + }; + + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index f9d5aea4891d..9ab0adedb174 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -466,6 +466,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, + free_func_state(dst_state->frame[i]); + dst_state->frame[i] = NULL; + } ++ dst_state->speculative = src->speculative; + dst_state->curframe = src->curframe; + dst_state->parent = src->parent; + for (i = 0; i <= src->curframe; i++) { +@@ -511,7 +512,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, + } + + static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, +- int insn_idx, int prev_insn_idx) ++ int insn_idx, int prev_insn_idx, ++ bool speculative) + { + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_verifier_stack_elem *elem; +@@ -529,6 +531,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + err = copy_verifier_state(&elem->st, cur); + if (err) + goto err; ++ elem->st.speculative |= speculative; + if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { + verbose(env, "BPF program is too complex\n"); + goto err; +@@ -2698,6 +2701,102 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, + return true; + } + ++static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) ++{ ++ return &env->insn_aux_data[env->insn_idx]; ++} ++ ++static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, ++ u32 *ptr_limit, u8 opcode, bool off_is_neg) ++{ ++ bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || ++ (opcode == BPF_SUB && !off_is_neg); ++ u32 off; ++ ++ switch (ptr_reg->type) { ++ case PTR_TO_STACK: ++ off = ptr_reg->off + ptr_reg->var_off.value; ++ if (mask_to_left) ++ *ptr_limit = MAX_BPF_STACK + off; ++ else ++ *ptr_limit = -off; ++ return 0; ++ case PTR_TO_MAP_VALUE: ++ if (mask_to_left) { ++ *ptr_limit = ptr_reg->umax_value + ptr_reg->off; ++ } else { ++ off = ptr_reg->smin_value + ptr_reg->off; ++ *ptr_limit = ptr_reg->map_ptr->value_size - off; ++ } ++ return 0; ++ default: ++ return -EINVAL; ++ } ++} ++ ++static int sanitize_ptr_alu(struct bpf_verifier_env *env, ++ struct bpf_insn *insn, ++ const struct bpf_reg_state *ptr_reg, ++ struct bpf_reg_state *dst_reg, ++ bool off_is_neg) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_insn_aux_data *aux = cur_aux(env); ++ bool ptr_is_dst_reg = ptr_reg == dst_reg; ++ u8 opcode = BPF_OP(insn->code); ++ u32 alu_state, alu_limit; ++ struct bpf_reg_state tmp; ++ bool ret; ++ ++ if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) ++ return 0; ++ ++ /* We already marked aux for masking from non-speculative ++ * paths, thus we got here in the first place. 
We only care ++ * to explore bad access from here. ++ */ ++ if (vstate->speculative) ++ goto do_sim; ++ ++ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; ++ alu_state |= ptr_is_dst_reg ? ++ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; ++ ++ if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) ++ return 0; ++ ++ /* If we arrived here from different branches with different ++ * limits to sanitize, then this won't work. ++ */ ++ if (aux->alu_state && ++ (aux->alu_state != alu_state || ++ aux->alu_limit != alu_limit)) ++ return -EACCES; ++ ++ /* Corresponding fixup done in fixup_bpf_calls(). */ ++ aux->alu_state = alu_state; ++ aux->alu_limit = alu_limit; ++ ++do_sim: ++ /* Simulate and find potential out-of-bounds access under ++ * speculative execution from truncation as a result of ++ * masking when off was not within expected range. If off ++ * sits in dst, then we temporarily need to move ptr there ++ * to simulate dst (== 0) +/-= ptr. Needed, for example, ++ * for cases where we use K-based arithmetic in one direction ++ * and truncated reg-based in the other in order to explore ++ * bad access. ++ */ ++ if (!ptr_is_dst_reg) { ++ tmp = *dst_reg; ++ *dst_reg = *ptr_reg; ++ } ++ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); ++ if (!ptr_is_dst_reg) ++ *dst_reg = tmp; ++ return !ret ? -EFAULT : 0; ++} ++ + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. + * Caller should also handle BPF_MOV case separately. + * If we return -EACCES, caller may want to try again treating pointer as a +@@ -2718,6 +2817,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + u32 dst = insn->dst_reg, src = insn->src_reg; + u8 opcode = BPF_OP(insn->code); ++ int ret; + + dst_reg = ®s[dst]; + +@@ -2772,6 +2872,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + + switch (opcode) { + case BPF_ADD: ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); ++ if (ret < 0) { ++ verbose(env, "R%d tried to add from different maps or paths\n", dst); ++ return ret; ++ } + /* We can take a fixed offset as long as it doesn't overflow + * the s32 'off' field + */ +@@ -2822,6 +2927,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + } + break; + case BPF_SUB: ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); ++ if (ret < 0) { ++ verbose(env, "R%d tried to sub from different maps or paths\n", dst); ++ return ret; ++ } + if (dst_reg == off_reg) { + /* scalar -= pointer. Creates an unknown scalar */ + verbose(env, "R%d tried to subtract pointer from scalar\n", +@@ -3997,7 +4107,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, + } + } + +- other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); ++ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, ++ false); + if (!other_branch) + return -EFAULT; + other_branch_regs = other_branch->frame[other_branch->curframe]->regs; +@@ -4712,6 +4823,12 @@ static bool states_equal(struct bpf_verifier_env *env, + if (old->curframe != cur->curframe) + return false; + ++ /* Verification state from speculative execution simulation ++ * must never prune a non-speculative execution one. 
++ */ ++ if (old->speculative && !cur->speculative) ++ return false; ++ + /* for states to be equal callsites have to be the same + * and all frame states need to be equivalent + */ +@@ -4863,7 +4980,7 @@ static int do_check(struct bpf_verifier_env *env) + if (!state) + return -ENOMEM; + state->curframe = 0; +- state->parent = NULL; ++ state->speculative = false; + state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); + if (!state->frame[0]) { + kfree(state); +@@ -4903,8 +5020,10 @@ static int do_check(struct bpf_verifier_env *env) + /* found equivalent state, can prune the search */ + if (env->log.level) { + if (do_print_state) +- verbose(env, "\nfrom %d to %d: safe\n", +- env->prev_insn_idx, env->insn_idx); ++ verbose(env, "\nfrom %d to %d%s: safe\n", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? ++ " (speculative execution)" : ""); + else + verbose(env, "%d: safe\n", env->insn_idx); + } +@@ -4921,8 +5040,10 @@ static int do_check(struct bpf_verifier_env *env) + if (env->log.level > 1) + verbose(env, "%d:", env->insn_idx); + else +- verbose(env, "\nfrom %d to %d:", +- env->prev_insn_idx, env->insn_idx); ++ verbose(env, "\nfrom %d to %d%s:", ++ env->prev_insn_idx, env->insn_idx, ++ env->cur_state->speculative ? ++ " (speculative execution)" : ""); + print_verifier_state(env, state->frame[state->curframe]); + do_print_state = false; + } +@@ -5869,6 +5990,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) + continue; + } + ++ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || ++ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { ++ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; ++ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; ++ struct bpf_insn insn_buf[16]; ++ struct bpf_insn *patch = &insn_buf[0]; ++ bool issrc, isneg; ++ u32 off_reg; ++ ++ aux = &env->insn_aux_data[i + delta]; ++ if (!aux->alu_state) ++ continue; ++ ++ isneg = aux->alu_state & BPF_ALU_NEG_VALUE; ++ issrc = (aux->alu_state & BPF_ALU_SANITIZE) == ++ BPF_ALU_SANITIZE_SRC; ++ ++ off_reg = issrc ? insn->src_reg : insn->dst_reg; ++ if (isneg) ++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); ++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); ++ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); ++ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); ++ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); ++ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); ++ if (issrc) { ++ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, ++ off_reg); ++ insn->src_reg = BPF_REG_AX; ++ } else { ++ *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, ++ BPF_REG_AX); ++ } ++ if (isneg) ++ insn->code = insn->code == code_add ? 
++ code_sub : code_add; ++ *patch++ = *insn; ++ if (issrc && isneg) ++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); ++ cnt = patch - insn_buf; ++ ++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); ++ if (!new_prog) ++ return -ENOMEM; ++ ++ delta += cnt - 1; ++ env->prog = prog = new_prog; ++ insn = new_prog->insnsi + i + delta; ++ continue; ++ } ++ + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; + if (insn->src_reg == BPF_PSEUDO_CALL) +-- +2.19.1 + diff --git a/queue-4.19/bpf-restrict-map-value-pointer-arithmetic-for-unpriv.patch b/queue-4.19/bpf-restrict-map-value-pointer-arithmetic-for-unpriv.patch new file mode 100644 index 00000000000..9e11d2c6b30 --- /dev/null +++ b/queue-4.19/bpf-restrict-map-value-pointer-arithmetic-for-unpriv.patch @@ -0,0 +1,51 @@ +From 5a41b1860e320e8d921b785115460e4ff80dc0eb Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:24 +0100 +Subject: bpf: restrict map value pointer arithmetic for unprivileged + +[ commit 0d6303db7970e6f56ae700fa07e11eb510cda125 upstream ] + +Restrict map value pointer arithmetic for unprivileged users in that +arithmetic itself must not go out of bounds as opposed to the actual +access later on. Therefore after each adjust_ptr_min_max_vals() with a +map value pointer as a destination it will simulate a check_map_access() +of 1 byte on the destination and once that fails the program is rejected +for unprivileged program loads. We use this later on for masking any +pointer arithmetic with the remainder of the map value space. The +likelihood of breaking any existing real-world unprivileged eBPF +program is very small for this corner case. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 6d11320998c6..110ca915cfb6 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2880,6 +2880,17 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); ++ ++ /* For unprivileged we require that resulting offset must be in bounds ++ * in order to be able to sanitize access later on. ++ */ ++ if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && ++ check_map_access(env, dst, dst_reg->off, 1, false)) { ++ verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", ++ dst); ++ return -EACCES; ++ } ++ + return 0; + } + +-- +2.19.1 + diff --git a/queue-4.19/bpf-restrict-stack-pointer-arithmetic-for-unprivileg.patch b/queue-4.19/bpf-restrict-stack-pointer-arithmetic-for-unprivileg.patch new file mode 100644 index 00000000000..1308452257c --- /dev/null +++ b/queue-4.19/bpf-restrict-stack-pointer-arithmetic-for-unprivileg.patch @@ -0,0 +1,116 @@ +From f2685b0fc59fc7f3debc0502020b8134277770a7 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:25 +0100 +Subject: bpf: restrict stack pointer arithmetic for unprivileged + +[ commit e4298d25830a866cc0f427d4bccb858e76715859 upstream ] + +Restrict stack pointer arithmetic for unprivileged users in that +arithmetic itself must not go out of bounds as opposed to the actual +access later on. 
Therefore after each adjust_ptr_min_max_vals() with +a stack pointer as a destination we simulate a check_stack_access() +of 1 byte on the destination and once that fails the program is +rejected for unprivileged program loads. This is analog to map +value pointer arithmetic and needed for masking later on. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 63 ++++++++++++++++++++++++++++--------------- + 1 file changed, 41 insertions(+), 22 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 110ca915cfb6..aa2944d54e7a 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1238,6 +1238,31 @@ static int check_stack_read(struct bpf_verifier_env *env, + } + } + ++static int check_stack_access(struct bpf_verifier_env *env, ++ const struct bpf_reg_state *reg, ++ int off, int size) ++{ ++ /* Stack accesses must be at a fixed offset, so that we ++ * can determine what type of data were returned. See ++ * check_stack_read(). ++ */ ++ if (!tnum_is_const(reg->var_off)) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "variable stack access var_off=%s off=%d size=%d", ++ tn_buf, off, size); ++ return -EACCES; ++ } ++ ++ if (off >= 0 || off < -MAX_BPF_STACK) { ++ verbose(env, "invalid stack off=%d size=%d\n", off, size); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ + /* check read/write into map element returned by bpf_map_lookup_elem() */ + static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, + int size, bool zero_size_allowed) +@@ -1736,24 +1761,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + } + + } else if (reg->type == PTR_TO_STACK) { +- /* stack accesses must be at a fixed offset, so that we can +- * determine what type of data were returned. +- * See check_stack_read(). +- */ +- if (!tnum_is_const(reg->var_off)) { +- char tn_buf[48]; +- +- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +- verbose(env, "variable stack access var_off=%s off=%d size=%d", +- tn_buf, off, size); +- return -EACCES; +- } + off += reg->var_off.value; +- if (off >= 0 || off < -MAX_BPF_STACK) { +- verbose(env, "invalid stack off=%d size=%d\n", off, +- size); +- return -EACCES; +- } ++ err = check_stack_access(env, reg, off, size); ++ if (err) ++ return err; + + state = func(env, reg); + err = update_stack_depth(env, state, off); +@@ -2884,11 +2895,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. 
+ */ +- if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && +- check_map_access(env, dst, dst_reg->off, 1, false)) { +- verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", +- dst); +- return -EACCES; ++ if (!env->allow_ptr_leaks) { ++ if (dst_reg->type == PTR_TO_MAP_VALUE && ++ check_map_access(env, dst, dst_reg->off, 1, false)) { ++ verbose(env, "R%d pointer arithmetic of map value goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } else if (dst_reg->type == PTR_TO_STACK && ++ check_stack_access(env, dst_reg, dst_reg->off + ++ dst_reg->var_off.value, 1)) { ++ verbose(env, "R%d stack pointer arithmetic goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } + } + + return 0; +-- +2.19.1 + diff --git a/queue-4.19/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-.patch b/queue-4.19/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-.patch new file mode 100644 index 00000000000..97730012810 --- /dev/null +++ b/queue-4.19/bpf-restrict-unknown-scalars-of-mixed-signed-bounds-.patch @@ -0,0 +1,54 @@ +From 91b541fcd73c3735479ac2018aecab3c0ed6a9b2 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Mon, 28 Jan 2019 21:28:26 +0100 +Subject: bpf: restrict unknown scalars of mixed signed bounds for unprivileged + +[ commit 9d7eceede769f90b66cfa06ad5b357140d5141ed upstream ] + +For unknown scalars of mixed signed bounds, meaning their smin_value is +negative and their smax_value is positive, we need to reject arithmetic +with pointer to map value. For unprivileged the goal is to mask every +map pointer arithmetic and this cannot reliably be done when it is +unknown at verification time whether the scalar value is negative or +positive. Given this is a corner case, the likelihood of breaking should +be very small. + +Signed-off-by: Daniel Borkmann +Acked-by: Alexei Starovoitov +Signed-off-by: Alexei Starovoitov +Signed-off-by: Daniel Borkmann +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index aa2944d54e7a..fbaa3b9e1d71 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2712,8 +2712,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; + u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, + umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; ++ u32 dst = insn->dst_reg, src = insn->src_reg; + u8 opcode = BPF_OP(insn->code); +- u32 dst = insn->dst_reg; + + dst_reg = ®s[dst]; + +@@ -2749,6 +2749,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + dst); + return -EACCES; + } ++ if (ptr_reg->type == PTR_TO_MAP_VALUE && ++ !env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { ++ verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", ++ off_reg == dst_reg ? dst : src); ++ return -EACCES; ++ } + + /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. + * The id may be overwritten later if we create a new variable offset. 
+--
+2.19.1
+
diff --git a/queue-4.19/series b/queue-4.19/series
index c906d1a1c4f..a0fe7124c95 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -83,3 +83,15 @@ vt-make-vt_console_print-compatible-with-the-unicode-screen-buffer.patch
 vt-always-call-notifier-with-the-console-lock-held.patch
 vt-invoke-notifier-on-screen-size-change.patch
 drm-meson-fix-atomic-mode-switching-regression.patch
+bpf-improve-verifier-branch-analysis.patch
+bpf-add-per-insn-complexity-limit.patch
+bpf-move-prev_-insn_idx-into-verifier-env.patch
+bpf-move-tmp-variable-into-ax-register-in-interprete.patch
+bpf-enable-access-to-ax-register-also-from-verifier-.patch
+bpf-restrict-map-value-pointer-arithmetic-for-unpriv.patch
+bpf-restrict-stack-pointer-arithmetic-for-unprivileg.patch
+bpf-restrict-unknown-scalars-of-mixed-signed-bounds-.patch
+bpf-fix-check_map_access-smin_value-test-when-pointe.patch
+bpf-prevent-out-of-bounds-speculation-on-pointer-ari.patch
+bpf-fix-sanitation-of-alu-op-with-pointer-scalar-typ.patch
+bpf-fix-inner-map-masking-to-prevent-oob-under-specu.patch
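
A worked example of the limit computation in retrieve_ptr_limit() above, for the
PTR_TO_STACK case: the limit is simply the room left on the 512-byte BPF stack in
the direction the pointer is being moved. The sketch below is a stand-alone
user-space model, not kernel code; stack_ptr_limit() is an illustrative name and
only the arithmetic from the patch is reproduced.

/* Hypothetical user-space model of the PTR_TO_STACK case in
 * retrieve_ptr_limit(): how much room is left in the direction the
 * pointer is being moved. Names not found in the patches are illustrative.
 */
#include <stdio.h>

#define MAX_BPF_STACK	512	/* same value the kernel uses */

static unsigned int stack_ptr_limit(int off, int mask_to_left)
{
	/* off is the pointer's fixed stack offset, e.g. -64 for fp-64 */
	if (mask_to_left)
		return MAX_BPF_STACK + off;	/* room towards the stack bottom */
	return -off;				/* room back up to the frame top */
}

int main(void)
{
	/* pointer at fp-64 on a 512-byte stack */
	printf("down: %u\n", stack_ptr_limit(-64, 1));	/* 448 */
	printf("up:   %u\n", stack_ptr_limit(-64, 0));	/* 64  */
	return 0;
}

sanitize_ptr_alu() stores this value in aux->alu_limit so that fixup_bpf_calls()
can later clamp the scalar operand to at most alu_limit - 1 in that direction.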
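
The masking sequence emitted by fixup_bpf_calls() above is easier to follow as
plain C. The sketch below is a hypothetical user-space model (mask_alu_offset()
is not a kernel function); each statement mirrors one emitted instruction, with
ax standing in for BPF_REG_AX, and the isneg pre/post negation of the offset is
omitted for brevity. The net effect is that an offset outside [0, alu_limit - 1]
is forced to zero before the pointer add/sub, so an out-of-range offset cannot
move the pointer out of bounds even on a speculative path.

/* Hypothetical model of the masking sequence patched in around a
 * sanitized pointer ALU instruction; 'off' plays the role of the
 * scalar offset register.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t mask_alu_offset(uint64_t off, uint32_t alu_limit)
{
	uint64_t ax;

	ax  = alu_limit - 1;	/* BPF_MOV32_IMM(AX, aux->alu_limit - 1)   */
	ax -= off;		/* BPF_ALU64_REG(BPF_SUB, AX, off_reg)     */
	ax |= off;		/* BPF_ALU64_REG(BPF_OR, AX, off_reg)      */
	ax  = -ax;		/* BPF_ALU64_IMM(BPF_NEG, AX, 0)           */
	ax  = (uint64_t)((int64_t)ax >> 63);	/* BPF_ALU64_IMM(BPF_ARSH, AX, 63) */

	/* ax is now all-ones iff 0 <= off < alu_limit, otherwise zero.
	 * The patch ANDs it into either src or dst depending on where
	 * the pointer sits; the net result is the same.
	 */
	return off & ax;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)mask_alu_offset(8, 64));	/* in range  -> 8 */
	printf("%llu\n", (unsigned long long)mask_alu_offset(200, 64));	/* too large -> 0 */
	return 0;
}

Doing the clamp with straight-line arithmetic rather than a conditional branch is
deliberate: a branch could itself be mispredicted, whereas this sequence behaves
the same on the speculative path as on the architectural one.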