From: Sasha Levin Date: Sun, 25 Apr 2021 18:18:05 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v4.4.268~24 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=579f1c9bda2dd0c4a934513a842a2095b26f318b;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/bpf-allow-variable-offset-stack-access.patch b/queue-5.10/bpf-allow-variable-offset-stack-access.patch new file mode 100644 index 00000000000..e3ef7e0fbf9 --- /dev/null +++ b/queue-5.10/bpf-allow-variable-offset-stack-access.patch @@ -0,0 +1,945 @@ +From 3e1ab8301f1fefaccfcc1491b5dbc75ece355783 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 6 Feb 2021 20:10:24 -0500 +Subject: bpf: Allow variable-offset stack access + +From: Andrei Matei + +[ Upstream commit 01f810ace9ed37255f27608a0864abebccf0aab3 ] + +Before this patch, variable offset access to the stack was disallowed +for regular instructions, but was allowed for "indirect" accesses (i.e. +helpers). This patch removes the restriction, allowing reading and +writing to the stack through stack pointers with variable offsets. This +makes stack-allocated buffers more usable in programs, and brings stack +pointers closer to other types of pointers. + +The motivation is being able to use stack-allocated buffers for data +manipulation. When the stack size limit is sufficient, allocating +buffers on the stack is simpler than per-cpu arrays, or other +alternatives. + +In unprivileged programs, variable-offset reads and writes are +disallowed (they were already disallowed for the indirect access case) +because the speculative execution checking code doesn't support them. +Additionally, when writing through a variable-offset stack pointer, if +any pointers are in the accessible range, there are possibilities of later +leaking pointers because the write cannot be tracked precisely. 
+ +Writes with variable offset mark the whole range as initialized, even +though we don't know which stack slots are actually written. This is in +order to not reject future reads to these slots. Note that this doesn't +affect writes done through helpers; like before, helpers need the whole +stack range to be initialized to begin with. +All the stack slots in range are considered scalars after the write; +variable-offset register spills are not tracked. + +For reads, all the stack slots in the variable range need to be +initialized (but see above about what writes do), otherwise the read is +rejected. All registers spilled in stack slots that might be read are +marked as having been read, however reads through such pointers don't do +register filling; the target register will always be either a scalar or +a constant zero. + +Signed-off-by: Andrei Matei +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20210207011027.676572-2-andreimatei1@gmail.com +Signed-off-by: Sasha Levin +--- + include/linux/bpf.h | 5 + + include/linux/bpf_verifier.h | 3 +- + kernel/bpf/verifier.c | 657 +++++++++++++++++++++++++++-------- + 3 files changed, 518 insertions(+), 147 deletions(-) + +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index b416bba3a62b..8ad819132dde 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1259,6 +1259,11 @@ static inline bool bpf_allow_ptr_leaks(void) + return perfmon_capable(); + } + ++static inline bool bpf_allow_uninit_stack(void) ++{ ++ return perfmon_capable(); ++} ++ + static inline bool bpf_allow_ptr_to_map_access(void) + { + return perfmon_capable(); +diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h +index e83ef6f6bf43..85bac3191e12 100644 +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -187,7 +187,7 @@ struct bpf_func_state { + * 0 = main function, 1 = first callee. 
+ */ + u32 frameno; +- /* subprog number == index within subprog_stack_depth ++ /* subprog number == index within subprog_info + * zero == main subprog + */ + u32 subprogno; +@@ -390,6 +390,7 @@ struct bpf_verifier_env { + u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; ++ bool allow_uninit_stack; + bool allow_ptr_to_map_access; + bool bpf_capable; + bool bypass_spec_v1; +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 2e09e691a6be..94923c2bdd81 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -2268,12 +2268,14 @@ static void save_register_state(struct bpf_func_state *state, + state->stack[spi].slot_type[i] = STACK_SPILL; + } + +-/* check_stack_read/write functions track spill/fill of registers, ++/* check_stack_{read,write}_fixed_off functions track spill/fill of registers, + * stack boundary and alignment are checked in check_mem_access() + */ +-static int check_stack_write(struct bpf_verifier_env *env, +- struct bpf_func_state *state, /* func where register points to */ +- int off, int size, int value_regno, int insn_idx) ++static int check_stack_write_fixed_off(struct bpf_verifier_env *env, ++ /* stack frame we're writing to */ ++ struct bpf_func_state *state, ++ int off, int size, int value_regno, ++ int insn_idx) + { + struct bpf_func_state *cur; /* state of the current function */ + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; +@@ -2399,9 +2401,175 @@ static int check_stack_write(struct bpf_verifier_env *env, + return 0; + } + +-static int check_stack_read(struct bpf_verifier_env *env, +- struct bpf_func_state *reg_state /* func where register points to */, +- int off, int size, int value_regno) ++/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is ++ * known to contain a variable offset. 
++ * This function checks whether the write is permitted and conservatively ++ * tracks the effects of the write, considering that each stack slot in the ++ * dynamic range is potentially written to. ++ * ++ * 'off' includes 'regno->off'. ++ * 'value_regno' can be -1, meaning that an unknown value is being written to ++ * the stack. ++ * ++ * Spilled pointers in range are not marked as written because we don't know ++ * what's going to be actually written. This means that read propagation for ++ * future reads cannot be terminated by this write. ++ * ++ * For privileged programs, uninitialized stack slots are considered ++ * initialized by this write (even though we don't know exactly what offsets ++ * are going to be written to). The idea is that we don't want the verifier to ++ * reject future reads that access slots written to through variable offsets. ++ */ ++static int check_stack_write_var_off(struct bpf_verifier_env *env, ++ /* func where register points to */ ++ struct bpf_func_state *state, ++ int ptr_regno, int off, int size, ++ int value_regno, int insn_idx) ++{ ++ struct bpf_func_state *cur; /* state of the current function */ ++ int min_off, max_off; ++ int i, err; ++ struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL; ++ bool writing_zero = false; ++ /* set if the fact that we're writing a zero is used to let any ++ * stack slots remain STACK_ZERO ++ */ ++ bool zero_used = false; ++ ++ cur = env->cur_state->frame[env->cur_state->curframe]; ++ ptr_reg = &cur->regs[ptr_regno]; ++ min_off = ptr_reg->smin_value + off; ++ max_off = ptr_reg->smax_value + off + size; ++ if (value_regno >= 0) ++ value_reg = &cur->regs[value_regno]; ++ if (value_reg && register_is_null(value_reg)) ++ writing_zero = true; ++ ++ err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), ++ state->acquired_refs, true); ++ if (err) ++ return err; ++ ++ ++ /* Variable offset writes destroy any spilled pointers in range. 
*/ ++ for (i = min_off; i < max_off; i++) { ++ u8 new_type, *stype; ++ int slot, spi; ++ ++ slot = -i - 1; ++ spi = slot / BPF_REG_SIZE; ++ stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; ++ ++ if (!env->allow_ptr_leaks ++ && *stype != NOT_INIT ++ && *stype != SCALAR_VALUE) { ++ /* Reject the write if there's are spilled pointers in ++ * range. If we didn't reject here, the ptr status ++ * would be erased below (even though not all slots are ++ * actually overwritten), possibly opening the door to ++ * leaks. ++ */ ++ verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", ++ insn_idx, i); ++ return -EINVAL; ++ } ++ ++ /* Erase all spilled pointers. */ ++ state->stack[spi].spilled_ptr.type = NOT_INIT; ++ ++ /* Update the slot type. */ ++ new_type = STACK_MISC; ++ if (writing_zero && *stype == STACK_ZERO) { ++ new_type = STACK_ZERO; ++ zero_used = true; ++ } ++ /* If the slot is STACK_INVALID, we check whether it's OK to ++ * pretend that it will be initialized by this write. The slot ++ * might not actually be written to, and so if we mark it as ++ * initialized future reads might leak uninitialized memory. ++ * For privileged programs, we will accept such reads to slots ++ * that may or may not be written because, if we're reject ++ * them, the error would be too confusing. ++ */ ++ if (*stype == STACK_INVALID && !env->allow_uninit_stack) { ++ verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", ++ insn_idx, i); ++ return -EINVAL; ++ } ++ *stype = new_type; ++ } ++ if (zero_used) { ++ /* backtracking doesn't work for STACK_ZERO yet. */ ++ err = mark_chain_precision(env, value_regno); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/* When register 'dst_regno' is assigned some values from stack[min_off, ++ * max_off), we set the register's type according to the types of the ++ * respective stack slots. 
If all the stack values are known to be zeros, then ++ * so is the destination reg. Otherwise, the register is considered to be ++ * SCALAR. This function does not deal with register filling; the caller must ++ * ensure that all spilled registers in the stack range have been marked as ++ * read. ++ */ ++static void mark_reg_stack_read(struct bpf_verifier_env *env, ++ /* func where src register points to */ ++ struct bpf_func_state *ptr_state, ++ int min_off, int max_off, int dst_regno) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ int i, slot, spi; ++ u8 *stype; ++ int zeros = 0; ++ ++ for (i = min_off; i < max_off; i++) { ++ slot = -i - 1; ++ spi = slot / BPF_REG_SIZE; ++ stype = ptr_state->stack[spi].slot_type; ++ if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) ++ break; ++ zeros++; ++ } ++ if (zeros == max_off - min_off) { ++ /* any access_size read into register is zero extended, ++ * so the whole register == const_zero ++ */ ++ __mark_reg_const_zero(&state->regs[dst_regno]); ++ /* backtracking doesn't support STACK_ZERO yet, ++ * so mark it precise here, so that later ++ * backtracking can stop here. ++ * Backtracking may not need this if this register ++ * doesn't participate in pointer adjustment. ++ * Forward propagation of precise flag is not ++ * necessary either. This mark is only to stop ++ * backtracking. Any register that contributed ++ * to const 0 was marked precise before spill. ++ */ ++ state->regs[dst_regno].precise = true; ++ } else { ++ /* have read misc data from the stack */ ++ mark_reg_unknown(env, state->regs, dst_regno); ++ } ++ state->regs[dst_regno].live |= REG_LIVE_WRITTEN; ++} ++ ++/* Read the stack at 'off' and put the results into the register indicated by ++ * 'dst_regno'. It handles reg filling if the addressed stack slot is a ++ * spilled reg. ++ * ++ * 'dst_regno' can be -1, meaning that the read value is not going to a ++ * register. 
++ * ++ * The access is assumed to be within the current stack bounds. ++ */ ++static int check_stack_read_fixed_off(struct bpf_verifier_env *env, ++ /* func where src register points to */ ++ struct bpf_func_state *reg_state, ++ int off, int size, int dst_regno) + { + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; +@@ -2409,11 +2577,6 @@ static int check_stack_read(struct bpf_verifier_env *env, + struct bpf_reg_state *reg; + u8 *stype; + +- if (reg_state->allocated_stack <= slot) { +- verbose(env, "invalid read from stack off %d+0 size %d\n", +- off, size); +- return -EACCES; +- } + stype = reg_state->stack[spi].slot_type; + reg = ®_state->stack[spi].spilled_ptr; + +@@ -2424,9 +2587,9 @@ static int check_stack_read(struct bpf_verifier_env *env, + verbose(env, "invalid size of register fill\n"); + return -EACCES; + } +- if (value_regno >= 0) { +- mark_reg_unknown(env, state->regs, value_regno); +- state->regs[value_regno].live |= REG_LIVE_WRITTEN; ++ if (dst_regno >= 0) { ++ mark_reg_unknown(env, state->regs, dst_regno); ++ state->regs[dst_regno].live |= REG_LIVE_WRITTEN; + } + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + return 0; +@@ -2438,16 +2601,16 @@ static int check_stack_read(struct bpf_verifier_env *env, + } + } + +- if (value_regno >= 0) { ++ if (dst_regno >= 0) { + /* restore register state from stack */ +- state->regs[value_regno] = *reg; ++ state->regs[dst_regno] = *reg; + /* mark reg as written since spilled pointer state likely + * has its liveness marks cleared by is_state_visited() + * which resets stack/reg liveness for state transitions + */ +- state->regs[value_regno].live |= REG_LIVE_WRITTEN; ++ state->regs[dst_regno].live |= REG_LIVE_WRITTEN; + } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { +- /* If value_regno==-1, the caller is asking us whether ++ /* If dst_regno==-1, the caller is asking us whether + * it is acceptable to use this value as a 
SCALAR_VALUE + * (e.g. for XADD). + * We must not allow unprivileged callers to do that +@@ -2459,70 +2622,167 @@ static int check_stack_read(struct bpf_verifier_env *env, + } + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); + } else { +- int zeros = 0; ++ u8 type; + + for (i = 0; i < size; i++) { +- if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) ++ type = stype[(slot - i) % BPF_REG_SIZE]; ++ if (type == STACK_MISC) + continue; +- if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { +- zeros++; ++ if (type == STACK_ZERO) + continue; +- } + verbose(env, "invalid read from stack off %d+%d size %d\n", + off, i, size); + return -EACCES; + } + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); +- if (value_regno >= 0) { +- if (zeros == size) { +- /* any size read into register is zero extended, +- * so the whole register == const_zero +- */ +- __mark_reg_const_zero(&state->regs[value_regno]); +- /* backtracking doesn't support STACK_ZERO yet, +- * so mark it precise here, so that later +- * backtracking can stop here. +- * Backtracking may not need this if this register +- * doesn't participate in pointer adjustment. +- * Forward propagation of precise flag is not +- * necessary either. This mark is only to stop +- * backtracking. Any register that contributed +- * to const 0 was marked precise before spill. 
+- */ +- state->regs[value_regno].precise = true; +- } else { +- /* have read misc data from the stack */ +- mark_reg_unknown(env, state->regs, value_regno); +- } +- state->regs[value_regno].live |= REG_LIVE_WRITTEN; +- } ++ if (dst_regno >= 0) ++ mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); + } + return 0; + } + +-static int check_stack_access(struct bpf_verifier_env *env, +- const struct bpf_reg_state *reg, +- int off, int size) ++enum stack_access_src { ++ ACCESS_DIRECT = 1, /* the access is performed by an instruction */ ++ ACCESS_HELPER = 2, /* the access is performed by a helper */ ++}; ++ ++static int check_stack_range_initialized(struct bpf_verifier_env *env, ++ int regno, int off, int access_size, ++ bool zero_size_allowed, ++ enum stack_access_src type, ++ struct bpf_call_arg_meta *meta); ++ ++static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) ++{ ++ return cur_regs(env) + regno; ++} ++ ++/* Read the stack at 'ptr_regno + off' and put the result into the register ++ * 'dst_regno'. ++ * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'), ++ * but not its variable offset. ++ * 'size' is assumed to be <= reg size and the access is assumed to be aligned. ++ * ++ * As opposed to check_stack_read_fixed_off, this function doesn't deal with ++ * filling registers (i.e. reads of spilled register cannot be detected when ++ * the offset is not fixed). We conservatively mark 'dst_regno' as containing ++ * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable ++ * offset; for a fixed offset check_stack_read_fixed_off should be used ++ * instead. ++ */ ++static int check_stack_read_var_off(struct bpf_verifier_env *env, ++ int ptr_regno, int off, int size, int dst_regno) + { +- /* Stack accesses must be at a fixed offset, so that we +- * can determine what type of data were returned. See +- * check_stack_read(). ++ /* The state of the source register. 
*/ ++ struct bpf_reg_state *reg = reg_state(env, ptr_regno); ++ struct bpf_func_state *ptr_state = func(env, reg); ++ int err; ++ int min_off, max_off; ++ ++ /* Note that we pass a NULL meta, so raw access will not be permitted. + */ +- if (!tnum_is_const(reg->var_off)) { ++ err = check_stack_range_initialized(env, ptr_regno, off, size, ++ false, ACCESS_DIRECT, NULL); ++ if (err) ++ return err; ++ ++ min_off = reg->smin_value + off; ++ max_off = reg->smax_value + off; ++ mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); ++ return 0; ++} ++ ++/* check_stack_read dispatches to check_stack_read_fixed_off or ++ * check_stack_read_var_off. ++ * ++ * The caller must ensure that the offset falls within the allocated stack ++ * bounds. ++ * ++ * 'dst_regno' is a register which will receive the value from the stack. It ++ * can be -1, meaning that the read value is not going to a register. ++ */ ++static int check_stack_read(struct bpf_verifier_env *env, ++ int ptr_regno, int off, int size, ++ int dst_regno) ++{ ++ struct bpf_reg_state *reg = reg_state(env, ptr_regno); ++ struct bpf_func_state *state = func(env, reg); ++ int err; ++ /* Some accesses are only permitted with a static offset. */ ++ bool var_off = !tnum_is_const(reg->var_off); ++ ++ /* The offset is required to be static when reads don't go to a ++ * register, in order to not leak pointers (see ++ * check_stack_read_fixed_off). ++ */ ++ if (dst_regno < 0 && var_off) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +- verbose(env, "variable stack access var_off=%s off=%d size=%d\n", ++ verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n", + tn_buf, off, size); + return -EACCES; + } ++ /* Variable offset is prohibited for unprivileged mode for simplicity ++ * since it requires corresponding support in Spectre masking for stack ++ * ALU. See also retrieve_ptr_limit(). 
++ */ ++ if (!env->bypass_spec_v1 && var_off) { ++ char tn_buf[48]; + +- if (off >= 0 || off < -MAX_BPF_STACK) { +- verbose(env, "invalid stack off=%d size=%d\n", off, size); ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", ++ ptr_regno, tn_buf); + return -EACCES; + } + +- return 0; ++ if (!var_off) { ++ off += reg->var_off.value; ++ err = check_stack_read_fixed_off(env, state, off, size, ++ dst_regno); ++ } else { ++ /* Variable offset stack reads need more conservative handling ++ * than fixed offset ones. Note that dst_regno >= 0 on this ++ * branch. ++ */ ++ err = check_stack_read_var_off(env, ptr_regno, off, size, ++ dst_regno); ++ } ++ return err; ++} ++ ++ ++/* check_stack_write dispatches to check_stack_write_fixed_off or ++ * check_stack_write_var_off. ++ * ++ * 'ptr_regno' is the register used as a pointer into the stack. ++ * 'off' includes 'ptr_regno->off', but not its variable offset (if any). ++ * 'value_regno' is the register whose value we're writing to the stack. It can ++ * be -1, meaning that we're not writing from a register. ++ * ++ * The caller must ensure that the offset falls within the maximum stack size. ++ */ ++static int check_stack_write(struct bpf_verifier_env *env, ++ int ptr_regno, int off, int size, ++ int value_regno, int insn_idx) ++{ ++ struct bpf_reg_state *reg = reg_state(env, ptr_regno); ++ struct bpf_func_state *state = func(env, reg); ++ int err; ++ ++ if (tnum_is_const(reg->var_off)) { ++ off += reg->var_off.value; ++ err = check_stack_write_fixed_off(env, state, off, size, ++ value_regno, insn_idx); ++ } else { ++ /* Variable offset stack reads need more conservative handling ++ * than fixed offset ones. 
++ */ ++ err = check_stack_write_var_off(env, state, ++ ptr_regno, off, size, ++ value_regno, insn_idx); ++ } ++ return err; + } + + static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, +@@ -2851,11 +3111,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, + return -EACCES; + } + +-static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) +-{ +- return cur_regs(env) + regno; +-} +- + static bool is_pointer_value(struct bpf_verifier_env *env, int regno) + { + return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); +@@ -2974,8 +3229,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, + break; + case PTR_TO_STACK: + pointer_desc = "stack "; +- /* The stack spill tracking logic in check_stack_write() +- * and check_stack_read() relies on stack accesses being ++ /* The stack spill tracking logic in check_stack_write_fixed_off() ++ * and check_stack_read_fixed_off() relies on stack accesses being + * aligned. + */ + strict = true; +@@ -3393,6 +3648,91 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, + return 0; + } + ++/* Check that the stack access at the given offset is within bounds. The ++ * maximum valid offset is -1. ++ * ++ * The minimum valid offset is -MAX_BPF_STACK for writes, and ++ * -state->allocated_stack for reads. ++ */ ++static int check_stack_slot_within_bounds(int off, ++ struct bpf_func_state *state, ++ enum bpf_access_type t) ++{ ++ int min_valid_off; ++ ++ if (t == BPF_WRITE) ++ min_valid_off = -MAX_BPF_STACK; ++ else ++ min_valid_off = -state->allocated_stack; ++ ++ if (off < min_valid_off || off > -1) ++ return -EACCES; ++ return 0; ++} ++ ++/* Check that the stack access at 'regno + off' falls within the maximum stack ++ * bounds. ++ * ++ * 'off' includes `regno->offset`, but not its dynamic part (if any). 
++ */ ++static int check_stack_access_within_bounds( ++ struct bpf_verifier_env *env, ++ int regno, int off, int access_size, ++ enum stack_access_src src, enum bpf_access_type type) ++{ ++ struct bpf_reg_state *regs = cur_regs(env); ++ struct bpf_reg_state *reg = regs + regno; ++ struct bpf_func_state *state = func(env, reg); ++ int min_off, max_off; ++ int err; ++ char *err_extra; ++ ++ if (src == ACCESS_HELPER) ++ /* We don't know if helpers are reading or writing (or both). */ ++ err_extra = " indirect access to"; ++ else if (type == BPF_READ) ++ err_extra = " read from"; ++ else ++ err_extra = " write to"; ++ ++ if (tnum_is_const(reg->var_off)) { ++ min_off = reg->var_off.value + off; ++ if (access_size > 0) ++ max_off = min_off + access_size - 1; ++ else ++ max_off = min_off; ++ } else { ++ if (reg->smax_value >= BPF_MAX_VAR_OFF || ++ reg->smin_value <= -BPF_MAX_VAR_OFF) { ++ verbose(env, "invalid unbounded variable-offset%s stack R%d\n", ++ err_extra, regno); ++ return -EACCES; ++ } ++ min_off = reg->smin_value + off; ++ if (access_size > 0) ++ max_off = reg->smax_value + off + access_size - 1; ++ else ++ max_off = min_off; ++ } ++ ++ err = check_stack_slot_within_bounds(min_off, state, type); ++ if (!err) ++ err = check_stack_slot_within_bounds(max_off, state, type); ++ ++ if (err) { ++ if (tnum_is_const(reg->var_off)) { ++ verbose(env, "invalid%s stack R%d off=%d size=%d\n", ++ err_extra, regno, off, access_size); ++ } else { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", ++ err_extra, regno, tn_buf, access_size); ++ } ++ } ++ return err; ++} + + /* check whether memory at (regno + off) is accessible for t = (read | write) + * if t==write, value_regno is a register which value is stored into memory +@@ -3505,8 +3845,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + } + + } else if (reg->type == PTR_TO_STACK) { +- off 
+= reg->var_off.value; +- err = check_stack_access(env, reg, off, size); ++ /* Basic bounds checks. */ ++ err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t); + if (err) + return err; + +@@ -3515,12 +3855,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn + if (err) + return err; + +- if (t == BPF_WRITE) +- err = check_stack_write(env, state, off, size, +- value_regno, insn_idx); +- else +- err = check_stack_read(env, state, off, size, ++ if (t == BPF_READ) ++ err = check_stack_read(env, regno, off, size, + value_regno); ++ else ++ err = check_stack_write(env, regno, off, size, ++ value_regno, insn_idx); + } else if (reg_is_pkt_pointer(reg)) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { + verbose(env, "cannot write into packet\n"); +@@ -3642,49 +3982,53 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins + BPF_SIZE(insn->code), BPF_WRITE, -1, true); + } + +-static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, +- int off, int access_size, +- bool zero_size_allowed) ++/* When register 'regno' is used to read the stack (either directly or through ++ * a helper function) make sure that it's within stack boundary and, depending ++ * on the access type, that all elements of the stack are initialized. ++ * ++ * 'off' includes 'regno->off', but not its dynamic part (if any). ++ * ++ * All registers that have been spilled on the stack in the slots within the ++ * read offsets are marked as read. ++ */ ++static int check_stack_range_initialized( ++ struct bpf_verifier_env *env, int regno, int off, ++ int access_size, bool zero_size_allowed, ++ enum stack_access_src type, struct bpf_call_arg_meta *meta) + { + struct bpf_reg_state *reg = reg_state(env, regno); ++ struct bpf_func_state *state = func(env, reg); ++ int err, min_off, max_off, i, j, slot, spi; ++ char *err_extra = type == ACCESS_HELPER ? 
" indirect" : ""; ++ enum bpf_access_type bounds_check_type; ++ /* Some accesses can write anything into the stack, others are ++ * read-only. ++ */ ++ bool clobber = false; + +- if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || +- access_size < 0 || (access_size == 0 && !zero_size_allowed)) { +- if (tnum_is_const(reg->var_off)) { +- verbose(env, "invalid stack type R%d off=%d access_size=%d\n", +- regno, off, access_size); +- } else { +- char tn_buf[48]; +- +- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +- verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", +- regno, tn_buf, access_size); +- } ++ if (access_size == 0 && !zero_size_allowed) { ++ verbose(env, "invalid zero-sized read\n"); + return -EACCES; + } +- return 0; +-} + +-/* when register 'regno' is passed into function that will read 'access_size' +- * bytes from that pointer, make sure that it's within stack boundary +- * and all elements of stack are initialized. +- * Unlike most pointer bounds-checking functions, this one doesn't take an +- * 'off' argument, so it has to add in reg->off itself. +- */ +-static int check_stack_boundary(struct bpf_verifier_env *env, int regno, +- int access_size, bool zero_size_allowed, +- struct bpf_call_arg_meta *meta) +-{ +- struct bpf_reg_state *reg = reg_state(env, regno); +- struct bpf_func_state *state = func(env, reg); +- int err, min_off, max_off, i, j, slot, spi; ++ if (type == ACCESS_HELPER) { ++ /* The bounds checks for writes are more permissive than for ++ * reads. However, if raw_mode is not set, we'll do extra ++ * checks below. 
++ */ ++ bounds_check_type = BPF_WRITE; ++ clobber = true; ++ } else { ++ bounds_check_type = BPF_READ; ++ } ++ err = check_stack_access_within_bounds(env, regno, off, access_size, ++ type, bounds_check_type); ++ if (err) ++ return err; ++ + + if (tnum_is_const(reg->var_off)) { +- min_off = max_off = reg->var_off.value + reg->off; +- err = __check_stack_boundary(env, regno, min_off, access_size, +- zero_size_allowed); +- if (err) +- return err; ++ min_off = max_off = reg->var_off.value + off; + } else { + /* Variable offset is prohibited for unprivileged mode for + * simplicity since it requires corresponding support in +@@ -3695,8 +4039,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +- verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", +- regno, tn_buf); ++ verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", ++ regno, err_extra, tn_buf); + return -EACCES; + } + /* Only initialized buffer on stack is allowed to be accessed +@@ -3708,28 +4052,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, + if (meta && meta->raw_mode) + meta = NULL; + +- if (reg->smax_value >= BPF_MAX_VAR_OFF || +- reg->smax_value <= -BPF_MAX_VAR_OFF) { +- verbose(env, "R%d unbounded indirect variable offset stack access\n", +- regno); +- return -EACCES; +- } +- min_off = reg->smin_value + reg->off; +- max_off = reg->smax_value + reg->off; +- err = __check_stack_boundary(env, regno, min_off, access_size, +- zero_size_allowed); +- if (err) { +- verbose(env, "R%d min value is outside of stack bound\n", +- regno); +- return err; +- } +- err = __check_stack_boundary(env, regno, max_off, access_size, +- zero_size_allowed); +- if (err) { +- verbose(env, "R%d max value is outside of stack bound\n", +- regno); +- return err; +- } ++ min_off = reg->smin_value + off; ++ max_off = reg->smax_value + off; 
+ } + + if (meta && meta->raw_mode) { +@@ -3749,8 +4073,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, + if (*stype == STACK_MISC) + goto mark; + if (*stype == STACK_ZERO) { +- /* helper can write anything into the stack */ +- *stype = STACK_MISC; ++ if (clobber) { ++ /* helper can write anything into the stack */ ++ *stype = STACK_MISC; ++ } + goto mark; + } + +@@ -3761,22 +4087,24 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, + if (state->stack[spi].slot_type[0] == STACK_SPILL && + (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || + env->allow_ptr_leaks)) { +- __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); +- for (j = 0; j < BPF_REG_SIZE; j++) +- state->stack[spi].slot_type[j] = STACK_MISC; ++ if (clobber) { ++ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); ++ for (j = 0; j < BPF_REG_SIZE; j++) ++ state->stack[spi].slot_type[j] = STACK_MISC; ++ } + goto mark; + } + + err: + if (tnum_is_const(reg->var_off)) { +- verbose(env, "invalid indirect read from stack off %d+%d size %d\n", +- min_off, i - min_off, access_size); ++ verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", ++ err_extra, regno, min_off, i - min_off, access_size); + } else { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); +- verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", +- tn_buf, i - min_off, access_size); ++ verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", ++ err_extra, regno, tn_buf, i - min_off, access_size); + } + return -EACCES; + mark: +@@ -3825,8 +4153,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, + "rdwr", + &env->prog->aux->max_rdwr_access); + case PTR_TO_STACK: +- return check_stack_boundary(env, regno, access_size, +- zero_size_allowed, meta); ++ return check_stack_range_initialized( ++ env, ++ regno, reg->off, access_size, ++ zero_size_allowed, ACCESS_HELPER, meta); + default: /* 
scalar_value or invalid ptr */ + /* Allow zero-byte read from NULL, regardless of pointer type */ + if (zero_size_allowed && access_size == 0 && +@@ -5519,6 +5849,41 @@ static int sanitize_err(struct bpf_verifier_env *env, + return -EACCES; + } + ++/* check that stack access falls within stack limits and that 'reg' doesn't ++ * have a variable offset. ++ * ++ * Variable offset is prohibited for unprivileged mode for simplicity since it ++ * requires corresponding support in Spectre masking for stack ALU. See also ++ * retrieve_ptr_limit(). ++ * ++ * ++ * 'off' includes 'reg->off'. ++ */ ++static int check_stack_access_for_ptr_arithmetic( ++ struct bpf_verifier_env *env, ++ int regno, ++ const struct bpf_reg_state *reg, ++ int off) ++{ ++ if (!tnum_is_const(reg->var_off)) { ++ char tn_buf[48]; ++ ++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); ++ verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", ++ regno, tn_buf, off); ++ return -EACCES; ++ } ++ ++ if (off >= 0 || off < -MAX_BPF_STACK) { ++ verbose(env, "R%d stack pointer arithmetic goes out of range, " ++ "prohibited for !root; off=%d\n", regno, off); ++ return -EACCES; ++ } ++ ++ return 0; ++} ++ ++ + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. + * Caller should also handle BPF_MOV case separately. 
+ * If we return -EACCES, caller may want to try again treating pointer as a +@@ -5753,10 +6118,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + "prohibited for !root\n", dst); + return -EACCES; + } else if (dst_reg->type == PTR_TO_STACK && +- check_stack_access(env, dst_reg, dst_reg->off + +- dst_reg->var_off.value, 1)) { +- verbose(env, "R%d stack pointer arithmetic goes out of range, " +- "prohibited for !root\n", dst); ++ check_stack_access_for_ptr_arithmetic( ++ env, dst, dst_reg, dst_reg->off + ++ dst_reg->var_off.value)) { + return -EACCES; + } + } +@@ -11952,6 +12316,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, + env->strict_alignment = false; + + env->allow_ptr_leaks = bpf_allow_ptr_leaks(); ++ env->allow_uninit_stack = bpf_allow_uninit_stack(); + env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); + env->bypass_spec_v1 = bpf_bypass_spec_v1(); + env->bypass_spec_v4 = bpf_bypass_spec_v4(); +-- +2.30.2 + diff --git a/queue-5.10/bpf-permits-pointers-on-stack-for-helper-calls.patch b/queue-5.10/bpf-permits-pointers-on-stack-for-helper-calls.patch new file mode 100644 index 00000000000..0733518d17a --- /dev/null +++ b/queue-5.10/bpf-permits-pointers-on-stack-for-helper-calls.patch @@ -0,0 +1,76 @@ +From a5b41af6f59de4d6bf5fc91a52e50e82f7fdc5b3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Dec 2020 17:33:49 -0800 +Subject: bpf: Permits pointers on stack for helper calls + +From: Yonghong Song + +[ Upstream commit cd17d38f8b28f808c368121041c0a4fa91757e0d ] + +Currently, when checking stack memory accessed by helper calls, +for spills, only PTR_TO_BTF_ID and SCALAR_VALUE are +allowed. + +Song discovered an issue where the below bpf program + int dump_task(struct bpf_iter__task *ctx) + { + struct seq_file *seq = ctx->meta->seq; + static char[] info = "abc"; + BPF_SEQ_PRINTF(seq, "%s\n", info); + return 0; + } +may cause a verifier failure. 
+ +The verifier output looks like: + ; struct seq_file *seq = ctx->meta->seq; + 1: (79) r1 = *(u64 *)(r1 +0) + ; BPF_SEQ_PRINTF(seq, "%s\n", info); + 2: (18) r2 = 0xffff9054400f6000 + 4: (7b) *(u64 *)(r10 -8) = r2 + 5: (bf) r4 = r10 + ; + 6: (07) r4 += -8 + ; BPF_SEQ_PRINTF(seq, "%s\n", info); + 7: (18) r2 = 0xffff9054400fe000 + 9: (b4) w3 = 4 + 10: (b4) w5 = 8 + 11: (85) call bpf_seq_printf#126 + R1_w=ptr_seq_file(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=4,imm=0) + R3_w=inv4 R4_w=fp-8 R5_w=inv8 R10=fp0 fp-8_w=map_value + last_idx 11 first_idx 0 + regs=8 stack=0 before 10: (b4) w5 = 8 + regs=8 stack=0 before 9: (b4) w3 = 4 + invalid indirect read from stack off -8+0 size 8 + +Basically, the verifier complains the map_value pointer at "fp-8" location. +To fix the issue, if env->allow_ptr_leaks is true, let us also permit +pointers on the stack to be accessible by the helper. + +Reported-by: Song Liu +Suggested-by: Alexei Starovoitov +Signed-off-by: Yonghong Song +Signed-off-by: Daniel Borkmann +Acked-by: Song Liu +Link: https://lore.kernel.org/bpf/20201210013349.943719-1-yhs@fb.com +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 3370f0d476e9..2e09e691a6be 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3759,7 +3759,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, + goto mark; + + if (state->stack[spi].slot_type[0] == STACK_SPILL && +- state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { ++ (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || ++ env->allow_ptr_leaks)) { + __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); + for (j = 0; j < BPF_REG_SIZE; j++) + state->stack[spi].slot_type[j] = STACK_MISC; +-- +2.30.2 + diff --git a/queue-5.10/bpf-refactor-and-streamline-bounds-check-into-helper.patch 
b/queue-5.10/bpf-refactor-and-streamline-bounds-check-into-helper.patch new file mode 100644 index 00000000000..cd8b1bdf9e1 --- /dev/null +++ b/queue-5.10/bpf-refactor-and-streamline-bounds-check-into-helper.patch @@ -0,0 +1,90 @@ +From db5f3d93ca03cbc43526af87db38695f4ea66677 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Mar 2021 15:05:48 +0100 +Subject: bpf: Refactor and streamline bounds check into helper + +From: Daniel Borkmann + +[ Upstream commit 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 ] + +Move the bounds check in adjust_ptr_min_max_vals() into a small helper named +sanitize_check_bounds() in order to simplify the former a bit. + +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Acked-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++++-------------- + 1 file changed, 33 insertions(+), 16 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 94923c2bdd81..1b97fd364ce2 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5883,6 +5883,37 @@ static int check_stack_access_for_ptr_arithmetic( + return 0; + } + ++static int sanitize_check_bounds(struct bpf_verifier_env *env, ++ const struct bpf_insn *insn, ++ const struct bpf_reg_state *dst_reg) ++{ ++ u32 dst = insn->dst_reg; ++ ++ /* For unprivileged we require that resulting offset must be in bounds ++ * in order to be able to sanitize access later on. 
++ */ ++ if (env->bypass_spec_v1) ++ return 0; ++ ++ switch (dst_reg->type) { ++ case PTR_TO_STACK: ++ if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, ++ dst_reg->off + dst_reg->var_off.value)) ++ return -EACCES; ++ break; ++ case PTR_TO_MAP_VALUE: ++ if (check_map_access(env, dst, dst_reg->off, 1, false)) { ++ verbose(env, "R%d pointer arithmetic of map value goes out of range, " ++ "prohibited for !root\n", dst); ++ return -EACCES; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} + + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. + * Caller should also handle BPF_MOV case separately. +@@ -6108,22 +6139,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); + +- /* For unprivileged we require that resulting offset must be in bounds +- * in order to be able to sanitize access later on. +- */ +- if (!env->bypass_spec_v1) { +- if (dst_reg->type == PTR_TO_MAP_VALUE && +- check_map_access(env, dst, dst_reg->off, 1, false)) { +- verbose(env, "R%d pointer arithmetic of map value goes out of range, " +- "prohibited for !root\n", dst); +- return -EACCES; +- } else if (dst_reg->type == PTR_TO_STACK && +- check_stack_access_for_ptr_arithmetic( +- env, dst, dst_reg, dst_reg->off + +- dst_reg->var_off.value)) { +- return -EACCES; +- } +- } ++ if (sanitize_check_bounds(env, insn, dst_reg) < 0) ++ return -EACCES; + + return 0; + } +-- +2.30.2 + diff --git a/queue-5.10/bpf-tighten-speculative-pointer-arithmetic-mask.patch b/queue-5.10/bpf-tighten-speculative-pointer-arithmetic-mask.patch new file mode 100644 index 00000000000..43f1bff83d5 --- /dev/null +++ b/queue-5.10/bpf-tighten-speculative-pointer-arithmetic-mask.patch @@ -0,0 +1,201 @@ +From 3a78b31ba7c8d170c0fd8d9ed2b42a01cde42d5a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Mar 2021 10:38:26 +0100 +Subject: bpf: Tighten speculative pointer arithmetic mask + +From: Daniel 
Borkmann + +[ Upstream commit 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 ] + +This work tightens the offset mask we use for unprivileged pointer arithmetic +in order to mitigate a corner case reported by Piotr and Benedict where in +the speculative domain it is possible to advance, for example, the map value +pointer by up to value_size-1 out-of-bounds in order to leak kernel memory +via side-channel to user space. + +Before this change, the computed ptr_limit for retrieve_ptr_limit() helper +represents largest valid distance when moving pointer to the right or left +which is then fed as aux->alu_limit to generate masking instructions against +the offset register. After the change, the derived aux->alu_limit represents +the largest potential value of the offset register which we mask against which +is just a narrower subset of the former limit. + +For minimal complexity, we call sanitize_ptr_alu() from 2 observation points +in adjust_ptr_min_max_vals(), that is, before and after the simulated alu +operation. In the first step, we retrieve the alu_state and alu_limit before +the operation as well as we branch-off a verifier path and push it to the +verification stack as we did before which checks the dst_reg under truncation, +in other words, when the speculative domain would attempt to move the pointer +out-of-bounds. + +In the second step, we retrieve the new alu_limit and calculate the absolute +distance between both. Moreover, we commit the alu_state and final alu_limit +via update_alu_sanitation_state() to the env's instruction aux data, and bail +out from there if there is a mismatch due to coming from different verification +paths with different states. 
+ +Reported-by: Piotr Krysiuk +Reported-by: Benedict Schlueter +Signed-off-by: Daniel Borkmann +Reviewed-by: John Fastabend +Acked-by: Alexei Starovoitov +Tested-by: Benedict Schlueter +Signed-off-by: Sasha Levin +--- + kernel/bpf/verifier.c | 73 ++++++++++++++++++++++++++----------------- + 1 file changed, 44 insertions(+), 29 deletions(-) + +diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c +index 1b97fd364ce2..b9180509917e 100644 +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5674,7 +5674,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, + bool off_is_neg = off_reg->smin_value < 0; + bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || + (opcode == BPF_SUB && !off_is_neg); +- u32 off, max = 0, ptr_limit = 0; ++ u32 max = 0, ptr_limit = 0; + + if (!tnum_is_const(off_reg->var_off) && + (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) +@@ -5683,26 +5683,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, + switch (ptr_reg->type) { + case PTR_TO_STACK: + /* Offset 0 is out-of-bounds, but acceptable start for the +- * left direction, see BPF_REG_FP. ++ * left direction, see BPF_REG_FP. Also, unknown scalar ++ * offset where we would need to deal with min/max bounds is ++ * currently prohibited for unprivileged. + */ + max = MAX_BPF_STACK + mask_to_left; +- /* Indirect variable offset stack access is prohibited in +- * unprivileged mode so it's not handled here. +- */ +- off = ptr_reg->off + ptr_reg->var_off.value; +- if (mask_to_left) +- ptr_limit = MAX_BPF_STACK + off; +- else +- ptr_limit = -off - 1; ++ ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); + break; + case PTR_TO_MAP_VALUE: + max = ptr_reg->map_ptr->value_size; +- if (mask_to_left) { +- ptr_limit = ptr_reg->umax_value + ptr_reg->off; +- } else { +- off = ptr_reg->smin_value + ptr_reg->off; +- ptr_limit = ptr_reg->map_ptr->value_size - off - 1; +- } ++ ptr_limit = (mask_to_left ? 
++ ptr_reg->smin_value : ++ ptr_reg->umax_value) + ptr_reg->off; + break; + default: + return REASON_TYPE; +@@ -5757,10 +5749,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, + const struct bpf_reg_state *off_reg, +- struct bpf_reg_state *dst_reg) ++ struct bpf_reg_state *dst_reg, ++ struct bpf_insn_aux_data *tmp_aux, ++ const bool commit_window) + { ++ struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; + struct bpf_verifier_state *vstate = env->cur_state; +- struct bpf_insn_aux_data *aux = cur_aux(env); + bool off_is_neg = off_reg->smin_value < 0; + bool ptr_is_dst_reg = ptr_reg == dst_reg; + u8 opcode = BPF_OP(insn->code); +@@ -5779,18 +5773,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, + if (vstate->speculative) + goto do_sim; + +- alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; +- alu_state |= ptr_is_dst_reg ? +- BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; +- + err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); + if (err < 0) + return err; + ++ if (commit_window) { ++ /* In commit phase we narrow the masking window based on ++ * the observed pointer move after the simulated operation. ++ */ ++ alu_state = tmp_aux->alu_state; ++ alu_limit = abs(tmp_aux->alu_limit - alu_limit); ++ } else { ++ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; ++ alu_state |= ptr_is_dst_reg ? ++ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; ++ } ++ + err = update_alu_sanitation_state(aux, alu_state, alu_limit); + if (err < 0) + return err; + do_sim: ++ /* If we're in commit phase, we're done here given we already ++ * pushed the truncated dst_reg into the speculative verification ++ * stack. ++ */ ++ if (commit_window) ++ return 0; ++ + /* Simulate and find potential out-of-bounds access under + * speculative execution from truncation as a result of + * masking when off was not within expected range. 
If off +@@ -5933,6 +5942,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; + u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, + umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; ++ struct bpf_insn_aux_data tmp_aux = {}; + u8 opcode = BPF_OP(insn->code); + u32 dst = insn->dst_reg; + int ret; +@@ -5999,12 +6009,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + /* pointer types do not carry 32-bit bounds at the moment. */ + __mark_reg32_unbounded(dst_reg); + +- switch (opcode) { +- case BPF_ADD: +- ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); ++ if (sanitize_needed(opcode)) { ++ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, ++ &tmp_aux, false); + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); ++ } + ++ switch (opcode) { ++ case BPF_ADD: + /* We can take a fixed offset as long as it doesn't overflow + * the s32 'off' field + */ +@@ -6055,10 +6068,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + } + break; + case BPF_SUB: +- ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); +- if (ret < 0) +- return sanitize_err(env, insn, ret, off_reg, dst_reg); +- + if (dst_reg == off_reg) { + /* scalar -= pointer. 
Creates an unknown scalar */ + verbose(env, "R%d tried to subtract pointer from scalar\n", +@@ -6141,6 +6150,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, + + if (sanitize_check_bounds(env, insn, dst_reg) < 0) + return -EACCES; ++ if (sanitize_needed(opcode)) { ++ ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, ++ &tmp_aux, true); ++ if (ret < 0) ++ return sanitize_err(env, insn, ret, off_reg, dst_reg); ++ } + + return 0; + } +-- +2.30.2 + diff --git a/queue-5.10/series b/queue-5.10/series index 74dc6e41fad..8b6c02ac2a9 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -6,3 +6,7 @@ pinctrl-lewisburg-update-number-of-pins-in-community.patch block-return-ebusy-when-there-are-open-partitions-in.patch pinctrl-core-show-pin-numbers-for-the-controllers-wi.patch arm64-dts-allwinner-revert-sd-card-cd-gpio-for-pine6.patch +bpf-permits-pointers-on-stack-for-helper-calls.patch +bpf-allow-variable-offset-stack-access.patch +bpf-refactor-and-streamline-bounds-check-into-helper.patch +bpf-tighten-speculative-pointer-arithmetic-mask.patch