From 7245b3e89e4e8ce5cc2a8888cc995a980d6bebc6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 26 Jan 2024 17:17:09 -0800 Subject: [PATCH] 6.6-stable patches added patches: bpf-correct-loop-detection-for-iterators-convergence.patch bpf-exact-states-comparison-for-iterator-convergence-checks.patch bpf-extract-__check_reg_arg-utility-function.patch bpf-extract-same_callsites-as-utility-function.patch bpf-extract-setup_func_entry-utility-function.patch bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch bpf-print-full-verifier-states-on-infinite-loop-detection.patch bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch bpf-widening-for-callback-iterators.patch selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch selftests-bpf-test-widening-for-iterating-callbacks.patch selftests-bpf-tests-for-iterating-callbacks.patch selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch --- ...-detection-for-iterators-convergence.patch | 339 +++++++++ ...ison-for-iterator-convergence-checks.patch | 494 +++++++++++++ ...act-__check_reg_arg-utility-function.patch | 70 ++ ...t-same_callsites-as-utility-function.patch | 63 ++ ...ct-setup_func_entry-utility-function.patch | 149 ++++ ...mber-of-bpf_loop-callback-iterations.patch | 181 +++++ ...loser-to-the-beginning-of-verifier.c.patch | 64 ++ ...er-states-on-infinite-loop-detection.patch | 34 + ...y-are-called-unknown-number-of-times.patch | 660 ++++++++++++++++++ .../bpf-widening-for-callback-iterators.patch | 83 +++ ...er-of-bpf_loop-iterations-is-tracked.patch | 103 +++ ...-loops-are-detected-in-a-tricky-case.patch | 228 ++++++ ...est-widening-for-iterating-callbacks.patch | 56 ++ ...ts-bpf-tests-for-iterating-callbacks.patch | 197 ++++++ ...ed-read-precision-makrs-in-loop-body.patch | 556 +++++++++++++++ ...yload-offset-as-scalar-in-strobemeta.patch | 252 +++++++ ...oad-offset-as-scalar-in-xdp_synproxy.patch | 179 +++++ queue-6.6/series | 17 + 18 files changed, 3725 insertions(+) create mode 100644 queue-6.6/bpf-correct-loop-detection-for-iterators-convergence.patch create mode 100644 queue-6.6/bpf-exact-states-comparison-for-iterator-convergence-checks.patch create mode 100644 queue-6.6/bpf-extract-__check_reg_arg-utility-function.patch create mode 100644 queue-6.6/bpf-extract-same_callsites-as-utility-function.patch create mode 100644 queue-6.6/bpf-extract-setup_func_entry-utility-function.patch create mode 100644 queue-6.6/bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch create mode 100644 queue-6.6/bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch create mode 100644 queue-6.6/bpf-print-full-verifier-states-on-infinite-loop-detection.patch create mode 100644 queue-6.6/bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch create mode 100644 queue-6.6/bpf-widening-for-callback-iterators.patch create mode 100644 queue-6.6/selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch create mode 100644 queue-6.6/selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch create mode 100644 queue-6.6/selftests-bpf-test-widening-for-iterating-callbacks.patch create mode 100644 
queue-6.6/selftests-bpf-tests-for-iterating-callbacks.patch create mode 100644 queue-6.6/selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch create mode 100644 queue-6.6/selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch create mode 100644 queue-6.6/selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch diff --git a/queue-6.6/bpf-correct-loop-detection-for-iterators-convergence.patch b/queue-6.6/bpf-correct-loop-detection-for-iterators-convergence.patch new file mode 100644 index 00000000000..2caf7199bae --- /dev/null +++ b/queue-6.6/bpf-correct-loop-detection-for-iterators-convergence.patch @@ -0,0 +1,339 @@ +From 2a0992829ea3864939d917a5c7b48be6629c6217 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 24 Oct 2023 03:09:15 +0300 +Subject: bpf: correct loop detection for iterators convergence + +From: Eduard Zingerman + +commit 2a0992829ea3864939d917a5c7b48be6629c6217 upstream. + +It turns out that .branches > 0 in is_state_visited() is not a +sufficient condition to identify if two verifier states form a loop +when iterators convergence is computed. This commit adds logic to +distinguish situations like below: + + (I) initial (II) initial + | | + V V + .---------> hdr .. + | | | + | V V + | .------... .------.. + | | | | | + | V V V V + | ... ... .-> hdr .. + | | | | | | + | V V | V V + | succ <- cur | succ <- cur + | | | | + | V | V + | ... | ... + | | | | + '----' '----' + +For both (I) and (II) successor 'succ' of the current state 'cur' was +previously explored and has branches count at 0. However, loop entry +'hdr' corresponding to 'succ' might be a part of current DFS path. +If that is the case 'succ' and 'cur' are members of the same loop +and have to be compared exactly. + +Co-developed-by: Andrii Nakryiko +Co-developed-by: Alexei Starovoitov +Reviewed-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231024000917.12153-6-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 15 +++ + kernel/bpf/verifier.c | 207 ++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 218 insertions(+), 4 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -372,10 +372,25 @@ struct bpf_verifier_state { + struct bpf_active_lock active_lock; + bool speculative; + bool active_rcu_lock; ++ /* If this state was ever pointed-to by other state's loop_entry field ++ * this flag would be set to true. Used to avoid freeing such states ++ * while they are still in use. ++ */ ++ bool used_as_loop_entry; + + /* first and last insn idx of this verifier state */ + u32 first_insn_idx; + u32 last_insn_idx; ++ /* If this state is a part of states loop this field points to some ++ * parent of this state such that: ++ * - it is also a member of the same states loop; ++ * - DFS states traversal starting from initial state visits loop_entry ++ * state before this state. ++ * Used to compute topmost loop entry for state loops. ++ * State loops might appear because of open coded iterators logic. ++ * See get_loop_entry() for more information. ++ */ ++ struct bpf_verifier_state *loop_entry; + /* jmp history recorded from first to last. + * backtracking is using it to go from last to first. + * For most states jmp_history_cnt is [0-3]. 
+--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1772,6 +1772,7 @@ static int copy_verifier_state(struct bp + dst_state->first_insn_idx = src->first_insn_idx; + dst_state->last_insn_idx = src->last_insn_idx; + dst_state->dfs_depth = src->dfs_depth; ++ dst_state->used_as_loop_entry = src->used_as_loop_entry; + for (i = 0; i <= src->curframe; i++) { + dst = dst_state->frame[i]; + if (!dst) { +@@ -1814,11 +1815,176 @@ static bool same_callsites(struct bpf_ve + return true; + } + ++/* Open coded iterators allow back-edges in the state graph in order to ++ * check unbounded loops that iterators. ++ * ++ * In is_state_visited() it is necessary to know if explored states are ++ * part of some loops in order to decide whether non-exact states ++ * comparison could be used: ++ * - non-exact states comparison establishes sub-state relation and uses ++ * read and precision marks to do so, these marks are propagated from ++ * children states and thus are not guaranteed to be final in a loop; ++ * - exact states comparison just checks if current and explored states ++ * are identical (and thus form a back-edge). ++ * ++ * Paper "A New Algorithm for Identifying Loops in Decompilation" ++ * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient ++ * algorithm for loop structure detection and gives an overview of ++ * relevant terminology. It also has helpful illustrations. ++ * ++ * [1] https://api.semanticscholar.org/CorpusID:15784067 ++ * ++ * We use a similar algorithm but because loop nested structure is ++ * irrelevant for verifier ours is significantly simpler and resembles ++ * strongly connected components algorithm from Sedgewick's textbook. ++ * ++ * Define topmost loop entry as a first node of the loop traversed in a ++ * depth first search starting from initial state. The goal of the loop ++ * tracking algorithm is to associate topmost loop entries with states ++ * derived from these entries. ++ * ++ * For each step in the DFS states traversal algorithm needs to identify ++ * the following situations: ++ * ++ * initial initial initial ++ * | | | ++ * V V V ++ * ... ... .---------> hdr ++ * | | | | ++ * V V | V ++ * cur .-> succ | .------... ++ * | | | | | | ++ * V | V | V V ++ * succ '-- cur | ... ... ++ * | | | ++ * | V V ++ * | succ <- cur ++ * | | ++ * | V ++ * | ... ++ * | | ++ * '----' ++ * ++ * (A) successor state of cur (B) successor state of cur or it's entry ++ * not yet traversed are in current DFS path, thus cur and succ ++ * are members of the same outermost loop ++ * ++ * initial initial ++ * | | ++ * V V ++ * ... ... ++ * | | ++ * V V ++ * .------... .------... ++ * | | | | ++ * V V V V ++ * .-> hdr ... ... ... ++ * | | | | | ++ * | V V V V ++ * | succ <- cur succ <- cur ++ * | | | ++ * | V V ++ * | ... ... ++ * | | | ++ * '----' exit ++ * ++ * (C) successor state of cur is a part of some loop but this loop ++ * does not include cur or successor state is not in a loop at all. ++ * ++ * Algorithm could be described as the following python code: ++ * ++ * traversed = set() # Set of traversed nodes ++ * entries = {} # Mapping from node to loop entry ++ * depths = {} # Depth level assigned to graph node ++ * path = set() # Current DFS path ++ * ++ * # Find outermost loop entry known for n ++ * def get_loop_entry(n): ++ * h = entries.get(n, None) ++ * while h in entries and entries[h] != h: ++ * h = entries[h] ++ * return h ++ * ++ * # Update n's loop entry if h's outermost entry comes ++ * # before n's outermost entry in current DFS path. 
++ * def update_loop_entry(n, h): ++ * n1 = get_loop_entry(n) or n ++ * h1 = get_loop_entry(h) or h ++ * if h1 in path and depths[h1] <= depths[n1]: ++ * entries[n] = h1 ++ * ++ * def dfs(n, depth): ++ * traversed.add(n) ++ * path.add(n) ++ * depths[n] = depth ++ * for succ in G.successors(n): ++ * if succ not in traversed: ++ * # Case A: explore succ and update cur's loop entry ++ * # only if succ's entry is in current DFS path. ++ * dfs(succ, depth + 1) ++ * h = get_loop_entry(succ) ++ * update_loop_entry(n, h) ++ * else: ++ * # Case B or C depending on `h1 in path` check in update_loop_entry(). ++ * update_loop_entry(n, succ) ++ * path.remove(n) ++ * ++ * To adapt this algorithm for use with verifier: ++ * - use st->branch == 0 as a signal that DFS of succ had been finished ++ * and cur's loop entry has to be updated (case A), handle this in ++ * update_branch_counts(); ++ * - use st->branch > 0 as a signal that st is in the current DFS path; ++ * - handle cases B and C in is_state_visited(); ++ * - update topmost loop entry for intermediate states in get_loop_entry(). ++ */ ++static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st) ++{ ++ struct bpf_verifier_state *topmost = st->loop_entry, *old; ++ ++ while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) ++ topmost = topmost->loop_entry; ++ /* Update loop entries for intermediate states to avoid this ++ * traversal in future get_loop_entry() calls. ++ */ ++ while (st && st->loop_entry != topmost) { ++ old = st->loop_entry; ++ st->loop_entry = topmost; ++ st = old; ++ } ++ return topmost; ++} ++ ++static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) ++{ ++ struct bpf_verifier_state *cur1, *hdr1; ++ ++ cur1 = get_loop_entry(cur) ?: cur; ++ hdr1 = get_loop_entry(hdr) ?: hdr; ++ /* The head1->branches check decides between cases B and C in ++ * comment for get_loop_entry(). If hdr1->branches == 0 then ++ * head's topmost loop entry is not in current DFS path, ++ * hence 'cur' and 'hdr' are not in the same loop and there is ++ * no need to update cur->loop_entry. ++ */ ++ if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) { ++ cur->loop_entry = hdr; ++ hdr->used_as_loop_entry = true; ++ } ++} ++ + static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) + { + while (st) { + u32 br = --st->branches; + ++ /* br == 0 signals that DFS exploration for 'st' is finished, ++ * thus it is necessary to update parent's loop entry if it ++ * turned out that st is a part of some loop. ++ * This is a part of 'case A' in get_loop_entry() comment. 
++ */ ++ if (br == 0 && st->parent && st->loop_entry) ++ update_loop_entry(st->parent, st->loop_entry); ++ + /* WARN_ON(br > 1) technically makes sense here, + * but see comment in push_stack(), hence: + */ +@@ -16262,10 +16428,11 @@ static int is_state_visited(struct bpf_v + { + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl, **pprev; +- struct bpf_verifier_state *cur = env->cur_state, *new; ++ struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; + int i, j, n, err, states_cnt = 0; + bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); + bool add_new_state = force_new_state; ++ bool force_exact; + + /* bpf progs typically have pruning point every 4 instructions + * http://vger.kernel.org/bpfconf2019.html#session-1 +@@ -16360,8 +16527,10 @@ static int is_state_visited(struct bpf_v + */ + spi = __get_spi(iter_reg->off + iter_reg->var_off.value); + iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; +- if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) ++ if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { ++ update_loop_entry(cur, &sl->state); + goto hit; ++ } + } + goto skip_inf_loop_check; + } +@@ -16392,7 +16561,36 @@ skip_inf_loop_check: + add_new_state = false; + goto miss; + } +- if (states_equal(env, &sl->state, cur, false)) { ++ /* If sl->state is a part of a loop and this loop's entry is a part of ++ * current verification path then states have to be compared exactly. ++ * 'force_exact' is needed to catch the following case: ++ * ++ * initial Here state 'succ' was processed first, ++ * | it was eventually tracked to produce a ++ * V state identical to 'hdr'. ++ * .---------> hdr All branches from 'succ' had been explored ++ * | | and thus 'succ' has its .branches == 0. ++ * | V ++ * | .------... Suppose states 'cur' and 'succ' correspond ++ * | | | to the same instruction + callsites. ++ * | V V In such case it is necessary to check ++ * | ... ... if 'succ' and 'cur' are states_equal(). ++ * | | | If 'succ' and 'cur' are a part of the ++ * | V V same loop exact flag has to be set. ++ * | succ <- cur To check if that is the case, verify ++ * | | if loop entry of 'succ' is in current ++ * | V DFS path. ++ * | ... ++ * | | ++ * '----' ++ * ++ * Additional details are in the comment before get_loop_entry(). ++ */ ++ loop_entry = get_loop_entry(&sl->state); ++ force_exact = loop_entry && loop_entry->branches > 0; ++ if (states_equal(env, &sl->state, cur, force_exact)) { ++ if (force_exact) ++ update_loop_entry(cur, loop_entry); + hit: + sl->hit_cnt++; + /* reached equivalent register/stack state, +@@ -16441,7 +16639,8 @@ miss: + * speed up verification + */ + *pprev = sl->next; +- if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { ++ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE && ++ !sl->state.used_as_loop_entry) { + u32 br = sl->state.branches; + + WARN_ONCE(br, diff --git a/queue-6.6/bpf-exact-states-comparison-for-iterator-convergence-checks.patch b/queue-6.6/bpf-exact-states-comparison-for-iterator-convergence-checks.patch new file mode 100644 index 00000000000..7481001c459 --- /dev/null +++ b/queue-6.6/bpf-exact-states-comparison-for-iterator-convergence-checks.patch @@ -0,0 +1,494 @@ +From 2793a8b015f7f1caadb9bce9c63dc659f7522676 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 24 Oct 2023 03:09:13 +0300 +Subject: bpf: exact states comparison for iterator convergence checks + +From: Eduard Zingerman + +commit 2793a8b015f7f1caadb9bce9c63dc659f7522676 upstream. 
+ +Convergence for open coded iterators is computed in is_state_visited() +by examining states with branches count > 1 and using states_equal(). +states_equal() computes sub-state relation using read and precision marks. +Read and precision marks are propagated from children states, +thus are not guaranteed to be complete inside a loop when branches +count > 1. This could be demonstrated using the following unsafe program: + + 1. r7 = -16 + 2. r6 = bpf_get_prandom_u32() + 3. while (bpf_iter_num_next(&fp[-8])) { + 4. if (r6 != 42) { + 5. r7 = -32 + 6. r6 = bpf_get_prandom_u32() + 7. continue + 8. } + 9. r0 = r10 + 10. r0 += r7 + 11. r8 = *(u64 *)(r0 + 0) + 12. r6 = bpf_get_prandom_u32() + 13. } + +Here verifier would first visit path 1-3, create a checkpoint at 3 +with r7=-16, continue to 4-7,3 with r7=-32. + +Because instructions at 9-12 had not been visited yet, the existing +checkpoint at 3 does not have read or precision mark for r7. +Thus states_equal() would return true and verifier would discard +current state, thus unsafe memory access at 11 would not be caught. + +This commit fixes this loophole by introducing exact state comparisons +for iterator convergence logic: +- registers are compared using regs_exact() regardless of read or + precision marks; +- stack slots have to have identical type. + +Unfortunately, this is too strict even for simple programs like below: + + i = 0; + while(iter_next(&it)) + i++; + +At each iteration step i++ would produce a new distinct state and +eventually instruction processing limit would be reached. + +To avoid such behavior speculatively forget (widen) range for +imprecise scalar registers, if those registers were not precise at the +end of the previous iteration and do not match exactly. + +This is a conservative heuristic that allows verifying a wide range of +programs; however, it precludes verification of programs that conjure +an imprecise value on the first loop iteration and use it as precise +on the second. + +Test case iter_task_vma_for_each() presents one of such cases: + + unsigned int seen = 0; + ... + bpf_for_each(task_vma, vma, task, 0) { + if (seen >= 1000) + break; + ... + seen++; + } + +Here clang generates the following code: + +: + 24: r8 = r6 ; stash current value of + ... body ... 'seen' + 29: r1 = r10 + 30: r1 += -0x8 + 31: call bpf_iter_task_vma_next + 32: r6 += 0x1 ; seen++; + 33: if r0 == 0x0 goto +0x2 ; exit on next() == NULL + 34: r7 += 0x10 + 35: if r8 < 0x3e7 goto -0xc ; loop on seen < 1000 + +: + ... exit ... + +Note that the counter in r6 is copied to r8 and then incremented, +conditional jump is done using r8. Because of this, the precision mark for +r6 lags one state behind the precision mark on r8 and widening logic +kicks in. + +Adding barrier_var(seen) after the conditional is sufficient to force +clang to use the same register for both counting and conditional jump. + +This issue was discussed in the thread [1] which was started by +Andrew Werner demonstrating a similar bug +in callback functions handling. The callbacks would be addressed +in a followup patch.
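A minimal sketch of the barrier_var() workaround mentioned above (illustrative only, not part of the patch; barrier_var() is the macro from the selftests' bpf_misc.h, and the elided "..." lines follow the snippet shown earlier):

    unsigned int seen = 0;
    ...
    bpf_for_each(task_vma, vma, task, 0) {
            if (seen >= 1000)
                    break;
            /* Pin 'seen' to a single register so the precision
             * mark does not lag one state behind on a copy.
             */
            barrier_var(seen);
            ...
            seen++;
    }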
+ +[1] https://lore.kernel.org/bpf/97a90da09404c65c8e810cf83c94ac703705dc0e.camel@gmail.com/ + +Co-developed-by: Andrii Nakryiko +Co-developed-by: Alexei Starovoitov +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231024000917.12153-4-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 218 ++++++++++++++++++++++++++++++++++++------- + 2 files changed, 188 insertions(+), 31 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -383,6 +383,7 @@ struct bpf_verifier_state { + */ + struct bpf_idx_pair *jmp_history; + u32 jmp_history_cnt; ++ u32 dfs_depth; + }; + + #define bpf_get_spilled_reg(slot, frame) \ +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1771,6 +1771,7 @@ static int copy_verifier_state(struct bp + dst_state->parent = src->parent; + dst_state->first_insn_idx = src->first_insn_idx; + dst_state->last_insn_idx = src->last_insn_idx; ++ dst_state->dfs_depth = src->dfs_depth; + for (i = 0; i <= src->curframe; i++) { + dst = dst_state->frame[i]; + if (!dst) { +@@ -7554,6 +7555,81 @@ static int process_iter_arg(struct bpf_v + return 0; + } + ++/* Look for a previous loop entry at insn_idx: nearest parent state ++ * stopped at insn_idx with callsites matching those in cur->frame. ++ */ ++static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *cur, ++ int insn_idx) ++{ ++ struct bpf_verifier_state_list *sl; ++ struct bpf_verifier_state *st; ++ ++ /* Explored states are pushed in stack order, most recent states come first */ ++ sl = *explored_state(env, insn_idx); ++ for (; sl; sl = sl->next) { ++ /* If st->branches != 0 state is a part of current DFS verification path, ++ * hence cur and st form a loop. ++ */ ++ st = &sl->state; ++ if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) && ++ st->dfs_depth < cur->dfs_depth) ++ return st; ++ } ++ ++ return NULL; ++} ++ ++static void reset_idmap_scratch(struct bpf_verifier_env *env); ++static bool regs_exact(const struct bpf_reg_state *rold, ++ const struct bpf_reg_state *rcur, ++ struct bpf_idmap *idmap); ++ ++static void maybe_widen_reg(struct bpf_verifier_env *env, ++ struct bpf_reg_state *rold, struct bpf_reg_state *rcur, ++ struct bpf_idmap *idmap) ++{ ++ if (rold->type != SCALAR_VALUE) ++ return; ++ if (rold->type != rcur->type) ++ return; ++ if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap)) ++ return; ++ __mark_reg_unknown(env, rcur); ++} ++ ++static int widen_imprecise_scalars(struct bpf_verifier_env *env, ++ struct bpf_verifier_state *old, ++ struct bpf_verifier_state *cur) ++{ ++ struct bpf_func_state *fold, *fcur; ++ int i, fr; ++ ++ reset_idmap_scratch(env); ++ for (fr = old->curframe; fr >= 0; fr--) { ++ fold = old->frame[fr]; ++ fcur = cur->frame[fr]; ++ ++ for (i = 0; i < MAX_BPF_REG; i++) ++ maybe_widen_reg(env, ++ &fold->regs[i], ++ &fcur->regs[i], ++ &env->idmap_scratch); ++ ++ for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) { ++ if (!is_spilled_reg(&fold->stack[i]) || ++ !is_spilled_reg(&fcur->stack[i])) ++ continue; ++ ++ maybe_widen_reg(env, ++ &fold->stack[i].spilled_ptr, ++ &fcur->stack[i].spilled_ptr, ++ &env->idmap_scratch); ++ } ++ } ++ return 0; ++} ++ + /* process_iter_next_call() is called when verifier gets to iterator's next + * "method" (e.g., bpf_iter_num_next() for numbers iterator) call.
We'll refer + * to it as just "iter_next()" in comments below. +@@ -7595,25 +7671,47 @@ static int process_iter_arg(struct bpf_v + * is some statically known limit on number of iterations (e.g., if there is + * an explicit `if n > 100 then break;` statement somewhere in the loop). + * +- * One very subtle but very important aspect is that we *always* simulate NULL +- * condition first (as the current state) before we simulate non-NULL case. +- * This has to do with intricacies of scalar precision tracking. By simulating +- * "exit condition" of iter_next() returning NULL first, we make sure all the +- * relevant precision marks *that will be set **after** we exit iterator loop* +- * are propagated backwards to common parent state of NULL and non-NULL +- * branches. Thanks to that, state equivalence checks done later in forked +- * state, when reaching iter_next() for ACTIVE iterator, can assume that +- * precision marks are finalized and won't change. Because simulating another +- * ACTIVE iterator iteration won't change them (because given same input +- * states we'll end up with exactly same output states which we are currently +- * comparing; and verification after the loop already propagated back what +- * needs to be **additionally** tracked as precise). It's subtle, grok +- * precision tracking for more intuitive understanding. ++ * Iteration convergence logic in is_state_visited() relies on exact ++ * states comparison, which ignores read and precision marks. ++ * This is necessary because read and precision marks are not finalized ++ * while in the loop. Exact comparison might preclude convergence for ++ * simple programs like below: ++ * ++ * i = 0; ++ * while(iter_next(&it)) ++ * i++; ++ * ++ * At each iteration step i++ would produce a new distinct state and ++ * eventually instruction processing limit would be reached. ++ * ++ * To avoid such behavior speculatively forget (widen) range for ++ * imprecise scalar registers, if those registers were not precise at the ++ * end of the previous iteration and do not match exactly. ++ * ++ * This is a conservative heuristic that allows verifying a wide range of programs, ++ * however it precludes verification of programs that conjure an ++ * imprecise value on the first loop iteration and use it as precise on the second. ++ * For example, the following safe program would fail to verify: ++ * ++ * struct bpf_num_iter it; ++ * int arr[10]; ++ * int i = 0, a = 0; ++ * bpf_iter_num_new(&it, 0, 10); ++ * while (bpf_iter_num_next(&it)) { ++ * if (a == 0) { ++ * a = 1; ++ * i = 7; // Because i changed verifier would forget ++ * // its range on second loop entry. ++ * } else { ++ * arr[i] = 42; // This would fail to verify. ++ * } ++ * } ++ * bpf_iter_num_destroy(&it); + */ + static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, + struct bpf_kfunc_call_arg_meta *meta) + { +- struct bpf_verifier_state *cur_st = env->cur_state, *queued_st; ++ struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; + struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; + struct bpf_reg_state *cur_iter, *queued_iter; + int iter_frameno = meta->iter.frameno; +@@ -7631,6 +7729,19 @@ static int process_iter_next_call(struct + } + + if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) { ++ /* Because iter_next() call is a checkpoint, is_state_visited() ++ * should guarantee parent state with same call sites and insn_idx.
++ */ ++ if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx || ++ !same_callsites(cur_st->parent, cur_st)) { ++ verbose(env, "bug: bad parent state for iter next call"); ++ return -EFAULT; ++ } ++ /* Note cur_st->parent in the call below, it is necessary to skip ++ * checkpoint created for cur_st by is_state_visited() ++ * right at this instruction. ++ */ ++ prev_st = find_prev_entry(env, cur_st->parent, insn_idx); + /* branch out active iter state */ + queued_st = push_stack(env, insn_idx + 1, insn_idx, false); + if (!queued_st) +@@ -7639,6 +7750,8 @@ static int process_iter_next_call(struct + queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; + queued_iter->iter.depth++; ++ if (prev_st) ++ widen_imprecise_scalars(env, prev_st, queued_st); + + queued_fr = queued_st->frame[queued_st->curframe]; + mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]); +@@ -15560,8 +15673,11 @@ static bool regs_exact(const struct bpf_ + + /* Returns true if (rold safe implies rcur safe) */ + static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, +- struct bpf_reg_state *rcur, struct bpf_idmap *idmap) ++ struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact) + { ++ if (exact) ++ return regs_exact(rold, rcur, idmap); ++ + if (!(rold->live & REG_LIVE_READ)) + /* explored state didn't use this */ + return true; +@@ -15678,7 +15794,7 @@ static bool regsafe(struct bpf_verifier_ + } + + static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, +- struct bpf_func_state *cur, struct bpf_idmap *idmap) ++ struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact) + { + int i, spi; + +@@ -15691,7 +15807,12 @@ static bool stacksafe(struct bpf_verifie + + spi = i / BPF_REG_SIZE; + +- if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { ++ if (exact && ++ old->stack[spi].slot_type[i % BPF_REG_SIZE] != ++ cur->stack[spi].slot_type[i % BPF_REG_SIZE]) ++ return false; ++ ++ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) { + i += BPF_REG_SIZE - 1; + /* explored state didn't use this */ + continue; +@@ -15741,7 +15862,7 @@ static bool stacksafe(struct bpf_verifie + * return false to continue verification of this path + */ + if (!regsafe(env, &old->stack[spi].spilled_ptr, +- &cur->stack[spi].spilled_ptr, idmap)) ++ &cur->stack[spi].spilled_ptr, idmap, exact)) + return false; + break; + case STACK_DYNPTR: +@@ -15823,16 +15944,16 @@ static bool refsafe(struct bpf_func_stat + * the current state will reach 'bpf_exit' instruction safely + */ + static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, +- struct bpf_func_state *cur) ++ struct bpf_func_state *cur, bool exact) + { + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + if (!regsafe(env, &old->regs[i], &cur->regs[i], +- &env->idmap_scratch)) ++ &env->idmap_scratch, exact)) + return false; + +- if (!stacksafe(env, old, cur, &env->idmap_scratch)) ++ if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) + return false; + + if (!refsafe(old, cur, &env->idmap_scratch)) +@@ -15841,17 +15962,23 @@ static bool func_states_equal(struct bpf + return true; + } + ++static void reset_idmap_scratch(struct bpf_verifier_env *env) ++{ ++ env->idmap_scratch.tmp_id_gen = env->id_gen; ++ memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); ++} ++ + static bool states_equal(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, +- struct bpf_verifier_state *cur) ++ struct 
bpf_verifier_state *cur, ++ bool exact) + { + int i; + + if (old->curframe != cur->curframe) + return false; + +- env->idmap_scratch.tmp_id_gen = env->id_gen; +- memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); ++ reset_idmap_scratch(env); + + /* Verification state from speculative execution simulation + * must never prune a non-speculative execution one. +@@ -15881,7 +16008,7 @@ static bool states_equal(struct bpf_veri + for (i = 0; i <= old->curframe; i++) { + if (old->frame[i]->callsite != cur->frame[i]->callsite) + return false; +- if (!func_states_equal(env, old->frame[i], cur->frame[i])) ++ if (!func_states_equal(env, old->frame[i], cur->frame[i], exact)) + return false; + } + return true; +@@ -16136,7 +16263,7 @@ static int is_state_visited(struct bpf_v + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl, **pprev; + struct bpf_verifier_state *cur = env->cur_state, *new; +- int i, j, err, states_cnt = 0; ++ int i, j, n, err, states_cnt = 0; + bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); + bool add_new_state = force_new_state; + +@@ -16191,9 +16318,33 @@ static int is_state_visited(struct bpf_v + * It's safe to assume that iterator loop will finish, taking into + * account iter_next() contract of eventually returning + * sticky NULL result. ++ * ++ * Note, that states have to be compared exactly in this case because ++ * read and precision marks might not be finalized inside the loop. ++ * E.g. as in the program below: ++ * ++ * 1. r7 = -16 ++ * 2. r6 = bpf_get_prandom_u32() ++ * 3. while (bpf_iter_num_next(&fp[-8])) { ++ * 4. if (r6 != 42) { ++ * 5. r7 = -32 ++ * 6. r6 = bpf_get_prandom_u32() ++ * 7. continue ++ * 8. } ++ * 9. r0 = r10 ++ * 10. r0 += r7 ++ * 11. r8 = *(u64 *)(r0 + 0) ++ * 12. r6 = bpf_get_prandom_u32() ++ * 13. } ++ * ++ * Here verifier would first visit path 1-3, create a checkpoint at 3 ++ * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does ++ * not have read or precision mark for r7 yet, thus inexact states ++ * comparison would discard current state with r7=-32 ++ * => unsafe memory access at 11 would not be caught. + */ + if (is_iter_next_insn(env, insn_idx)) { +- if (states_equal(env, &sl->state, cur)) { ++ if (states_equal(env, &sl->state, cur, true)) { + struct bpf_func_state *cur_frame; + struct bpf_reg_state *iter_state, *iter_reg; + int spi; +@@ -16216,7 +16367,7 @@ static int is_state_visited(struct bpf_v + } + /* attempt to detect infinite loop to avoid unnecessary doomed work */ + if (states_maybe_looping(&sl->state, cur) && +- states_equal(env, &sl->state, cur) && ++ states_equal(env, &sl->state, cur, false) && + !iter_active_depths_differ(&sl->state, cur)) { + verbose_linfo(env, insn_idx, "; "); + verbose(env, "infinite loop detected at insn %d\n", insn_idx); +@@ -16241,7 +16392,7 @@ skip_inf_loop_check: + add_new_state = false; + goto miss; + } +- if (states_equal(env, &sl->state, cur)) { ++ if (states_equal(env, &sl->state, cur, false)) { + hit: + sl->hit_cnt++; + /* reached equivalent register/stack state, +@@ -16280,8 +16431,12 @@ miss: + * to keep checking from state equivalence point of view. + * Higher numbers increase max_states_per_insn and verification time, + * but do not meaningfully decrease insn_processed. ++ * 'n' controls how many times state could miss before eviction. ++ * Use bigger 'n' for checkpoints because evicting checkpoint states ++ * too early would hinder iterator convergence. 
+ */ +- if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { ++ n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3; ++ if (sl->miss_cnt > sl->hit_cnt * n + n) { + /* the state is unlikely to be useful. Remove it to + * speed up verification + */ +@@ -16355,6 +16510,7 @@ next: + + cur->parent = new; + cur->first_insn_idx = insn_idx; ++ cur->dfs_depth = new->dfs_depth + 1; + clear_jmp_history(cur); + new_sl->next = *explored_state(env, insn_idx); + *explored_state(env, insn_idx) = new_sl; diff --git a/queue-6.6/bpf-extract-__check_reg_arg-utility-function.patch b/queue-6.6/bpf-extract-__check_reg_arg-utility-function.patch new file mode 100644 index 00000000000..7950158d00b --- /dev/null +++ b/queue-6.6/bpf-extract-__check_reg_arg-utility-function.patch @@ -0,0 +1,70 @@ +From 683b96f9606ab7308ffb23c46ab43cecdef8a241 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 21 Nov 2023 04:06:54 +0200 +Subject: bpf: extract __check_reg_arg() utility function + +From: Eduard Zingerman + +commit 683b96f9606ab7308ffb23c46ab43cecdef8a241 upstream. + +Split check_reg_arg() into two utility functions: +- check_reg_arg() operating on registers from current verifier state; +- __check_reg_arg() operating on a specific set of registers passed as + a parameter; + +The __check_reg_arg() function would be used by a follow-up change for +callbacks handling. + +Acked-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231121020701.26440-5-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -3321,13 +3321,11 @@ static void mark_insn_zext(struct bpf_ve + reg->subreg_def = DEF_NOT_SUBREG; + } + +-static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, +- enum reg_arg_type t) ++static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno, ++ enum reg_arg_type t) + { +- struct bpf_verifier_state *vstate = env->cur_state; +- struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_insn *insn = env->prog->insnsi + env->insn_idx; +- struct bpf_reg_state *reg, *regs = state->regs; ++ struct bpf_reg_state *reg; + bool rw64; + + if (regno >= MAX_BPF_REG) { +@@ -3368,6 +3366,15 @@ static int check_reg_arg(struct bpf_veri + return 0; + } + ++static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, ++ enum reg_arg_type t) ++{ ++ struct bpf_verifier_state *vstate = env->cur_state; ++ struct bpf_func_state *state = vstate->frame[vstate->curframe]; ++ ++ return __check_reg_arg(env, state->regs, regno, t); ++} ++ + static void mark_jmp_point(struct bpf_verifier_env *env, int idx) + { + env->insn_aux_data[idx].jmp_point = true; +@@ -9147,7 +9154,7 @@ static void clear_caller_saved_regs(stru + /* after the call registers r0 - r5 were scratched */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { + mark_reg_not_init(env, regs, caller_saved[i]); +- check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); ++ __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK); + } + } + diff --git a/queue-6.6/bpf-extract-same_callsites-as-utility-function.patch b/queue-6.6/bpf-extract-same_callsites-as-utility-function.patch new file mode 100644 index 00000000000..5a391e8bc5a --- /dev/null +++ b/queue-6.6/bpf-extract-same_callsites-as-utility-function.patch @@ -0,0 +1,63 @@ +From 4c97259abc9bc8df7712f76f58ce385581876857 Mon 
Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 24 Oct 2023 03:09:12 +0300 +Subject: bpf: extract same_callsites() as utility function + +From: Eduard Zingerman + +commit 4c97259abc9bc8df7712f76f58ce385581876857 upstream. + +Extract same_callsites() from clean_live_states() as a utility function. +This function would be used by the next patch in the set. + +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231024000917.12153-3-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1799,6 +1799,20 @@ static struct bpf_verifier_state_list ** + return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; + } + ++static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b) ++{ ++ int fr; ++ ++ if (a->curframe != b->curframe) ++ return false; ++ ++ for (fr = a->curframe; fr >= 0; fr--) ++ if (a->frame[fr]->callsite != b->frame[fr]->callsite) ++ return false; ++ ++ return true; ++} ++ + static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) + { + while (st) { +@@ -15521,18 +15535,14 @@ static void clean_live_states(struct bpf + struct bpf_verifier_state *cur) + { + struct bpf_verifier_state_list *sl; +- int i; + + sl = *explored_state(env, insn); + while (sl) { + if (sl->state.branches) + goto next; + if (sl->state.insn_idx != insn || +- sl->state.curframe != cur->curframe) ++ !same_callsites(&sl->state, cur)) + goto next; +- for (i = 0; i <= cur->curframe; i++) +- if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) +- goto next; + clean_verifier_state(env, &sl->state); + next: + sl = sl->next; diff --git a/queue-6.6/bpf-extract-setup_func_entry-utility-function.patch b/queue-6.6/bpf-extract-setup_func_entry-utility-function.patch new file mode 100644 index 00000000000..739eafec2bf --- /dev/null +++ b/queue-6.6/bpf-extract-setup_func_entry-utility-function.patch @@ -0,0 +1,149 @@ +From 58124a98cb8eda69d248d7f1de954c8b2767c945 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 21 Nov 2023 04:06:55 +0200 +Subject: bpf: extract setup_func_entry() utility function + +From: Eduard Zingerman + +commit 58124a98cb8eda69d248d7f1de954c8b2767c945 upstream. + +Move code for simulated stack frame creation to a separate utility +function. This function would be used in the follow-up change for +callbacks handling. 
+ +Acked-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231121020701.26440-6-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++---------------------- + 1 file changed, 48 insertions(+), 36 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -9167,11 +9167,10 @@ static int set_callee_state(struct bpf_v + struct bpf_func_state *caller, + struct bpf_func_state *callee, int insn_idx); + +-static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, +- int *insn_idx, int subprog, +- set_callee_state_fn set_callee_state_cb) ++static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite, ++ set_callee_state_fn set_callee_state_cb, ++ struct bpf_verifier_state *state) + { +- struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller, *callee; + int err; + +@@ -9181,13 +9180,53 @@ static int __check_func_call(struct bpf_ + return -E2BIG; + } + +- caller = state->frame[state->curframe]; + if (state->frame[state->curframe + 1]) { + verbose(env, "verifier bug. Frame %d already allocated\n", + state->curframe + 1); + return -EFAULT; + } + ++ caller = state->frame[state->curframe]; ++ callee = kzalloc(sizeof(*callee), GFP_KERNEL); ++ if (!callee) ++ return -ENOMEM; ++ state->frame[state->curframe + 1] = callee; ++ ++ /* callee cannot access r0, r6 - r9 for reading and has to write ++ * into its own stack before reading from it. ++ * callee can read/write into caller's stack ++ */ ++ init_func_state(env, callee, ++ /* remember the callsite, it will be used by bpf_exit */ ++ callsite, ++ state->curframe + 1 /* frameno within this callchain */, ++ subprog /* subprog number within this prog */); ++ /* Transfer references to the callee */ ++ err = copy_reference_state(callee, caller); ++ err = err ?: set_callee_state_cb(env, caller, callee, callsite); ++ if (err) ++ goto err_out; ++ ++ /* only increment it after check_reg_arg() finished */ ++ state->curframe++; ++ ++ return 0; ++ ++err_out: ++ free_func_state(callee); ++ state->frame[state->curframe + 1] = NULL; ++ return err; ++} ++ ++static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ++ int *insn_idx, int subprog, ++ set_callee_state_fn set_callee_state_cb) ++{ ++ struct bpf_verifier_state *state = env->cur_state; ++ struct bpf_func_state *caller, *callee; ++ int err; ++ ++ caller = state->frame[state->curframe]; + err = btf_check_subprog_call(env, subprog, caller->regs); + if (err == -EFAULT) + return err; +@@ -9256,35 +9295,12 @@ static int __check_func_call(struct bpf_ + return 0; + } + +- callee = kzalloc(sizeof(*callee), GFP_KERNEL); +- if (!callee) +- return -ENOMEM; +- state->frame[state->curframe + 1] = callee; +- +- /* callee cannot access r0, r6 - r9 for reading and has to write +- * into its own stack before reading from it. 
+- * callee can read/write into caller's stack +- */ +- init_func_state(env, callee, +- /* remember the callsite, it will be used by bpf_exit */ +- *insn_idx /* callsite */, +- state->curframe + 1 /* frameno within this callchain */, +- subprog /* subprog number within this prog */); +- +- /* Transfer references to the callee */ +- err = copy_reference_state(callee, caller); ++ err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state); + if (err) +- goto err_out; +- +- err = set_callee_state_cb(env, caller, callee, *insn_idx); +- if (err) +- goto err_out; ++ return err; + + clear_caller_saved_regs(env, caller->regs); + +- /* only increment it after check_reg_arg() finished */ +- state->curframe++; +- + /* and go analyze first insn of the callee */ + *insn_idx = env->subprog_info[subprog].start - 1; + +@@ -9292,14 +9308,10 @@ static int __check_func_call(struct bpf_ + verbose(env, "caller:\n"); + print_verifier_state(env, caller, true); + verbose(env, "callee:\n"); +- print_verifier_state(env, callee, true); ++ print_verifier_state(env, state->frame[state->curframe], true); + } +- return 0; + +-err_out: +- free_func_state(callee); +- state->frame[state->curframe + 1] = NULL; +- return err; ++ return 0; + } + + int map_set_for_each_callback_args(struct bpf_verifier_env *env, diff --git a/queue-6.6/bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch b/queue-6.6/bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch new file mode 100644 index 00000000000..492ce4331c7 --- /dev/null +++ b/queue-6.6/bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch @@ -0,0 +1,181 @@ +From bb124da69c47dd98d69361ec13244ece50bec63e Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 21 Nov 2023 04:07:00 +0200 +Subject: bpf: keep track of max number of bpf_loop callback iterations + +From: Eduard Zingerman + +commit bb124da69c47dd98d69361ec13244ece50bec63e upstream. + +In some cases verifier can't infer convergence of the bpf_loop() +iteration. E.g. for the following program: + + static int cb(__u32 idx, struct num_context* ctx) + { + ctx->i++; + return 0; + } + + SEC("?raw_tp") + int prog(void *_) + { + struct num_context ctx = { .i = 0 }; + __u8 choice_arr[2] = { 0, 1 }; + + bpf_loop(2, cb, &ctx, 0); + return choice_arr[ctx.i]; + } + +Each 'cb' simulation would eventually return to 'prog' and reach +'return choice_arr[ctx.i]' statement. At which point ctx.i would be +marked precise, thus forcing verifier to track multitude of separate +states with {.i=0}, {.i=1}, ... at bpf_loop() callback entry. + +This commit allows "brute force" handling for such cases by limiting +number of callback body simulations using 'umax' value of the first +bpf_loop() parameter. + +For this, extend bpf_func_state with 'callback_depth' field. +Increment this field when callback visiting state is pushed to states +traversal stack. For frame #N it's 'callback_depth' field counts how +many times callback with frame depth N+1 had been executed. +Use bpf_func_state specifically to allow independent tracking of +callback depths when multiple nested bpf_loop() calls are present. 
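To illustrate the per-frame tracking (a sketch only; 'inner_cb', 'outer_cb' and 'nested' are assumed names, reusing struct num_context from the example above, and are not part of the patch):

    static int inner_cb(__u32 idx, struct num_context *ctx)
    {
            ctx->i++;
            return 0;
    }

    static int outer_cb(__u32 idx, struct num_context *ctx)
    {
            /* Simulations of inner_cb (frame N+2) are counted in
             * outer_cb's frame (N+1), independently of how many
             * times outer_cb itself has been simulated.
             */
            bpf_loop(2, inner_cb, ctx, 0);
            return 0;
    }

    SEC("?raw_tp")
    int nested(void *_)
    {
            struct num_context ctx = { .i = 0 };

            /* Simulations of outer_cb (frame 1) are counted in frame 0. */
            bpf_loop(2, outer_cb, &ctx, 0);
            return 0;
    }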
+ +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231121020701.26440-11-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/bpf_verifier.h | 11 +++ + kernel/bpf/verifier.c | 19 ++++- + tools/testing/selftests/bpf/progs/verifier_subprog_precision.c | 35 +++++++--- + 3 files changed, 53 insertions(+), 12 deletions(-) + +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -300,6 +300,17 @@ struct bpf_func_state { + bool in_callback_fn; + struct tnum callback_ret_range; + bool in_async_callback_fn; ++ /* For callback calling functions that limit number of possible ++ * callback executions (e.g. bpf_loop) keeps track of current ++ * simulated iteration number. ++ * Value in frame N refers to number of times callback with frame ++ * N+1 was simulated, e.g. for the following call: ++ * ++ * bpf_loop(..., fn, ...); | suppose current frame is N ++ * | fn would be simulated in frame N+1 ++ * | number of simulations is tracked in frame N ++ */ ++ u32 callback_depth; + + /* The following fields should be last. See copy_func_state() */ + int acquired_refs; +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -9301,6 +9301,8 @@ static int push_callback_call(struct bpf + return err; + + callback_state->callback_unroll_depth++; ++ callback_state->frame[callback_state->curframe - 1]->callback_depth++; ++ caller->callback_depth = 0; + return 0; + } + +@@ -10090,8 +10092,21 @@ static int check_helper_call(struct bpf_ + break; + case BPF_FUNC_loop: + update_loop_inline_state(env, meta.subprogno); +- err = push_callback_call(env, insn, insn_idx, meta.subprogno, +- set_loop_callback_state); ++ /* Verifier relies on R1 value to determine if bpf_loop() iteration ++ * is finished, thus mark it precise. ++ */ ++ err = mark_chain_precision(env, BPF_REG_1); ++ if (err) ++ return err; ++ if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) { ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_loop_callback_state); ++ } else { ++ cur_func(env)->callback_depth = 0; ++ if (env->log.level & BPF_LOG_LEVEL2) ++ verbose(env, "frame%d bpf_loop iteration limit reached\n", ++ env->cur_state->curframe); ++ } + break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { +--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c ++++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +@@ -119,7 +119,23 @@ __naked int global_subprog_result_precis + + SEC("?raw_tp") + __success __log_level(2) +-/* First simulated path does not include callback body */ ++/* First simulated path does not include callback body, ++ * r1 and r4 are always precise for bpf_loop() calls. 
++ */ ++__msg("9: (85) call bpf_loop#181") ++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") ++__msg("mark_precise: frame0: parent state regs=r4 stack=:") ++__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") ++__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") ++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") ++__msg("mark_precise: frame0: parent state regs=r1 stack=:") ++__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") ++__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") ++__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") ++__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") ++__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") ++__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") ++/* r6 precision propagation */ + __msg("14: (0f) r1 += r6") + __msg("mark_precise: frame0: last_idx 14 first_idx 9") + __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") +@@ -134,10 +150,9 @@ __msg("17: (b7) r0 = 0") + __msg("18: (95) exit") + __msg("returning from callee:") + __msg("to caller at 9:") +-/* r4 (flags) is always precise for bpf_loop() */ +-__msg("frame 0: propagating r4") ++__msg("frame 0: propagating r1,r4") + __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +-__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit") ++__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") + __msg("from 18 to 9: safe") + __naked int callback_result_precise(void) + { +@@ -264,12 +279,12 @@ __msg("15: (b7) r0 = 0") + __msg("16: (95) exit") + __msg("returning from callee:") + __msg("to caller at 9:") +-/* r4 (flags) is always precise for bpf_loop(), ++/* r1, r4 are always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +-__msg("frame 0: propagating r4,r6") ++__msg("frame 0: propagating r1,r4,r6") + __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +-__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit") ++__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") + __msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") + __msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") + __msg("mark_precise: frame0: parent state regs= stack=:") +@@ -422,12 +437,12 @@ __msg("17: (b7) r0 = 0") + __msg("18: (95) exit") + __msg("returning from callee:") + __msg("to caller at 10:") +-/* r4 (flags) is always precise for bpf_loop(), ++/* r1, r4 are always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. 
+ */ +-__msg("frame 0: propagating r4,fp-8") ++__msg("frame 0: propagating r1,r4,fp-8") + __msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +-__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit") ++__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") + __msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") + __msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") + __msg("mark_precise: frame0: parent state regs= stack=:") diff --git a/queue-6.6/bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch b/queue-6.6/bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch new file mode 100644 index 00000000000..a18642ae59e --- /dev/null +++ b/queue-6.6/bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch @@ -0,0 +1,64 @@ +From 3c4e420cb6536026ddd50eaaff5f30e4f144200d Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 24 Oct 2023 03:09:11 +0300 +Subject: bpf: move explored_state() closer to the beginning of verifier.c + +From: Eduard Zingerman + +commit 3c4e420cb6536026ddd50eaaff5f30e4f144200d upstream. + +Subsequent patches would make use of explored_state() function. +Move it up to avoid adding unnecessary prototype. + +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231024000917.12153-2-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 28 +++++++++++++--------------- + 1 file changed, 13 insertions(+), 15 deletions(-) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -1786,6 +1786,19 @@ static int copy_verifier_state(struct bp + return 0; + } + ++static u32 state_htab_size(struct bpf_verifier_env *env) ++{ ++ return env->prog->len; ++} ++ ++static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx) ++{ ++ struct bpf_verifier_state *cur = env->cur_state; ++ struct bpf_func_state *state = cur->frame[cur->curframe]; ++ ++ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; ++} ++ + static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) + { + while (st) { +@@ -14702,21 +14715,6 @@ enum { + BRANCH = 2, + }; + +-static u32 state_htab_size(struct bpf_verifier_env *env) +-{ +- return env->prog->len; +-} +- +-static struct bpf_verifier_state_list **explored_state( +- struct bpf_verifier_env *env, +- int idx) +-{ +- struct bpf_verifier_state *cur = env->cur_state; +- struct bpf_func_state *state = cur->frame[cur->curframe]; +- +- return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)]; +-} +- + static void mark_prune_point(struct bpf_verifier_env *env, int idx) + { + env->insn_aux_data[idx].prune_point = true; diff --git a/queue-6.6/bpf-print-full-verifier-states-on-infinite-loop-detection.patch b/queue-6.6/bpf-print-full-verifier-states-on-infinite-loop-detection.patch new file mode 100644 index 00000000000..b5d831a77e8 --- /dev/null +++ b/queue-6.6/bpf-print-full-verifier-states-on-infinite-loop-detection.patch @@ -0,0 +1,34 @@ +From b4d8239534fddc036abe4a0fdbf474d9894d4641 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 24 Oct 2023 03:09:17 +0300 +Subject: bpf: print full verifier states on infinite loop detection + +From: Eduard Zingerman + +commit b4d8239534fddc036abe4a0fdbf474d9894d4641 upstream. 
+ +Additional logging in is_state_visited(): if infinite loop is detected +print full verifier state for both current and equivalent states. + +Acked-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231024000917.12153-8-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/verifier.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -16540,6 +16540,10 @@ static int is_state_visited(struct bpf_v + !iter_active_depths_differ(&sl->state, cur)) { + verbose_linfo(env, insn_idx, "; "); + verbose(env, "infinite loop detected at insn %d\n", insn_idx); ++ verbose(env, "cur state:"); ++ print_verifier_state(env, cur->frame[cur->curframe], true); ++ verbose(env, "old state:"); ++ print_verifier_state(env, sl->state.frame[cur->curframe], true); + return -EINVAL; + } + /* if the verifier is processing a loop, avoid adding new state diff --git a/queue-6.6/bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch b/queue-6.6/bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch new file mode 100644 index 00000000000..5405ba988fc --- /dev/null +++ b/queue-6.6/bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch @@ -0,0 +1,660 @@ +From ab5cfac139ab8576fb54630d4cca23c3e690ee90 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 21 Nov 2023 04:06:56 +0200 +Subject: bpf: verify callbacks as if they are called unknown number of times + +From: Eduard Zingerman + +commit ab5cfac139ab8576fb54630d4cca23c3e690ee90 upstream. + +Prior to this patch callbacks were handled as regular function calls, +execution of callback body was modeled exactly once. +This patch updates callbacks handling logic as follows: +- introduces a function push_callback_call() that schedules callback + body verification in env->head stack; +- updates prepare_func_exit() to reschedule callback body verification + upon BPF_EXIT; +- as calls to bpf_*_iter_next(), calls to callback invoking functions + are marked as checkpoints; +- is_state_visited() is updated to stop callback based iteration when + some identical parent state is found. 
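For example (an illustrative sketch, not taken from the patch; it reuses the num_context example shown earlier in this series):

    static int cb(__u32 idx, struct num_context *ctx)
    {
            ctx->i = 1;
            return 0;
    }

    SEC("?raw_tp")
    int prog(void *_)
    {
            struct num_context ctx = { .i = 0 };
            __u8 choice_arr[2] = { 0, 1 };

            /* The verifier explores the path where cb runs zero times
             * first, then re-schedules cb's body until the checkpoint
             * at its entry converges, so ctx.i may be 0 or 1 below.
             */
            bpf_loop(1, cb, &ctx, 0);
            return choice_arr[ctx.i];
    }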
+
+Paths with the callback function invoked zero times are now verified
+first, which makes it necessary to modify some selftests:
+- the following negative tests required adding release/unlock/drop
+  calls to avoid previously masked unrelated error reports:
+  - cb_refs.c:underflow_prog
+  - exceptions_fail.c:reject_rbtree_add_throw
+  - exceptions_fail.c:reject_with_cp_reference
+- the following precision tracking selftests needed a change in the
+  expected log trace:
+  - verifier_subprog_precision.c:callback_result_precise
+    (note: r0 precision is no longer propagated inside the callback
+     and I think this is correct behavior)
+  - verifier_subprog_precision.c:parent_callee_saved_reg_precise_with_callback
+  - verifier_subprog_precision.c:parent_stack_slot_precise_with_callback
+
+Reported-by: Andrew Werner
+Closes: https://lore.kernel.org/bpf/CA+vRuzPChFNXmouzGG+wsy=6eMcfr1mFG0F3g7rbg-sedGKW3w@mail.gmail.com/
+Acked-by: Andrii Nakryiko
+Signed-off-by: Eduard Zingerman
+Link: https://lore.kernel.org/r/20231121020701.26440-7-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/bpf_verifier.h | 5
+ kernel/bpf/verifier.c | 272 ++++----
+ tools/testing/selftests/bpf/progs/cb_refs.c | 1
+ tools/testing/selftests/bpf/progs/verifier_subprog_precision.c | 71 ++
+ 4 files changed, 237 insertions(+), 112 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -399,6 +399,7 @@ struct bpf_verifier_state {
+ 	struct bpf_idx_pair *jmp_history;
+ 	u32 jmp_history_cnt;
+ 	u32 dfs_depth;
++	u32 callback_unroll_depth;
+ };
+ 
+ #define bpf_get_spilled_reg(slot, frame) \
+@@ -506,6 +507,10 @@ struct bpf_insn_aux_data {
+ 	 * this instruction, regardless of any heuristics
+ 	 */
+ 	bool force_checkpoint;
++	/* true if instruction is a call to a helper function that
++	 * accepts callback function as a parameter.
++ */ ++ bool calls_callback; + }; + + #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -542,12 +542,11 @@ static bool is_dynptr_ref_function(enum + return func_id == BPF_FUNC_dynptr_data; + } + +-static bool is_callback_calling_kfunc(u32 btf_id); ++static bool is_sync_callback_calling_kfunc(u32 btf_id); + +-static bool is_callback_calling_function(enum bpf_func_id func_id) ++static bool is_sync_callback_calling_function(enum bpf_func_id func_id) + { + return func_id == BPF_FUNC_for_each_map_elem || +- func_id == BPF_FUNC_timer_set_callback || + func_id == BPF_FUNC_find_vma || + func_id == BPF_FUNC_loop || + func_id == BPF_FUNC_user_ringbuf_drain; +@@ -558,6 +557,18 @@ static bool is_async_callback_calling_fu + return func_id == BPF_FUNC_timer_set_callback; + } + ++static bool is_callback_calling_function(enum bpf_func_id func_id) ++{ ++ return is_sync_callback_calling_function(func_id) || ++ is_async_callback_calling_function(func_id); ++} ++ ++static bool is_sync_callback_calling_insn(struct bpf_insn *insn) ++{ ++ return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) || ++ (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); ++} ++ + static bool is_storage_get_function(enum bpf_func_id func_id) + { + return func_id == BPF_FUNC_sk_storage_get || +@@ -1772,6 +1783,7 @@ static int copy_verifier_state(struct bp + dst_state->first_insn_idx = src->first_insn_idx; + dst_state->last_insn_idx = src->last_insn_idx; + dst_state->dfs_depth = src->dfs_depth; ++ dst_state->callback_unroll_depth = src->callback_unroll_depth; + dst_state->used_as_loop_entry = src->used_as_loop_entry; + for (i = 0; i <= src->curframe; i++) { + dst = dst_state->frame[i]; +@@ -3613,6 +3625,8 @@ static void fmt_stack_mask(char *buf, ss + } + } + ++static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); ++ + /* For given verifier state backtrack_insn() is called from the last insn to + * the first insn. Its purpose is to compute a bitmask of registers and + * stack slots that needs precision in the parent verifier state. +@@ -3788,16 +3802,13 @@ static int backtrack_insn(struct bpf_ver + return -EFAULT; + return 0; + } +- } else if ((bpf_helper_call(insn) && +- is_callback_calling_function(insn->imm) && +- !is_async_callback_calling_function(insn->imm)) || +- (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) { +- /* callback-calling helper or kfunc call, which means +- * we are exiting from subprog, but unlike the subprog +- * call handling above, we shouldn't propagate +- * precision of r1-r5 (if any requested), as they are +- * not actually arguments passed directly to callback +- * subprogs ++ } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) { ++ /* exit from callback subprog to callback-calling helper or ++ * kfunc call. Use idx/subseq_idx check to discern it from ++ * straight line code backtracking. 
++ * Unlike the subprog call handling above, we shouldn't ++ * propagate precision of r1-r5 (if any requested), as they are ++ * not actually arguments passed directly to callback subprogs + */ + if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); +@@ -3832,10 +3843,18 @@ static int backtrack_insn(struct bpf_ver + } else if (opcode == BPF_EXIT) { + bool r0_precise; + ++ /* Backtracking to a nested function call, 'idx' is a part of ++ * the inner frame 'subseq_idx' is a part of the outer frame. ++ * In case of a regular function call, instructions giving ++ * precision to registers R1-R5 should have been found already. ++ * In case of a callback, it is ok to have R1-R5 marked for ++ * backtracking, as these registers are set by the function ++ * invoking callback. ++ */ ++ if (subseq_idx >= 0 && calls_callback(env, subseq_idx)) ++ for (i = BPF_REG_1; i <= BPF_REG_5; i++) ++ bt_clear_reg(bt, i); + if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { +- /* if backtracing was looking for registers R1-R5 +- * they should have been found already. +- */ + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; +@@ -9218,11 +9237,11 @@ err_out: + return err; + } + +-static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, +- int *insn_idx, int subprog, +- set_callee_state_fn set_callee_state_cb) ++static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ++ int insn_idx, int subprog, ++ set_callee_state_fn set_callee_state_cb) + { +- struct bpf_verifier_state *state = env->cur_state; ++ struct bpf_verifier_state *state = env->cur_state, *callback_state; + struct bpf_func_state *caller, *callee; + int err; + +@@ -9230,43 +9249,21 @@ static int __check_func_call(struct bpf_ + err = btf_check_subprog_call(env, subprog, caller->regs); + if (err == -EFAULT) + return err; +- if (subprog_is_global(env, subprog)) { +- if (err) { +- verbose(env, "Caller passes invalid args into func#%d\n", +- subprog); +- return err; +- } else { +- if (env->log.level & BPF_LOG_LEVEL) +- verbose(env, +- "Func#%d is global and valid. 
Skipping.\n", +- subprog); +- clear_caller_saved_regs(env, caller->regs); +- +- /* All global functions return a 64-bit SCALAR_VALUE */ +- mark_reg_unknown(env, caller->regs, BPF_REG_0); +- caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; +- +- /* continue with next insn after call */ +- return 0; +- } +- } + + /* set_callee_state is used for direct subprog calls, but we are + * interested in validating only BPF helpers that can call subprogs as + * callbacks + */ +- if (set_callee_state_cb != set_callee_state) { +- if (bpf_pseudo_kfunc_call(insn) && +- !is_callback_calling_kfunc(insn->imm)) { +- verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", +- func_id_name(insn->imm), insn->imm); +- return -EFAULT; +- } else if (!bpf_pseudo_kfunc_call(insn) && +- !is_callback_calling_function(insn->imm)) { /* helper */ +- verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", +- func_id_name(insn->imm), insn->imm); +- return -EFAULT; +- } ++ if (bpf_pseudo_kfunc_call(insn) && ++ !is_sync_callback_calling_kfunc(insn->imm)) { ++ verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", ++ func_id_name(insn->imm), insn->imm); ++ return -EFAULT; ++ } else if (!bpf_pseudo_kfunc_call(insn) && ++ !is_callback_calling_function(insn->imm)) { /* helper */ ++ verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n", ++ func_id_name(insn->imm), insn->imm); ++ return -EFAULT; + } + + if (insn->code == (BPF_JMP | BPF_CALL) && +@@ -9277,25 +9274,76 @@ static int __check_func_call(struct bpf_ + /* there is no real recursion here. timer callbacks are async */ + env->subprog_info[subprog].is_async_cb = true; + async_cb = push_async_cb(env, env->subprog_info[subprog].start, +- *insn_idx, subprog); ++ insn_idx, subprog); + if (!async_cb) + return -EFAULT; + callee = async_cb->frame[0]; + callee->async_entry_cnt = caller->async_entry_cnt + 1; + + /* Convert bpf_timer_set_callback() args into timer callback args */ +- err = set_callee_state_cb(env, caller, callee, *insn_idx); ++ err = set_callee_state_cb(env, caller, callee, insn_idx); + if (err) + return err; + ++ return 0; ++ } ++ ++ /* for callback functions enqueue entry to callback and ++ * proceed with next instruction within current frame. ++ */ ++ callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false); ++ if (!callback_state) ++ return -ENOMEM; ++ ++ err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb, ++ callback_state); ++ if (err) ++ return err; ++ ++ callback_state->callback_unroll_depth++; ++ return 0; ++} ++ ++static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ++ int *insn_idx) ++{ ++ struct bpf_verifier_state *state = env->cur_state; ++ struct bpf_func_state *caller; ++ int err, subprog, target_insn; ++ ++ target_insn = *insn_idx + insn->imm + 1; ++ subprog = find_subprog(env, target_insn); ++ if (subprog < 0) { ++ verbose(env, "verifier bug. No program starts at insn %d\n", target_insn); ++ return -EFAULT; ++ } ++ ++ caller = state->frame[state->curframe]; ++ err = btf_check_subprog_call(env, subprog, caller->regs); ++ if (err == -EFAULT) ++ return err; ++ if (subprog_is_global(env, subprog)) { ++ if (err) { ++ verbose(env, "Caller passes invalid args into func#%d\n", subprog); ++ return err; ++ } ++ ++ if (env->log.level & BPF_LOG_LEVEL) ++ verbose(env, "Func#%d is global and valid. 
Skipping.\n", subprog); + clear_caller_saved_regs(env, caller->regs); ++ ++ /* All global functions return a 64-bit SCALAR_VALUE */ + mark_reg_unknown(env, caller->regs, BPF_REG_0); + caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; ++ + /* continue with next insn after call */ + return 0; + } + +- err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state); ++ /* for regular function entry setup new frame and continue ++ * from that frame. ++ */ ++ err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state); + if (err) + return err; + +@@ -9355,22 +9403,6 @@ static int set_callee_state(struct bpf_v + return 0; + } + +-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, +- int *insn_idx) +-{ +- int subprog, target_insn; +- +- target_insn = *insn_idx + insn->imm + 1; +- subprog = find_subprog(env, target_insn); +- if (subprog < 0) { +- verbose(env, "verifier bug. No program starts at insn %d\n", +- target_insn); +- return -EFAULT; +- } +- +- return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); +-} +- + static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, +@@ -9601,6 +9633,11 @@ static int prepare_func_exit(struct bpf_ + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } ++ if (!calls_callback(env, callee->callsite)) { ++ verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n", ++ *insn_idx, callee->callsite); ++ return -EFAULT; ++ } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; +@@ -9618,7 +9655,15 @@ static int prepare_func_exit(struct bpf_ + return err; + } + +- *insn_idx = callee->callsite + 1; ++ /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, ++ * there function call logic would reschedule callback visit. If iteration ++ * converges is_state_visited() would prune that visit eventually. 
++ */ ++ if (callee->in_callback_fn) ++ *insn_idx = callee->callsite; ++ else ++ *insn_idx = callee->callsite + 1; ++ + if (env->log.level & BPF_LOG_LEVEL) { + verbose(env, "returning from callee:\n"); + print_verifier_state(env, callee, true); +@@ -10009,24 +10054,24 @@ static int check_helper_call(struct bpf_ + } + break; + case BPF_FUNC_for_each_map_elem: +- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, +- set_map_elem_callback_state); ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_map_elem_callback_state); + break; + case BPF_FUNC_timer_set_callback: +- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, +- set_timer_callback_state); ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_timer_callback_state); + break; + case BPF_FUNC_find_vma: +- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, +- set_find_vma_callback_state); ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_find_vma_callback_state); + break; + case BPF_FUNC_snprintf: + err = check_bpf_snprintf_call(env, regs); + break; + case BPF_FUNC_loop: + update_loop_inline_state(env, meta.subprogno); +- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, +- set_loop_callback_state); ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_loop_callback_state); + break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { +@@ -10105,8 +10150,8 @@ static int check_helper_call(struct bpf_ + break; + } + case BPF_FUNC_user_ringbuf_drain: +- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, +- set_user_ringbuf_callback_state); ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_user_ringbuf_callback_state); + break; + } + +@@ -10956,7 +11001,7 @@ static bool is_bpf_graph_api_kfunc(u32 b + btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; + } + +-static bool is_callback_calling_kfunc(u32 btf_id) ++static bool is_sync_callback_calling_kfunc(u32 btf_id) + { + return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; + } +@@ -11660,6 +11705,21 @@ static int check_kfunc_call(struct bpf_v + return -EACCES; + } + ++ /* Check the arguments */ ++ err = check_kfunc_args(env, &meta, insn_idx); ++ if (err < 0) ++ return err; ++ ++ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { ++ err = push_callback_call(env, insn, insn_idx, meta.subprogno, ++ set_rbtree_add_callback_state); ++ if (err) { ++ verbose(env, "kfunc %s#%d failed callback verification\n", ++ func_name, meta.func_id); ++ return err; ++ } ++ } ++ + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); + rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); + +@@ -11694,10 +11754,6 @@ static int check_kfunc_call(struct bpf_v + return -EINVAL; + } + +- /* Check the arguments */ +- err = check_kfunc_args(env, &meta, insn_idx); +- if (err < 0) +- return err; + /* In case of release function, we get register number of refcounted + * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. 
+ */ +@@ -11731,16 +11787,6 @@ static int check_kfunc_call(struct bpf_v + } + } + +- if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { +- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, +- set_rbtree_add_callback_state); +- if (err) { +- verbose(env, "kfunc %s#%d failed callback verification\n", +- func_name, meta.func_id); +- return err; +- } +- } +- + for (i = 0; i < CALLER_SAVED_REGS; i++) + mark_reg_not_init(env, regs, caller_saved[i]); + +@@ -15047,6 +15093,15 @@ static bool is_force_checkpoint(struct b + return env->insn_aux_data[insn_idx].force_checkpoint; + } + ++static void mark_calls_callback(struct bpf_verifier_env *env, int idx) ++{ ++ env->insn_aux_data[idx].calls_callback = true; ++} ++ ++static bool calls_callback(struct bpf_verifier_env *env, int insn_idx) ++{ ++ return env->insn_aux_data[insn_idx].calls_callback; ++} + + enum { + DONE_EXPLORING = 0, +@@ -15160,6 +15215,21 @@ static int visit_insn(int t, struct bpf_ + * async state will be pushed for further exploration. + */ + mark_prune_point(env, t); ++ /* For functions that invoke callbacks it is not known how many times ++ * callback would be called. Verifier models callback calling functions ++ * by repeatedly visiting callback bodies and returning to origin call ++ * instruction. ++ * In order to stop such iteration verifier needs to identify when a ++ * state identical some state from a previous iteration is reached. ++ * Check below forces creation of checkpoint before callback calling ++ * instruction to allow search for such identical states. ++ */ ++ if (is_sync_callback_calling_insn(insn)) { ++ mark_calls_callback(env, t); ++ mark_force_checkpoint(env, t); ++ mark_prune_point(env, t); ++ mark_jmp_point(env, t); ++ } + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + struct bpf_kfunc_call_arg_meta meta; + +@@ -16553,10 +16623,16 @@ static int is_state_visited(struct bpf_v + } + goto skip_inf_loop_check; + } ++ if (calls_callback(env, insn_idx)) { ++ if (states_equal(env, &sl->state, cur, true)) ++ goto hit; ++ goto skip_inf_loop_check; ++ } + /* attempt to detect infinite loop to avoid unnecessary doomed work */ + if (states_maybe_looping(&sl->state, cur) && + states_equal(env, &sl->state, cur, false) && +- !iter_active_depths_differ(&sl->state, cur)) { ++ !iter_active_depths_differ(&sl->state, cur) && ++ sl->state.callback_unroll_depth == cur->callback_unroll_depth) { + verbose_linfo(env, insn_idx, "; "); + verbose(env, "infinite loop detected at insn %d\n", insn_idx); + verbose(env, "cur state:"); +--- a/tools/testing/selftests/bpf/progs/cb_refs.c ++++ b/tools/testing/selftests/bpf/progs/cb_refs.c +@@ -33,6 +33,7 @@ int underflow_prog(void *ctx) + if (!p) + return 0; + bpf_for_each_map_elem(&array_map, cb1, &p, 0); ++ bpf_kfunc_call_test_release(p); + return 0; + } + +--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c ++++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +@@ -119,15 +119,26 @@ __naked int global_subprog_result_precis + + SEC("?raw_tp") + __success __log_level(2) ++/* First simulated path does not include callback body */ + __msg("14: (0f) r1 += r6") +-__msg("mark_precise: frame0: last_idx 14 first_idx 10") ++__msg("mark_precise: frame0: last_idx 14 first_idx 9") + __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") + __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") + __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") + __msg("mark_precise: frame0: regs=r6 
stack= before 10: (bf) r6 = r0") +-__msg("mark_precise: frame0: parent state regs=r0 stack=:") +-__msg("mark_precise: frame0: last_idx 18 first_idx 0") +-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") ++__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") ++/* State entering callback body popped from states stack */ ++__msg("from 9 to 17: frame1:") ++__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") ++__msg("17: (b7) r0 = 0") ++__msg("18: (95) exit") ++__msg("returning from callee:") ++__msg("to caller at 9:") ++/* r4 (flags) is always precise for bpf_loop() */ ++__msg("frame 0: propagating r4") ++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") ++__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit") ++__msg("from 18 to 9: safe") + __naked int callback_result_precise(void) + { + asm volatile ( +@@ -233,20 +244,36 @@ __naked int parent_callee_saved_reg_prec + + SEC("?raw_tp") + __success __log_level(2) ++/* First simulated path does not include callback body */ + __msg("12: (0f) r1 += r6") +-__msg("mark_precise: frame0: last_idx 12 first_idx 10") ++__msg("mark_precise: frame0: last_idx 12 first_idx 9") + __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") + __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") ++__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") + __msg("mark_precise: frame0: parent state regs=r6 stack=:") +-__msg("mark_precise: frame0: last_idx 16 first_idx 0") +-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") +-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") ++__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") + __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") + __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") + __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") + __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") + __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") ++/* State entering callback body popped from states stack */ ++__msg("from 9 to 15: frame1:") ++__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") ++__msg("15: (b7) r0 = 0") ++__msg("16: (95) exit") ++__msg("returning from callee:") ++__msg("to caller at 9:") ++/* r4 (flags) is always precise for bpf_loop(), ++ * r6 was marked before backtracking to callback body. 
++ */ ++__msg("frame 0: propagating r4,r6") ++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") ++__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit") ++__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") ++__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") ++__msg("mark_precise: frame0: parent state regs= stack=:") ++__msg("from 16 to 9: safe") + __naked int parent_callee_saved_reg_precise_with_callback(void) + { + asm volatile ( +@@ -373,22 +400,38 @@ __naked int parent_stack_slot_precise_gl + + SEC("?raw_tp") + __success __log_level(2) ++/* First simulated path does not include callback body */ + __msg("14: (0f) r1 += r6") +-__msg("mark_precise: frame0: last_idx 14 first_idx 11") ++__msg("mark_precise: frame0: last_idx 14 first_idx 10") + __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") + __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") + __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") ++__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") + __msg("mark_precise: frame0: parent state regs= stack=-8:") +-__msg("mark_precise: frame0: last_idx 18 first_idx 0") +-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") ++__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10") + __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") + __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") + __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") + __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") + __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") + __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") ++/* State entering callback body popped from states stack */ ++__msg("from 10 to 17: frame1:") ++__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") ++__msg("17: (b7) r0 = 0") ++__msg("18: (95) exit") ++__msg("returning from callee:") ++__msg("to caller at 10:") ++/* r4 (flags) is always precise for bpf_loop(), ++ * fp-8 was marked before backtracking to callback body. ++ */ ++__msg("frame 0: propagating r4,fp-8") ++__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") ++__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit") ++__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") ++__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") ++__msg("mark_precise: frame0: parent state regs= stack=:") ++__msg("from 18 to 10: safe") + __naked int parent_stack_slot_precise_with_callback(void) + { + asm volatile ( diff --git a/queue-6.6/bpf-widening-for-callback-iterators.patch b/queue-6.6/bpf-widening-for-callback-iterators.patch new file mode 100644 index 00000000000..03e7d328379 --- /dev/null +++ b/queue-6.6/bpf-widening-for-callback-iterators.patch @@ -0,0 +1,83 @@ +From cafe2c21508a38cdb3ed22708842e957b2572c3e Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 21 Nov 2023 04:06:58 +0200 +Subject: bpf: widening for callback iterators + +From: Eduard Zingerman + +commit cafe2c21508a38cdb3ed22708842e957b2572c3e upstream. + +Callbacks are similar to open coded iterators, so add imprecise +widening logic for callback body processing. 
This makes callback-based
+loops behave identically to open coded iterators, e.g. allowing
+verification of programs like the one below:
+
+  struct ctx { u32 i; };
+  int cb(u32 idx, struct ctx* ctx)
+  {
+	++ctx->i;
+	return 0;
+  }
+  ...
+  struct ctx ctx = { .i = 0 };
+  bpf_loop(100, cb, &ctx, 0);
+  ...
+
+Acked-by: Andrii Nakryiko
+Signed-off-by: Eduard Zingerman
+Link: https://lore.kernel.org/r/20231121020701.26440-9-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/bpf/verifier.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -9595,9 +9595,10 @@ static bool in_rbtree_lock_required_cb(s
+ 
+ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ {
+-	struct bpf_verifier_state *state = env->cur_state;
++	struct bpf_verifier_state *state = env->cur_state, *prev_st;
+ 	struct bpf_func_state *caller, *callee;
+ 	struct bpf_reg_state *r0;
++	bool in_callback_fn;
+ 	int err;
+ 
+ 	callee = state->frame[state->curframe];
+@@ -9659,7 +9660,8 @@ static int prepare_func_exit(struct bpf_
+ 	 * there function call logic would reschedule callback visit. If iteration
+ 	 * converges is_state_visited() would prune that visit eventually.
+ 	 */
+-	if (callee->in_callback_fn)
++	in_callback_fn = callee->in_callback_fn;
++	if (in_callback_fn)
+ 		*insn_idx = callee->callsite;
+ 	else
+ 		*insn_idx = callee->callsite + 1;
+@@ -9673,6 +9675,24 @@ static int prepare_func_exit(struct bpf_
+ 	/* clear everything in the callee */
+ 	free_func_state(callee);
+ 	state->frame[state->curframe--] = NULL;
++
++	/* for callbacks widen imprecise scalars to make programs like below verify:
++	 *
++	 * struct ctx { int i; }
++	 * void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
++	 * ...
++	 * struct ctx = { .i = 0; }
++	 * bpf_loop(100, cb, &ctx, 0);
++	 *
++	 * This is similar to what is done in process_iter_next_call() for open
++	 * coded iterators.
++	 */
++	prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
++	if (prev_st) {
++		err = widen_imprecise_scalars(env, prev_st, state);
++		if (err)
++			return err;
++	}
+ 	return 0;
+ }
+ 
diff --git a/queue-6.6/selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch b/queue-6.6/selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch
new file mode 100644
index 00000000000..98ea140d951
--- /dev/null
+++ b/queue-6.6/selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch
@@ -0,0 +1,103 @@
+From 57e2a52deeb12ab84c15c6d0fb93638b5b94001b Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman
+Date: Tue, 21 Nov 2023 04:07:01 +0200
+Subject: selftests/bpf: check if max number of bpf_loop iterations is tracked
+
+From: Eduard Zingerman
+
+commit 57e2a52deeb12ab84c15c6d0fb93638b5b94001b upstream.
+
+Check that even if bpf_loop() callback simulation does not converge to
+a specific state, verification can proceed via "brute force"
+simulation of the maximal number of callback calls.
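+
+For illustration, a minimal sketch of such a program (hypothetical
+callback, assuming a ctx structure with a single counter field):
+
+  static int cb(__u32 idx, struct ctx *ctx)
+  {
+        ctx->i++; /* ctx->i differs on every iteration,
+                   * so callback states never converge */
+        return 0;
+  }
+  ...
+  bpf_loop(2, cb, &ctx, 0); /* nr_loops == 2 is known, so at most
+                             * two callback simulations are needed */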
+ +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231121020701.26440-12-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c | 75 ++++++++++ + 1 file changed, 75 insertions(+) + +--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c ++++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +@@ -164,4 +164,79 @@ int unsafe_find_vma(void *unused) + return choice_arr[loop_ctx.i]; + } + ++static int iter_limit_cb(__u32 idx, struct num_context *ctx) ++{ ++ ctx->i++; ++ return 0; ++} ++ ++SEC("?raw_tp") ++__success ++int bpf_loop_iter_limit_ok(void *unused) ++{ ++ struct num_context ctx = { .i = 0 }; ++ ++ bpf_loop(1, iter_limit_cb, &ctx, 0); ++ return choice_arr[ctx.i]; ++} ++ ++SEC("?raw_tp") ++__failure __msg("invalid access to map value, value_size=2 off=2 size=1") ++int bpf_loop_iter_limit_overflow(void *unused) ++{ ++ struct num_context ctx = { .i = 0 }; ++ ++ bpf_loop(2, iter_limit_cb, &ctx, 0); ++ return choice_arr[ctx.i]; ++} ++ ++static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx) ++{ ++ ctx->i += 100; ++ return 0; ++} ++ ++static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx) ++{ ++ ctx->i += 10; ++ return 0; ++} ++ ++static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx) ++{ ++ ctx->i += 1; ++ bpf_loop(1, iter_limit_level2a_cb, ctx, 0); ++ bpf_loop(1, iter_limit_level2b_cb, ctx, 0); ++ return 0; ++} ++ ++/* Check that path visiting every callback function once had been ++ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags, ++ * with each decimal digit corresponding to a callback visit marker. ++ */ ++SEC("socket") ++__success __retval(111111) ++int bpf_loop_iter_limit_nested(void *unused) ++{ ++ struct num_context ctx1 = { .i = 0 }; ++ struct num_context ctx2 = { .i = 0 }; ++ __u64 a, b, c; ++ ++ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0); ++ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0); ++ a = ctx1.i; ++ b = ctx2.i; ++ /* Force 'ctx1.i' and 'ctx2.i' precise. */ ++ c = choice_arr[(a + b) % 2]; ++ /* This makes 'c' zero, but neither clang nor verifier know it. */ ++ c /= 10; ++ /* Make sure that verifier does not visit 'impossible' states: ++ * enumerate all possible callback visit masks. ++ */ ++ if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 && ++ b != 0 && b != 1 && b != 11 && b != 101 && b != 111) ++ asm volatile ("r0 /= 0;" ::: "r0"); ++ return 1000 * a + b + c; ++} ++ + char _license[] SEC("license") = "GPL"; diff --git a/queue-6.6/selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch b/queue-6.6/selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch new file mode 100644 index 00000000000..e4868c9061f --- /dev/null +++ b/queue-6.6/selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch @@ -0,0 +1,228 @@ +From 64870feebecb7130291a55caf0ce839a87405a70 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 24 Oct 2023 03:09:16 +0300 +Subject: selftests/bpf: test if state loops are detected in a tricky case + +From: Eduard Zingerman + +commit 64870feebecb7130291a55caf0ce839a87405a70 upstream. + +A convoluted test case for iterators convergence logic that +demonstrates that states with branch count equal to 0 might still be +a part of not completely explored loop. + +E.g. 
consider the following state diagram: + + initial Here state 'succ' was processed first, + | it was eventually tracked to produce a + V state identical to 'hdr'. + .---------> hdr All branches from 'succ' had been explored + | | and thus 'succ' has its .branches == 0. + | V + | .------... Suppose states 'cur' and 'succ' correspond + | | | to the same instruction + callsites. + | V V In such case it is necessary to check + | ... ... whether 'succ' and 'cur' are identical. + | | | If 'succ' and 'cur' are a part of the same loop + | V V they have to be compared exactly. + | succ <- cur + | | + | V + | ... + | | + '----' + +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231024000917.12153-7-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/progs/iters.c | 177 ++++++++++++++++++++++++++++++ + 1 file changed, 177 insertions(+) + +--- a/tools/testing/selftests/bpf/progs/iters.c ++++ b/tools/testing/selftests/bpf/progs/iters.c +@@ -999,6 +999,183 @@ __naked int loop_state_deps1(void) + } + + SEC("?raw_tp") ++__failure ++__msg("math between fp pointer and register with unbounded") ++__flag(BPF_F_TEST_STATE_FREQ) ++__naked int loop_state_deps2(void) ++{ ++ /* This is equivalent to C program below. ++ * ++ * The case turns out to be tricky in a sense that: ++ * - states with read+precise mark on c are explored only on a second ++ * iteration of the first inner loop and in a state which is pushed to ++ * states stack first. ++ * - states with c=-25 are explored only on a second iteration of the ++ * second inner loop and in a state which is pushed to states stack ++ * first. ++ * ++ * Depending on the details of iterator convergence logic ++ * verifier might stop states traversal too early and miss ++ * unsafe c=-25 memory access. 
++ * ++ * j = iter_new(); // fp[-16] ++ * a = 0; // r6 ++ * b = 0; // r7 ++ * c = -24; // r8 ++ * while (iter_next(j)) { ++ * i = iter_new(); // fp[-8] ++ * a = 0; // r6 ++ * b = 0; // r7 ++ * while (iter_next(i)) { ++ * if (a == 1) { ++ * a = 0; ++ * b = 1; ++ * } else if (a == 0) { ++ * a = 1; ++ * if (random() == 42) ++ * continue; ++ * if (b == 1) { ++ * *(r10 + c) = 7; // this is not safe ++ * iter_destroy(i); ++ * iter_destroy(j); ++ * return; ++ * } ++ * } ++ * } ++ * iter_destroy(i); ++ * i = iter_new(); // fp[-8] ++ * a = 0; // r6 ++ * b = 0; // r7 ++ * while (iter_next(i)) { ++ * if (a == 1) { ++ * a = 0; ++ * b = 1; ++ * } else if (a == 0) { ++ * a = 1; ++ * if (random() == 42) ++ * continue; ++ * if (b == 1) { ++ * a = 0; ++ * c = -25; ++ * } ++ * } ++ * } ++ * iter_destroy(i); ++ * } ++ * iter_destroy(j); ++ * return; ++ */ ++ asm volatile ( ++ "r1 = r10;" ++ "r1 += -16;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "r6 = 0;" ++ "r7 = 0;" ++ "r8 = -24;" ++ "j_loop_%=:" ++ "r1 = r10;" ++ "r1 += -16;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto j_loop_end_%=;" ++ ++ /* first inner loop */ ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "r6 = 0;" ++ "r7 = 0;" ++ "i_loop_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto i_loop_end_%=;" ++ "check_one_r6_%=:" ++ "if r6 != 1 goto check_zero_r6_%=;" ++ "r6 = 0;" ++ "r7 = 1;" ++ "goto i_loop_%=;" ++ "check_zero_r6_%=:" ++ "if r6 != 0 goto i_loop_%=;" ++ "r6 = 1;" ++ "call %[bpf_get_prandom_u32];" ++ "if r0 != 42 goto check_one_r7_%=;" ++ "goto i_loop_%=;" ++ "check_one_r7_%=:" ++ "if r7 != 1 goto i_loop_%=;" ++ "r0 = r10;" ++ "r0 += r8;" ++ "r1 = 7;" ++ "*(u64 *)(r0 + 0) = r1;" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r1 = r10;" ++ "r1 += -16;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ "i_loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ ++ /* second inner loop */ ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "r6 = 0;" ++ "r7 = 0;" ++ "i2_loop_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto i2_loop_end_%=;" ++ "check2_one_r6_%=:" ++ "if r6 != 1 goto check2_zero_r6_%=;" ++ "r6 = 0;" ++ "r7 = 1;" ++ "goto i2_loop_%=;" ++ "check2_zero_r6_%=:" ++ "if r6 != 0 goto i2_loop_%=;" ++ "r6 = 1;" ++ "call %[bpf_get_prandom_u32];" ++ "if r0 != 42 goto check2_one_r7_%=;" ++ "goto i2_loop_%=;" ++ "check2_one_r7_%=:" ++ "if r7 != 1 goto i2_loop_%=;" ++ "r6 = 0;" ++ "r8 = -25;" ++ "goto i2_loop_%=;" ++ "i2_loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ ++ "r6 = 0;" ++ "r7 = 0;" ++ "goto j_loop_%=;" ++ "j_loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -16;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ : ++ : __imm(bpf_get_prandom_u32), ++ __imm(bpf_iter_num_new), ++ __imm(bpf_iter_num_next), ++ __imm(bpf_iter_num_destroy) ++ : __clobber_all ++ ); ++} ++ ++SEC("?raw_tp") + __success + __naked int triple_continue(void) + { diff --git a/queue-6.6/selftests-bpf-test-widening-for-iterating-callbacks.patch b/queue-6.6/selftests-bpf-test-widening-for-iterating-callbacks.patch new file mode 100644 index 00000000000..cbd8b5ee923 --- /dev/null +++ b/queue-6.6/selftests-bpf-test-widening-for-iterating-callbacks.patch @@ -0,0 +1,56 @@ +From 9f3330aa644d6d979eb064c46e85c62d4b4eac75 Mon Sep 17 00:00:00 2001 +From: Eduard Zingerman +Date: Tue, 21 Nov 2023 
04:06:59 +0200
+Subject: selftests/bpf: test widening for iterating callbacks
+
+From: Eduard Zingerman
+
+commit 9f3330aa644d6d979eb064c46e85c62d4b4eac75 upstream.
+
+A test case to verify that imprecise scalar widening is applied to
+the state entering the callback when the callback call is simulated
+repeatedly.
+
+Signed-off-by: Eduard Zingerman
+Link: https://lore.kernel.org/r/20231121020701.26440-10-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c | 20 ++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
++++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+@@ -25,6 +25,7 @@ struct buf_context {
+ 
+ struct num_context {
+ 	__u64 i;
++	__u64 j;
+ };
+ 
+ __u8 choice_arr[2] = { 0, 1 };
+@@ -69,6 +70,25 @@ int unsafe_on_zero_iter(void *unused)
+ 	return choice_arr[loop_ctx.i];
+ }
+ 
++static int widening_cb(__u32 idx, struct num_context *ctx)
++{
++	++ctx->i;
++	return 0;
++}
++
++SEC("?raw_tp")
++__success
++int widening(void *unused)
++{
++	struct num_context loop_ctx = { .i = 0, .j = 1 };
++
++	bpf_loop(100, widening_cb, &loop_ctx, 0);
++	/* loop_ctx.j is not changed during callback iteration,
++	 * verifier should not apply widening to it.
++	 */
++	return choice_arr[loop_ctx.j];
++}
++
+ static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+ {
+ 	for (;;) {}
diff --git a/queue-6.6/selftests-bpf-tests-for-iterating-callbacks.patch b/queue-6.6/selftests-bpf-tests-for-iterating-callbacks.patch
new file mode 100644
index 00000000000..18dfeedb41b
--- /dev/null
+++ b/queue-6.6/selftests-bpf-tests-for-iterating-callbacks.patch
@@ -0,0 +1,197 @@
+From 958465e217dbf5fc6677d42d8827fb3073d86afd Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman
+Date: Tue, 21 Nov 2023 04:06:57 +0200
+Subject: selftests/bpf: tests for iterating callbacks
+
+From: Eduard Zingerman
+
+commit 958465e217dbf5fc6677d42d8827fb3073d86afd upstream.
+
+A set of test cases to check the behavior of the callback handling
+logic; verify that the verifier catches the following situations:
+- program not safe on second callback iteration (see the condensed
+  sketch below);
+- program not safe on zero callback iterations;
+- infinite loop inside a callback.
+
+Verify that callback logic works for bpf_loop, bpf_for_each_map_elem,
+bpf_user_ringbuf_drain, bpf_find_vma.
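+
+For illustration, a condensed sketch of the first situation (this
+mirrors the unsafe_on_2nd_iter_cb test added below; the poisoned
+pointer is only dereferenced on the second iteration, so simulating
+the callback body just once would miss the bug):
+
+  static int cb(__u32 idx, struct buf_context *ctx)
+  {
+        if (idx == 0) {
+                ctx->buf = (char *)(0xDEAD); /* pointer becomes a scalar */
+                return 0;
+        }
+        if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+                return 1;
+        return 0;
+  }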
+ +Acked-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231121020701.26440-8-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c | 147 ++++++++++ + 2 files changed, 149 insertions(+) + create mode 100644 tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c + +--- a/tools/testing/selftests/bpf/prog_tests/verifier.c ++++ b/tools/testing/selftests/bpf/prog_tests/verifier.c +@@ -31,6 +31,7 @@ + #include "verifier_helper_restricted.skel.h" + #include "verifier_helper_value_access.skel.h" + #include "verifier_int_ptr.skel.h" ++#include "verifier_iterating_callbacks.skel.h" + #include "verifier_jeq_infer_not_null.skel.h" + #include "verifier_ld_ind.skel.h" + #include "verifier_ldsx.skel.h" +@@ -138,6 +139,7 @@ void test_verifier_helper_packet_access( + void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } + void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } + void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } ++void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } + void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } + void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } + void test_verifier_ldsx(void) { RUN(verifier_ldsx); } +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +@@ -0,0 +1,147 @@ ++// SPDX-License-Identifier: GPL-2.0 ++ ++#include ++#include ++#include "bpf_misc.h" ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_ARRAY); ++ __uint(max_entries, 8); ++ __type(key, __u32); ++ __type(value, __u64); ++} map SEC(".maps"); ++ ++struct { ++ __uint(type, BPF_MAP_TYPE_USER_RINGBUF); ++ __uint(max_entries, 8); ++} ringbuf SEC(".maps"); ++ ++struct vm_area_struct; ++struct bpf_map; ++ ++struct buf_context { ++ char *buf; ++}; ++ ++struct num_context { ++ __u64 i; ++}; ++ ++__u8 choice_arr[2] = { 0, 1 }; ++ ++static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx) ++{ ++ if (idx == 0) { ++ ctx->buf = (char *)(0xDEAD); ++ return 0; ++ } ++ ++ if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE))) ++ return 1; ++ ++ return 0; ++} ++ ++SEC("?raw_tp") ++__failure __msg("R1 type=scalar expected=fp") ++int unsafe_on_2nd_iter(void *unused) ++{ ++ char buf[4]; ++ struct buf_context loop_ctx = { .buf = buf }; ++ ++ bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0); ++ return 0; ++} ++ ++static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx) ++{ ++ ctx->i = 0; ++ return 0; ++} ++ ++SEC("?raw_tp") ++__failure __msg("invalid access to map value, value_size=2 off=32 size=1") ++int unsafe_on_zero_iter(void *unused) ++{ ++ struct num_context loop_ctx = { .i = 32 }; ++ ++ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0); ++ return choice_arr[loop_ctx.i]; ++} ++ ++static int loop_detection_cb(__u32 idx, struct num_context *ctx) ++{ ++ for (;;) {} ++ return 0; ++} ++ ++SEC("?raw_tp") ++__failure __msg("infinite loop detected") ++int loop_detection(void *unused) ++{ ++ struct num_context loop_ctx = { .i = 0 }; ++ ++ bpf_loop(100, loop_detection_cb, &loop_ctx, 0); ++ return 0; ++} ++ ++static __always_inline __u64 oob_state_machine(struct num_context *ctx) ++{ ++ switch (ctx->i) { ++ case 0: ++ ctx->i = 1; ++ break; ++ case 1: ++ ctx->i = 32; ++ break; ++ } ++ return 0; ++} ++ ++static __u64 
for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
++{
++	return oob_state_machine(data);
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_for_each_map_elem(void *unused)
++{
++	struct num_context loop_ctx = { .i = 0 };
++
++	bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
++	return choice_arr[loop_ctx.i];
++}
++
++static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
++{
++	return oob_state_machine(data);
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_ringbuf_drain(void *unused)
++{
++	struct num_context loop_ctx = { .i = 0 };
++
++	bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
++	return choice_arr[loop_ctx.i];
++}
++
++static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
++{
++	return oob_state_machine(data);
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_find_vma(void *unused)
++{
++	struct task_struct *task = bpf_get_current_task_btf();
++	struct num_context loop_ctx = { .i = 0 };
++
++	bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
++	return choice_arr[loop_ctx.i];
++}
++
++char _license[] SEC("license") = "GPL";
diff --git a/queue-6.6/selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch b/queue-6.6/selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch
new file mode 100644
index 00000000000..88e5d03b7ab
--- /dev/null
+++ b/queue-6.6/selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch
@@ -0,0 +1,556 @@
+From 389ede06c2974b2f878a7ebff6b0f4f707f9db74 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman
+Date: Tue, 24 Oct 2023 03:09:14 +0300
+Subject: selftests/bpf: tests with delayed read/precision makrs in loop body
+
+From: Eduard Zingerman
+
+commit 389ede06c2974b2f878a7ebff6b0f4f707f9db74 upstream.
+
+These test cases try to hide read and precision marks from loop
+convergence logic: marks would only be assigned on subsequent loop
+iterations or after exploring states pushed to env->head stack first.
+Without the verifier fix to use exact states comparison logic for
+iterators convergence, these tests (except 'triple_continue') would be
+erroneously marked as safe.
+
+Signed-off-by: Eduard Zingerman
+Link: https://lore.kernel.org/r/20231024000917.12153-5-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ tools/testing/selftests/bpf/progs/iters.c | 518 ++++++++++++++++++++++++++++++
+ 1 file changed, 518 insertions(+)
+
+--- a/tools/testing/selftests/bpf/progs/iters.c
++++ b/tools/testing/selftests/bpf/progs/iters.c
+@@ -14,6 +14,13 @@ int my_pid;
+ int arr[256];
+ int small_arr[16] SEC(".data.small_arr");
+ 
++struct {
++	__uint(type, BPF_MAP_TYPE_HASH);
++	__uint(max_entries, 10);
++	__type(key, int);
++	__type(value, int);
++} amap SEC(".maps");
++
+ #ifdef REAL_TEST
+ #define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
+ #else
+@@ -716,4 +723,515 @@ int iter_pass_iter_ptr_to_subprog(const
+ 	return 0;
+ }
+ 
++SEC("?raw_tp")
++__failure
++__msg("R1 type=scalar expected=fp")
++__naked int delayed_read_mark(void)
++{
++	/* This is equivalent to C program below.
++	 * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
++	 * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
++ * At this point iterator next() call is reached with r7 that has no read mark. ++ * Loop body with r7=0xdead would only be visited if verifier would decide to continue ++ * with second loop iteration. Absence of read mark on r7 might affect state ++ * equivalent logic used for iterator convergence tracking. ++ * ++ * r7 = &fp[-16] ++ * fp[-16] = 0 ++ * r6 = bpf_get_prandom_u32() ++ * bpf_iter_num_new(&fp[-8], 0, 10) ++ * while (bpf_iter_num_next(&fp[-8])) { ++ * r6++ ++ * if (r6 != 42) { ++ * r7 = 0xdead ++ * continue; ++ * } ++ * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe ++ * } ++ * bpf_iter_num_destroy(&fp[-8]) ++ * return 0 ++ */ ++ asm volatile ( ++ "r7 = r10;" ++ "r7 += -16;" ++ "r0 = 0;" ++ "*(u64 *)(r7 + 0) = r0;" ++ "call %[bpf_get_prandom_u32];" ++ "r6 = r0;" ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "1:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto 2f;" ++ "r6 += 1;" ++ "if r6 != 42 goto 3f;" ++ "r7 = 0xdead;" ++ "goto 1b;" ++ "3:" ++ "r1 = r7;" ++ "r2 = 8;" ++ "r3 = 0xdeadbeef;" ++ "call %[bpf_probe_read_user];" ++ "goto 1b;" ++ "2:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ : ++ : __imm(bpf_get_prandom_u32), ++ __imm(bpf_iter_num_new), ++ __imm(bpf_iter_num_next), ++ __imm(bpf_iter_num_destroy), ++ __imm(bpf_probe_read_user) ++ : __clobber_all ++ ); ++} ++ ++SEC("?raw_tp") ++__failure ++__msg("math between fp pointer and register with unbounded") ++__naked int delayed_precision_mark(void) ++{ ++ /* This is equivalent to C program below. ++ * The test is similar to delayed_iter_mark but verifies that incomplete ++ * precision don't fool verifier. ++ * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32. ++ * State with r7=-16 is visited first and follows r6 != 42 ... continue branch. ++ * At this point iterator next() call is reached with r7 that has no read ++ * and precision marks. ++ * Loop body with r7=-32 would only be visited if verifier would decide to continue ++ * with second loop iteration. Absence of precision mark on r7 might affect state ++ * equivalent logic used for iterator convergence tracking. 
++ * ++ * r8 = 0 ++ * fp[-16] = 0 ++ * r7 = -16 ++ * r6 = bpf_get_prandom_u32() ++ * bpf_iter_num_new(&fp[-8], 0, 10) ++ * while (bpf_iter_num_next(&fp[-8])) { ++ * if (r6 != 42) { ++ * r7 = -32 ++ * r6 = bpf_get_prandom_u32() ++ * continue; ++ * } ++ * r0 = r10 ++ * r0 += r7 ++ * r8 = *(u64 *)(r0 + 0) // this is not safe ++ * r6 = bpf_get_prandom_u32() ++ * } ++ * bpf_iter_num_destroy(&fp[-8]) ++ * return r8 ++ */ ++ asm volatile ( ++ "r8 = 0;" ++ "*(u64 *)(r10 - 16) = r8;" ++ "r7 = -16;" ++ "call %[bpf_get_prandom_u32];" ++ "r6 = r0;" ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "1:" ++ "r1 = r10;" ++ "r1 += -8;\n" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto 2f;" ++ "if r6 != 42 goto 3f;" ++ "r7 = -32;" ++ "call %[bpf_get_prandom_u32];" ++ "r6 = r0;" ++ "goto 1b;\n" ++ "3:" ++ "r0 = r10;" ++ "r0 += r7;" ++ "r8 = *(u64 *)(r0 + 0);" ++ "call %[bpf_get_prandom_u32];" ++ "r6 = r0;" ++ "goto 1b;\n" ++ "2:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = r8;" ++ "exit;" ++ : ++ : __imm(bpf_get_prandom_u32), ++ __imm(bpf_iter_num_new), ++ __imm(bpf_iter_num_next), ++ __imm(bpf_iter_num_destroy), ++ __imm(bpf_probe_read_user) ++ : __clobber_all ++ ); ++} ++ ++SEC("?raw_tp") ++__failure ++__msg("math between fp pointer and register with unbounded") ++__flag(BPF_F_TEST_STATE_FREQ) ++__naked int loop_state_deps1(void) ++{ ++ /* This is equivalent to C program below. ++ * ++ * The case turns out to be tricky in a sense that: ++ * - states with c=-25 are explored only on a second iteration ++ * of the outer loop; ++ * - states with read+precise mark on c are explored only on ++ * second iteration of the inner loop and in a state which ++ * is pushed to states stack first. ++ * ++ * Depending on the details of iterator convergence logic ++ * verifier might stop states traversal too early and miss ++ * unsafe c=-25 memory access. 
++ * ++ * j = iter_new(); // fp[-16] ++ * a = 0; // r6 ++ * b = 0; // r7 ++ * c = -24; // r8 ++ * while (iter_next(j)) { ++ * i = iter_new(); // fp[-8] ++ * a = 0; // r6 ++ * b = 0; // r7 ++ * while (iter_next(i)) { ++ * if (a == 1) { ++ * a = 0; ++ * b = 1; ++ * } else if (a == 0) { ++ * a = 1; ++ * if (random() == 42) ++ * continue; ++ * if (b == 1) { ++ * *(r10 + c) = 7; // this is not safe ++ * iter_destroy(i); ++ * iter_destroy(j); ++ * return; ++ * } ++ * } ++ * } ++ * iter_destroy(i); ++ * a = 0; ++ * b = 0; ++ * c = -25; ++ * } ++ * iter_destroy(j); ++ * return; ++ */ ++ asm volatile ( ++ "r1 = r10;" ++ "r1 += -16;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "r6 = 0;" ++ "r7 = 0;" ++ "r8 = -24;" ++ "j_loop_%=:" ++ "r1 = r10;" ++ "r1 += -16;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto j_loop_end_%=;" ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "r6 = 0;" ++ "r7 = 0;" ++ "i_loop_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto i_loop_end_%=;" ++ "check_one_r6_%=:" ++ "if r6 != 1 goto check_zero_r6_%=;" ++ "r6 = 0;" ++ "r7 = 1;" ++ "goto i_loop_%=;" ++ "check_zero_r6_%=:" ++ "if r6 != 0 goto i_loop_%=;" ++ "r6 = 1;" ++ "call %[bpf_get_prandom_u32];" ++ "if r0 != 42 goto check_one_r7_%=;" ++ "goto i_loop_%=;" ++ "check_one_r7_%=:" ++ "if r7 != 1 goto i_loop_%=;" ++ "r0 = r10;" ++ "r0 += r8;" ++ "r1 = 7;" ++ "*(u64 *)(r0 + 0) = r1;" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r1 = r10;" ++ "r1 += -16;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ "i_loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r6 = 0;" ++ "r7 = 0;" ++ "r8 = -25;" ++ "goto j_loop_%=;" ++ "j_loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -16;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ : ++ : __imm(bpf_get_prandom_u32), ++ __imm(bpf_iter_num_new), ++ __imm(bpf_iter_num_next), ++ __imm(bpf_iter_num_destroy) ++ : __clobber_all ++ ); ++} ++ ++SEC("?raw_tp") ++__success ++__naked int triple_continue(void) ++{ ++ /* This is equivalent to C program below. ++ * High branching factor of the loop body turned out to be ++ * problematic for one of the iterator convergence tracking ++ * algorithms explored. ++ * ++ * r6 = bpf_get_prandom_u32() ++ * bpf_iter_num_new(&fp[-8], 0, 10) ++ * while (bpf_iter_num_next(&fp[-8])) { ++ * if (bpf_get_prandom_u32() != 42) ++ * continue; ++ * if (bpf_get_prandom_u32() != 42) ++ * continue; ++ * if (bpf_get_prandom_u32() != 42) ++ * continue; ++ * r0 += 0; ++ * } ++ * bpf_iter_num_destroy(&fp[-8]) ++ * return 0 ++ */ ++ asm volatile ( ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "loop_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto loop_end_%=;" ++ "call %[bpf_get_prandom_u32];" ++ "if r0 != 42 goto loop_%=;" ++ "call %[bpf_get_prandom_u32];" ++ "if r0 != 42 goto loop_%=;" ++ "call %[bpf_get_prandom_u32];" ++ "if r0 != 42 goto loop_%=;" ++ "r0 += 0;" ++ "goto loop_%=;" ++ "loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ : ++ : __imm(bpf_get_prandom_u32), ++ __imm(bpf_iter_num_new), ++ __imm(bpf_iter_num_next), ++ __imm(bpf_iter_num_destroy) ++ : __clobber_all ++ ); ++} ++ ++SEC("?raw_tp") ++__success ++__naked int widen_spill(void) ++{ ++ /* This is equivalent to C program below. 
++ * The counter is stored in fp[-16], if this counter is not widened ++ * verifier states representing loop iterations would never converge. ++ * ++ * fp[-16] = 0 ++ * bpf_iter_num_new(&fp[-8], 0, 10) ++ * while (bpf_iter_num_next(&fp[-8])) { ++ * r0 = fp[-16]; ++ * r0 += 1; ++ * fp[-16] = r0; ++ * } ++ * bpf_iter_num_destroy(&fp[-8]) ++ * return 0 ++ */ ++ asm volatile ( ++ "r0 = 0;" ++ "*(u64 *)(r10 - 16) = r0;" ++ "r1 = r10;" ++ "r1 += -8;" ++ "r2 = 0;" ++ "r3 = 10;" ++ "call %[bpf_iter_num_new];" ++ "loop_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_next];" ++ "if r0 == 0 goto loop_end_%=;" ++ "r0 = *(u64 *)(r10 - 16);" ++ "r0 += 1;" ++ "*(u64 *)(r10 - 16) = r0;" ++ "goto loop_%=;" ++ "loop_end_%=:" ++ "r1 = r10;" ++ "r1 += -8;" ++ "call %[bpf_iter_num_destroy];" ++ "r0 = 0;" ++ "exit;" ++ : ++ : __imm(bpf_iter_num_new), ++ __imm(bpf_iter_num_next), ++ __imm(bpf_iter_num_destroy) ++ : __clobber_all ++ ); ++} ++ ++SEC("raw_tp") ++__success ++__naked int checkpoint_states_deletion(void) ++{ ++ /* This is equivalent to C program below. ++ * ++ * int *a, *b, *c, *d, *e, *f; ++ * int i, sum = 0; ++ * bpf_for(i, 0, 10) { ++ * a = bpf_map_lookup_elem(&amap, &i); ++ * b = bpf_map_lookup_elem(&amap, &i); ++ * c = bpf_map_lookup_elem(&amap, &i); ++ * d = bpf_map_lookup_elem(&amap, &i); ++ * e = bpf_map_lookup_elem(&amap, &i); ++ * f = bpf_map_lookup_elem(&amap, &i); ++ * if (a) sum += 1; ++ * if (b) sum += 1; ++ * if (c) sum += 1; ++ * if (d) sum += 1; ++ * if (e) sum += 1; ++ * if (f) sum += 1; ++ * } ++ * return 0; ++ * ++ * The body of the loop spawns multiple simulation paths ++ * with different combination of NULL/non-NULL information for a/b/c/d/e/f. ++ * Each combination is unique from states_equal() point of view. ++ * Explored states checkpoint is created after each iterator next call. ++ * Iterator convergence logic expects that eventually current state ++ * would get equal to one of the explored states and thus loop ++ * exploration would be finished (at-least for a specific path). ++ * Verifier evicts explored states with high miss to hit ratio ++ * to to avoid comparing current state with too many explored ++ * states per instruction. ++ * This test is designed to "stress test" eviction policy defined using formula: ++ * ++ * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted ++ * ++ * Currently N is set to 64, which allows for 6 variables in this test. 
++	 */
++	asm volatile (
++		"r6 = 0;"                  /* a */
++		"r7 = 0;"                  /* b */
++		"r8 = 0;"                  /* c */
++		"*(u64 *)(r10 - 24) = r6;" /* d */
++		"*(u64 *)(r10 - 32) = r6;" /* e */
++		"*(u64 *)(r10 - 40) = r6;" /* f */
++		"r9 = 0;"                  /* sum */
++		"r1 = r10;"
++		"r1 += -8;"
++		"r2 = 0;"
++		"r3 = 10;"
++		"call %[bpf_iter_num_new];"
++	"loop_%=:"
++		"r1 = r10;"
++		"r1 += -8;"
++		"call %[bpf_iter_num_next];"
++		"if r0 == 0 goto loop_end_%=;"
++
++		"*(u64 *)(r10 - 16) = r0;"
++
++		"r1 = %[amap] ll;"
++		"r2 = r10;"
++		"r2 += -16;"
++		"call %[bpf_map_lookup_elem];"
++		"r6 = r0;"
++
++		"r1 = %[amap] ll;"
++		"r2 = r10;"
++		"r2 += -16;"
++		"call %[bpf_map_lookup_elem];"
++		"r7 = r0;"
++
++		"r1 = %[amap] ll;"
++		"r2 = r10;"
++		"r2 += -16;"
++		"call %[bpf_map_lookup_elem];"
++		"r8 = r0;"
++
++		"r1 = %[amap] ll;"
++		"r2 = r10;"
++		"r2 += -16;"
++		"call %[bpf_map_lookup_elem];"
++		"*(u64 *)(r10 - 24) = r0;"
++
++		"r1 = %[amap] ll;"
++		"r2 = r10;"
++		"r2 += -16;"
++		"call %[bpf_map_lookup_elem];"
++		"*(u64 *)(r10 - 32) = r0;"
++
++		"r1 = %[amap] ll;"
++		"r2 = r10;"
++		"r2 += -16;"
++		"call %[bpf_map_lookup_elem];"
++		"*(u64 *)(r10 - 40) = r0;"
++
++		"if r6 == 0 goto +1;"
++		"r9 += 1;"
++		"if r7 == 0 goto +1;"
++		"r9 += 1;"
++		"if r8 == 0 goto +1;"
++		"r9 += 1;"
++		"r0 = *(u64 *)(r10 - 24);"
++		"if r0 == 0 goto +1;"
++		"r9 += 1;"
++		"r0 = *(u64 *)(r10 - 32);"
++		"if r0 == 0 goto +1;"
++		"r9 += 1;"
++		"r0 = *(u64 *)(r10 - 40);"
++		"if r0 == 0 goto +1;"
++		"r9 += 1;"
++
++		"goto loop_%=;"
++	"loop_end_%=:"
++		"r1 = r10;"
++		"r1 += -8;"
++		"call %[bpf_iter_num_destroy];"
++		"r0 = 0;"
++		"exit;"
++		:
++		: __imm(bpf_map_lookup_elem),
++		  __imm(bpf_iter_num_new),
++		  __imm(bpf_iter_num_next),
++		  __imm(bpf_iter_num_destroy),
++		  __imm_addr(amap)
++		: __clobber_all
++	);
++}
++
+ char _license[] SEC("license") = "GPL";
diff --git a/queue-6.6/selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch b/queue-6.6/selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch
new file mode 100644
index 00000000000..8a467068fb7
--- /dev/null
+++ b/queue-6.6/selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch
@@ -0,0 +1,252 @@
+From 87eb0152bcc102ecbda866978f4e54db5a3be1ef Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman
+Date: Tue, 21 Nov 2023 04:06:52 +0200
+Subject: selftests/bpf: track string payload offset as scalar in strobemeta
+
+From: Eduard Zingerman
+
+commit 87eb0152bcc102ecbda866978f4e54db5a3be1ef upstream.
+
+This change prepares strobemeta for an update in the callbacks
+verification logic. To allow bpf_loop() verification to converge when
+multiple callback iterations are considered:
+- track offset inside strobemeta_payload->payload directly as a scalar
+  value;
+- at each iteration make sure that the remaining
+  strobemeta_payload->payload capacity is sufficient for execution of
+  the read_{map,str}_var functions;
+- make sure that offset is tracked as an unbound scalar between
+  iterations, otherwise the verifier won't be able to infer that the
+  bpf_loop callback reaches identical states.
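+
+The general idiom, as a minimal sketch (hypothetical buffer and
+callback, not the strobemeta code itself):
+
+  struct ctx { char buf[256]; __u64 off; };
+
+  static int cb(__u32 idx, struct ctx *ctx)
+  {
+        __u64 off = ctx->off;
+
+        /* Re-check the remaining capacity on each iteration, so that
+         * 'off' can otherwise stay an unbound scalar between
+         * iterations.
+         */
+        if (off > sizeof(ctx->buf) - 8)
+                return 1;
+        __builtin_memset(&ctx->buf[off], 0, 8);
+        ctx->off = off + 8;
+        return 0;
+  }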
+ +Acked-by: Andrii Nakryiko +Signed-off-by: Eduard Zingerman +Link: https://lore.kernel.org/r/20231121020701.26440-3-eddyz87@gmail.com +Signed-off-by: Alexei Starovoitov +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/bpf/progs/strobemeta.h | 78 +++++++++++++++---------- + 1 file changed, 48 insertions(+), 30 deletions(-) + +--- a/tools/testing/selftests/bpf/progs/strobemeta.h ++++ b/tools/testing/selftests/bpf/progs/strobemeta.h +@@ -24,9 +24,11 @@ struct task_struct {}; + #define STACK_TABLE_EPOCH_SHIFT 20 + #define STROBE_MAX_STR_LEN 1 + #define STROBE_MAX_CFGS 32 ++#define READ_MAP_VAR_PAYLOAD_CAP \ ++ ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + #define STROBE_MAX_PAYLOAD \ + (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ +- STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) ++ STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP) + + struct strobe_value_header { + /* +@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, +- void *payload) ++ size_t off) + { + void *location; + uint64_t len; +@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str + return 0; + + bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); +- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); ++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr); + /* + * if bpf_probe_read_user_str returns error (<0), due to casting to + * unsinged int, it will become big number, so next check is +@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str + return 0; + + data->str_lens[idx] = len; +- return len; ++ return off + len; + } + +-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, +- size_t idx, void *tls_base, +- struct strobe_value_generic *value, +- struct strobemeta_payload *data, +- void *payload) ++static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, ++ size_t idx, void *tls_base, ++ struct strobe_value_generic *value, ++ struct strobemeta_payload *data, ++ size_t off) + { + struct strobe_map_descr* descr = &data->map_descrs[idx]; + struct strobe_map_raw map; +@@ -397,11 +399,11 @@ static __always_inline void *read_map_va + + location = calc_location(&cfg->map_locs[idx], tls_base); + if (!location) +- return payload; ++ return off; + + bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); + if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) +- return payload; ++ return off; + + descr->id = map.id; + descr->cnt = map.cnt; +@@ -410,10 +412,10 @@ static __always_inline void *read_map_va + data->req_meta_valid = 1; + } + +- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); ++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag); + if (len <= STROBE_MAX_STR_LEN) { + descr->tag_len = len; +- payload += len; ++ off += len; + } + + #ifdef NO_UNROLL +@@ -426,22 +428,22 @@ static __always_inline void *read_map_va + break; + + descr->key_lens[i] = 0; +- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, ++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, + map.entries[i].key); + if (len <= STROBE_MAX_STR_LEN) { + descr->key_lens[i] = len; +- payload += len; ++ off += len; + } + descr->val_lens[i] = 0; +- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, ++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, + 
map.entries[i].val);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->val_lens[i] = len;
+- payload += len;
++ off += len;
+ }
+ }
+
+- return payload;
++ return off;
+ }
+
+ #ifdef USE_BPF_LOOP
+@@ -455,14 +457,20 @@ struct read_var_ctx {
+ struct strobemeta_payload *data;
+ void *tls_base;
+ struct strobemeta_cfg *cfg;
+- void *payload;
++ size_t payload_off;
+ /* value gets mutated */
+ struct strobe_value_generic *value;
+ enum read_type type;
+ };
+
+-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
++static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
+ {
++ /* lose precision info for ctx->payload_off; the verifier won't track
++ * the double xor, and barrier_var() is needed to force clang to keep
++ * both xors.
++ */
++ ctx->payload_off ^= index;
++ barrier_var(ctx->payload_off);
++ ctx->payload_off ^= index;
+ switch (ctx->type) {
+ case READ_INT_VAR:
+ if (index >= STROBE_MAX_INTS)
+@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index
+ case READ_MAP_VAR:
+ if (index >= STROBE_MAX_MAPS)
+ return 1;
+- ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
+- ctx->value, ctx->data, ctx->payload);
++ if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
++ return 1;
++ ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
++ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ case READ_STR_VAR:
+ if (index >= STROBE_MAX_STRS)
+ return 1;
+- ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
+- ctx->value, ctx->data, ctx->payload);
++ if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
++ return 1;
++ ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
++ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ }
+ return 0;
+@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct tas
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ struct strobe_value_generic value = {0};
+ struct strobemeta_cfg *cfg;
+- void *tls_base, *payload;
++ size_t payload_off;
++ void *tls_base;
+
+ cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
+ if (!cfg)
+@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct tas
+
+ data->int_vals_set_mask = 0;
+ data->req_meta_valid = 0;
+- payload = data->payload;
++ payload_off = 0;
+ /*
+ * we don't have struct task_struct definition, it should be:
+ * tls_base = (void *)task->thread.fsbase;
+@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct tas
+ .tls_base = tls_base,
+ .value = &value,
+ .data = data,
+- .payload = payload,
++ .payload_off = 0,
+ };
+ int err;
+
+@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct tas
+ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_MAPS)
+ return NULL;
++
++ payload_off = ctx.payload_off;
++ /* this should not really happen, it is here only to satisfy the verifier */
++ if (payload_off > sizeof(data->payload))
++ payload_off = sizeof(data->payload);
+ #else
+ #ifdef NO_UNROLL
+ #pragma clang loop unroll(disable)
+@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct tas
+ #pragma unroll
+ #endif /* NO_UNROLL */
+ for (int i = 0; i < STROBE_MAX_STRS; ++i) {
+- payload += read_str_var(cfg, i, tls_base, &value, data, payload);
++ payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
+ }
+ #ifdef NO_UNROLL
+ #pragma clang loop unroll(disable)
+@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct tas
+ #pragma unroll
+ #endif /* NO_UNROLL */
+ for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
+- payload = read_map_var(cfg, i, tls_base, &value, data, payload);
++ payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
+ }
+ #endif /* USE_BPF_LOOP */
+
+@@ -571,7 +589,7 @@
+ * return pointer right after end of payload, so it's possible to
+ * calculate exact amount of useful data that needs to be sent
+ */
+- return payload;
++ return &data->payload[payload_off];
+ }
+
+ SEC("raw_tracepoint/kfree_skb")
diff --git a/queue-6.6/selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch b/queue-6.6/selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch
new file mode 100644
index 00000000000..fe51c37599a
--- /dev/null
+++ b/queue-6.6/selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch
@@ -0,0 +1,179 @@
+From 977bc146d4eb7070118d8a974919b33bb52732b4 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman
+Date: Tue, 21 Nov 2023 04:06:51 +0200
+Subject: selftests/bpf: track tcp payload offset as scalar in xdp_synproxy
+
+From: Eduard Zingerman
+
+commit 977bc146d4eb7070118d8a974919b33bb52732b4 upstream.
+
+This change prepares syncookie_{tc,xdp} for update in callbacks
+verification logic. To allow bpf_loop() verification to converge when
+multiple callback iterations are considered:
+- track offset inside TCP payload explicitly, not as a part of the
+  pointer;
+- make sure that the offset does not exceed MAX_PACKET_OFF enforced by
+  the verifier;
+- make sure that the offset is tracked as an unbound scalar between
+  iterations, otherwise the verifier won't be able to infer that the
+  bpf_loop callback reaches identical states.
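+
+As a sketch (illustrative only, using the names from the diff below),
+each option field is then fetched through a helper that advances a
+scalar offset and re-checks both bounds, instead of advancing a raw
+packet pointer:
+
+	/* next() returns NULL once the offset would exceed
+	 * MAX_PACKET_OFF or run past the end of the packet
+	 */
+	__u8 *opcode = next(ctx, 1);
+
+	if (!opcode)
+		return 1;
+	if (*opcode == TCPOPT_NOP)
+		return 0;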
+
+Acked-by: Andrii Nakryiko
+Signed-off-by: Eduard Zingerman
+Link: https://lore.kernel.org/r/20231121020701.26440-2-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 84 +++++++++++-------
+ 1 file changed, 52 insertions(+), 32 deletions(-)
+
+--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
++++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+@@ -53,6 +53,8 @@
+ #define DEFAULT_TTL 64
+ #define MAX_ALLOWED_PORTS 8
+
++#define MAX_PACKET_OFF 0xffff
++
+ #define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+@@ -183,63 +185,76 @@ static __always_inline __u32 tcp_time_st
+ }
+
+ struct tcpopt_context {
+- __u8 *ptr;
+- __u8 *end;
++ void *data;
+ void *data_end;
+ __be32 *tsecr;
+ __u8 wscale;
+ bool option_timestamp;
+ bool option_sack;
++ __u32 off;
+ };
+
+-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
++static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
+ {
+- __u8 opcode, opsize;
++ __u64 off = ctx->off;
++ __u8 *data;
+
+- if (ctx->ptr >= ctx->end)
+- return 1;
+- if (ctx->ptr >= ctx->data_end)
+- return 1;
++ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
++ if (off > MAX_PACKET_OFF - sz)
++ return NULL;
++
++ data = ctx->data + off;
++ barrier_var(data);
++ if (data + sz >= ctx->data_end)
++ return NULL;
+
+- opcode = ctx->ptr[0];
++ ctx->off += sz;
++ return data;
++}
+
+- if (opcode == TCPOPT_EOL)
+- return 1;
+- if (opcode == TCPOPT_NOP) {
+- ++ctx->ptr;
+- return 0;
+- }
++static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
++{
++ __u8 *opcode, *opsize, *wscale, *tsecr;
++ __u32 off = ctx->off;
+
+- if (ctx->ptr + 1 >= ctx->end)
++ opcode = next(ctx, 1);
++ if (!opcode)
+ return 1;
+- if (ctx->ptr + 1 >= ctx->data_end)
+- return 1;
+- opsize = ctx->ptr[1];
+- if (opsize < 2)
++
++ if (*opcode == TCPOPT_EOL)
+ return 1;
++ if (*opcode == TCPOPT_NOP)
++ return 0;
+
+- if (ctx->ptr + opsize > ctx->end)
++ opsize = next(ctx, 1);
++ if (!opsize || *opsize < 2)
+ return 1;
+
+- switch (opcode) {
++ switch (*opcode) {
+ case TCPOPT_WINDOW:
+- if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
+- ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
++ wscale = next(ctx, 1);
++ if (!wscale)
++ return 1;
++ if (*opsize == TCPOLEN_WINDOW)
++ ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
+ break;
+ case TCPOPT_TIMESTAMP:
+- if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
++ tsecr = next(ctx, 4);
++ if (!tsecr)
++ return 1;
++ if (*opsize == TCPOLEN_TIMESTAMP) {
+ ctx->option_timestamp = true;
+ /* Client's tsval becomes our tsecr. */
+- *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
++ *ctx->tsecr = get_unaligned((__be32 *)tsecr);
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+- if (opsize == TCPOLEN_SACK_PERM)
++ if (*opsize == TCPOLEN_SACK_PERM)
+ ctx->option_sack = true;
+ break;
+ }
+
+- ctx->ptr += opsize;
++ ctx->off = off + *opsize;
+
+ return 0;
+ }
+@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(_
+
+ static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
+ __u16 tcp_len, __be32 *tsval,
+- __be32 *tsecr, void *data_end)
++ __be32 *tsecr, void *data, void *data_end)
+ {
+ struct tcpopt_context loop_ctx = {
+- .ptr = (__u8 *)(tcp_header + 1),
+- .end = (__u8 *)tcp_header + tcp_len,
++ .data = data,
+ .data_end = data_end,
+ .tsecr = tsecr,
+ .wscale = TS_OPT_WSCALE_MASK,
+ .option_timestamp = false,
+ .option_sack = false,
++ /* Note: currently the verifier tracks .off as an unbound scalar.
++ * If the verifier at some point gets smarter and computes a
++ * bounded value for this var, beware that it might hinder
++ * bpf_loop() convergence validation.
++ */
++ .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
+ };
+ u32 cookie;
+
+@@ -635,7 +655,7 @@ static __always_inline int syncookie_han
+ cookie = (__u32)value;
+
+ if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
+- &tsopt_buf[0], &tsopt_buf[1], data_end))
++ &tsopt_buf[0], &tsopt_buf[1], data, data_end))
+ tsopt = tsopt_buf;
+
+ /* Check that there is enough space for a SYNACK.
It also covers diff --git a/queue-6.6/series b/queue-6.6/series index 463a8c54a54..baa3a013fe9 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -143,3 +143,20 @@ ksmbd-don-t-increment-epoch-if-current-state-and-request-state-are-same.patch ksmbd-send-lease-break-notification-on-file_rename_information.patch ksmbd-add-missing-set_freezable-for-freezable-kthread.patch dt-bindings-net-snps-dwmac-tx-coe-unsupported.patch +bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch +bpf-extract-same_callsites-as-utility-function.patch +bpf-exact-states-comparison-for-iterator-convergence-checks.patch +selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch +bpf-correct-loop-detection-for-iterators-convergence.patch +selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch +bpf-print-full-verifier-states-on-infinite-loop-detection.patch +selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch +selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch +bpf-extract-__check_reg_arg-utility-function.patch +bpf-extract-setup_func_entry-utility-function.patch +bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch +selftests-bpf-tests-for-iterating-callbacks.patch +bpf-widening-for-callback-iterators.patch +selftests-bpf-test-widening-for-iterating-callbacks.patch +bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch +selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch -- 2.47.3