--- /dev/null
+From 2a0992829ea3864939d917a5c7b48be6629c6217 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:15 +0300
+Subject: bpf: correct loop detection for iterators convergence
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 2a0992829ea3864939d917a5c7b48be6629c6217 upstream.
+
+It turns out that .branches > 0 in is_state_visited() is not a
+sufficient condition to identify if two verifier states form a loop
+when iterators convergence is computed. This commit adds logic to
+distinguish situations like below:
+
+ (I) initial (II) initial
+ | |
+ V V
+ .---------> hdr ..
+ | | |
+ | V V
+ | .------... .------..
+ | | | | |
+ | V V V V
+ | ... ... .-> hdr ..
+ | | | | | |
+ | V V | V V
+ | succ <- cur | succ <- cur
+ | | | |
+ | V | V
+ | ... | ...
+ | | | |
+ '----' '----'
+
+For both (I) and (II) the successor 'succ' of the current state 'cur'
+was previously explored and has a branches count of 0. However, the loop
+entry 'hdr' corresponding to 'succ' might be a part of the current DFS
+path. If that is the case, 'succ' and 'cur' are members of the same loop
+and have to be compared exactly.
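+
+For illustration only (not part of the patch): a minimal user-space C
+model of this bookkeeping, where 'on_path' stands in for st->branches > 0,
+'depth' for st->dfs_depth, and the graph is the loop A -> B -> C -> A
+plus an exit edge A -> D:
+
+ #include <stdio.h>
+
+ struct node {
+         int id, nsucc, on_path, depth, visited;
+         struct node *succ[2];
+         struct node *loop_entry;        /* analogue of st->loop_entry */
+ };
+
+ static struct node *get_loop_entry(struct node *n)
+ {
+         struct node *topmost = n->loop_entry;
+
+         while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
+                 topmost = topmost->loop_entry;
+         return topmost;
+ }
+
+ static void update_loop_entry(struct node *cur, struct node *hdr)
+ {
+         struct node *cur1 = get_loop_entry(cur) ?: cur;
+         struct node *hdr1 = get_loop_entry(hdr) ?: hdr;
+
+         /* hdr1->on_path distinguishes (I) from (II): record the entry
+          * only if it is still on the current DFS path
+          */
+         if (hdr1->on_path && hdr1->depth <= cur1->depth)
+                 cur->loop_entry = hdr;
+ }
+
+ static void dfs(struct node *n, int depth)
+ {
+         int i;
+
+         n->visited = 1;
+         n->on_path = 1;
+         n->depth = depth;
+         for (i = 0; i < n->nsucc; i++) {
+                 struct node *s = n->succ[i];
+
+                 if (!s->visited) {
+                         dfs(s, depth + 1);
+                         if (s->loop_entry)              /* "case A" */
+                                 update_loop_entry(n, s->loop_entry);
+                 } else {
+                         update_loop_entry(n, s);        /* "case B or C" */
+                 }
+         }
+         n->on_path = 0;
+ }
+
+ int main(void)
+ {
+         struct node a = { .id = 0 }, b = { .id = 1 }, c = { .id = 2 }, d = { .id = 3 };
+         struct node *all[] = { &a, &b, &c, &d };
+         int i;
+
+         a.nsucc = 2; a.succ[0] = &b; a.succ[1] = &d;    /* A -> B, A -> D */
+         b.nsucc = 1; b.succ[0] = &c;                    /* B -> C */
+         c.nsucc = 1; c.succ[0] = &a;                    /* C -> A, a back-edge */
+         dfs(&a, 0);
+         for (i = 0; i < 4; i++)                         /* A, B, C report A; D reports none */
+                 printf("node %d: loop_entry = %d\n", all[i]->id,
+                        all[i]->loop_entry ? all[i]->loop_entry->id : -1);
+         return 0;
+ }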
+
+Co-developed-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
+Co-developed-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
+Reviewed-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-6-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf_verifier.h | 15 +++
+ kernel/bpf/verifier.c | 207 ++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 218 insertions(+), 4 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -372,10 +372,25 @@ struct bpf_verifier_state {
+ struct bpf_active_lock active_lock;
+ bool speculative;
+ bool active_rcu_lock;
++ /* If this state was ever pointed-to by another state's loop_entry field,
++ * this flag is set to true. Used to avoid freeing such states
++ * while they are still in use.
++ */
++ bool used_as_loop_entry;
+
+ /* first and last insn idx of this verifier state */
+ u32 first_insn_idx;
+ u32 last_insn_idx;
++ /* If this state is a part of states loop this field points to some
++ * parent of this state such that:
++ * - it is also a member of the same states loop;
++ * - DFS states traversal starting from initial state visits loop_entry
++ * state before this state.
++ * Used to compute topmost loop entry for state loops.
++ * State loops might appear because of open coded iterators logic.
++ * See get_loop_entry() for more information.
++ */
++ struct bpf_verifier_state *loop_entry;
+ /* jmp history recorded from first to last.
+ * backtracking is using it to go from last to first.
+ * For most states jmp_history_cnt is [0-3].
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1772,6 +1772,7 @@ static int copy_verifier_state(struct bp
+ dst_state->first_insn_idx = src->first_insn_idx;
+ dst_state->last_insn_idx = src->last_insn_idx;
+ dst_state->dfs_depth = src->dfs_depth;
++ dst_state->used_as_loop_entry = src->used_as_loop_entry;
+ for (i = 0; i <= src->curframe; i++) {
+ dst = dst_state->frame[i];
+ if (!dst) {
+@@ -1814,11 +1815,176 @@ static bool same_callsites(struct bpf_ve
+ return true;
+ }
+
++/* Open coded iterators allow back-edges in the state graph in order to
++ * check unbounded loops that use iterators.
++ *
++ * In is_state_visited() it is necessary to know if explored states are
++ * part of some loops in order to decide whether non-exact states
++ * comparison could be used:
++ * - non-exact states comparison establishes sub-state relation and uses
++ * read and precision marks to do so, these marks are propagated from
++ * children states and thus are not guaranteed to be final in a loop;
++ * - exact states comparison just checks if current and explored states
++ * are identical (and thus form a back-edge).
++ *
++ * Paper "A New Algorithm for Identifying Loops in Decompilation"
++ * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
++ * algorithm for loop structure detection and gives an overview of
++ * relevant terminology. It also has helpful illustrations.
++ *
++ * [1] https://api.semanticscholar.org/CorpusID:15784067
++ *
++ * We use a similar algorithm but, because loop nesting structure is
++ * irrelevant for the verifier, ours is significantly simpler and resembles
++ * the strongly connected components algorithm from Sedgewick's textbook.
++ *
++ * Define topmost loop entry as a first node of the loop traversed in a
++ * depth first search starting from initial state. The goal of the loop
++ * tracking algorithm is to associate topmost loop entries with states
++ * derived from these entries.
++ *
++ * At each step of the DFS states traversal the algorithm needs to identify
++ * the following situations:
++ *
++ * initial initial initial
++ * | | |
++ * V V V
++ * ... ... .---------> hdr
++ * | | | |
++ * V V | V
++ * cur .-> succ | .------...
++ * | | | | | |
++ * V | V | V V
++ * succ '-- cur | ... ...
++ * | | |
++ * | V V
++ * | succ <- cur
++ * | |
++ * | V
++ * | ...
++ * | |
++ * '----'
++ *
++ * (A) successor state of cur (B) successor state of cur or its entry
++ * not yet traversed are in current DFS path, thus cur and succ
++ * are members of the same outermost loop
++ *
++ * initial initial
++ * | |
++ * V V
++ * ... ...
++ * | |
++ * V V
++ * .------... .------...
++ * | | | |
++ * V V V V
++ * .-> hdr ... ... ...
++ * | | | | |
++ * | V V V V
++ * | succ <- cur succ <- cur
++ * | | |
++ * | V V
++ * | ... ...
++ * | | |
++ * '----' exit
++ *
++ * (C) successor state of cur is a part of some loop but this loop
++ * does not include cur or successor state is not in a loop at all.
++ *
++ * Algorithm could be described as the following python code:
++ *
++ * traversed = set() # Set of traversed nodes
++ * entries = {} # Mapping from node to loop entry
++ * depths = {} # Depth level assigned to graph node
++ * path = set() # Current DFS path
++ *
++ * # Find outermost loop entry known for n
++ * def get_loop_entry(n):
++ * h = entries.get(n, None)
++ * while h in entries and entries[h] != h:
++ * h = entries[h]
++ * return h
++ *
++ * # Update n's loop entry if h's outermost entry comes
++ * # before n's outermost entry in current DFS path.
++ * def update_loop_entry(n, h):
++ * n1 = get_loop_entry(n) or n
++ * h1 = get_loop_entry(h) or h
++ * if h1 in path and depths[h1] <= depths[n1]:
++ * entries[n] = h1
++ *
++ * def dfs(n, depth):
++ * traversed.add(n)
++ * path.add(n)
++ * depths[n] = depth
++ * for succ in G.successors(n):
++ * if succ not in traversed:
++ * # Case A: explore succ and update cur's loop entry
++ * # only if succ's entry is in current DFS path.
++ * dfs(succ, depth + 1)
++ * h = get_loop_entry(succ)
++ * update_loop_entry(n, h)
++ * else:
++ * # Case B or C depending on `h1 in path` check in update_loop_entry().
++ * update_loop_entry(n, succ)
++ * path.remove(n)
++ *
++ * To adapt this algorithm for use with verifier:
++ * - use st->branches == 0 as a signal that DFS of succ had been finished
++ * and cur's loop entry has to be updated (case A), handle this in
++ * update_branch_counts();
++ * - use st->branches > 0 as a signal that st is in the current DFS path;
++ * - handle cases B and C in is_state_visited();
++ * - update topmost loop entry for intermediate states in get_loop_entry().
++ */
++static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
++{
++ struct bpf_verifier_state *topmost = st->loop_entry, *old;
++
++ while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
++ topmost = topmost->loop_entry;
++ /* Update loop entries for intermediate states to avoid this
++ * traversal in future get_loop_entry() calls.
++ */
++ while (st && st->loop_entry != topmost) {
++ old = st->loop_entry;
++ st->loop_entry = topmost;
++ st = old;
++ }
++ return topmost;
++}
++
++static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
++{
++ struct bpf_verifier_state *cur1, *hdr1;
++
++ cur1 = get_loop_entry(cur) ?: cur;
++ hdr1 = get_loop_entry(hdr) ?: hdr;
++ /* The hdr1->branches check decides between cases B and C in
++ * the comment for get_loop_entry(). If hdr1->branches == 0 then
++ * hdr's topmost loop entry is not in the current DFS path,
++ * hence 'cur' and 'hdr' are not in the same loop and there is
++ * no need to update cur->loop_entry.
++ */
++ if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
++ cur->loop_entry = hdr;
++ hdr->used_as_loop_entry = true;
++ }
++}
++
+ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+ {
+ while (st) {
+ u32 br = --st->branches;
+
++ /* br == 0 signals that DFS exploration for 'st' is finished,
++ * thus it is necessary to update parent's loop entry if it
++ * turned out that st is a part of some loop.
++ * This is a part of 'case A' in get_loop_entry() comment.
++ */
++ if (br == 0 && st->parent && st->loop_entry)
++ update_loop_entry(st->parent, st->loop_entry);
++
+ /* WARN_ON(br > 1) technically makes sense here,
+ * but see comment in push_stack(), hence:
+ */
+@@ -16262,10 +16428,11 @@ static int is_state_visited(struct bpf_v
+ {
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl, **pprev;
+- struct bpf_verifier_state *cur = env->cur_state, *new;
++ struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
+ int i, j, n, err, states_cnt = 0;
+ bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
+ bool add_new_state = force_new_state;
++ bool force_exact;
+
+ /* bpf progs typically have pruning point every 4 instructions
+ * http://vger.kernel.org/bpfconf2019.html#session-1
+@@ -16360,8 +16527,10 @@ static int is_state_visited(struct bpf_v
+ */
+ spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
+ iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
+- if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE)
++ if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
++ update_loop_entry(cur, &sl->state);
+ goto hit;
++ }
+ }
+ goto skip_inf_loop_check;
+ }
+@@ -16392,7 +16561,36 @@ skip_inf_loop_check:
+ add_new_state = false;
+ goto miss;
+ }
+- if (states_equal(env, &sl->state, cur, false)) {
++ /* If sl->state is a part of a loop and this loop's entry is a part of
++ * the current verification path, then states have to be compared exactly.
++ * 'force_exact' is needed to catch the following case:
++ *
++ * initial Here state 'succ' was processed first,
++ * | it was eventually tracked to produce a
++ * V state identical to 'hdr'.
++ * .---------> hdr All branches from 'succ' had been explored
++ * | | and thus 'succ' has its .branches == 0.
++ * | V
++ * | .------... Suppose states 'cur' and 'succ' correspond
++ * | | | to the same instruction + callsites.
++ * | V V In such case it is necessary to check
++ * | ... ... if 'succ' and 'cur' are states_equal().
++ * | | | If 'succ' and 'cur' are a part of the
++ * | V V same loop exact flag has to be set.
++ * | succ <- cur To check if that is the case, verify
++ * | | if loop entry of 'succ' is in current
++ * | V DFS path.
++ * | ...
++ * | |
++ * '----'
++ *
++ * Additional details are in the comment before get_loop_entry().
++ */
++ loop_entry = get_loop_entry(&sl->state);
++ force_exact = loop_entry && loop_entry->branches > 0;
++ if (states_equal(env, &sl->state, cur, force_exact)) {
++ if (force_exact)
++ update_loop_entry(cur, loop_entry);
+ hit:
+ sl->hit_cnt++;
+ /* reached equivalent register/stack state,
+@@ -16441,7 +16639,8 @@ miss:
+ * speed up verification
+ */
+ *pprev = sl->next;
+- if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
++ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
++ !sl->state.used_as_loop_entry) {
+ u32 br = sl->state.branches;
+
+ WARN_ONCE(br,
--- /dev/null
+From 2793a8b015f7f1caadb9bce9c63dc659f7522676 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:13 +0300
+Subject: bpf: exact states comparison for iterator convergence checks
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 2793a8b015f7f1caadb9bce9c63dc659f7522676 upstream.
+
+Convergence for open coded iterators is computed in is_state_visited()
+by examining states with branches count > 1 and using states_equal().
+states_equal() computes sub-state relation using read and precision marks.
+Read and precision marks are propagated from children states,
+thus are not guaranteed to be complete inside a loop when branches
+count > 1. This could be demonstrated using the following unsafe program:
+
+ 1. r7 = -16
+ 2. r6 = bpf_get_prandom_u32()
+ 3. while (bpf_iter_num_next(&fp[-8])) {
+ 4. if (r6 != 42) {
+ 5. r7 = -32
+ 6. r6 = bpf_get_prandom_u32()
+ 7. continue
+ 8. }
+ 9. r0 = r10
+ 10. r0 += r7
+ 11. r8 = *(u64 *)(r0 + 0)
+ 12. r6 = bpf_get_prandom_u32()
+ 13. }
+
+Here the verifier would first visit path 1-3, create a checkpoint at 3
+with r7=-16, and continue to 4-7,3 with r7=-32.
+
+Because instructions at 9-12 had not been visited yet, the existing
+checkpoint at 3 does not have a read or precision mark for r7.
+Thus states_equal() would return true and the verifier would discard
+the current state, so the unsafe memory access at 11 would not be caught.
+
+This commit fixes this loophole by introducing exact state comparisons
+for iterator convergence logic:
+- registers are compared using regs_exact() regardless of read or
+ precision marks;
+- stack slots have to have identical type.
+
+Unfortunately, this is too strict even for simple programs like below:
+
+ i = 0;
+ while(iter_next(&it))
+ i++;
+
+At each iteration step i++ would produce a new distinct state and
+eventually instruction processing limit would be reached.
+
+To avoid such behavior speculatively forget (widen) range for
+imprecise scalar registers, if those registers were not precise at the
+end of the previous iteration and do not match exactly.
+
+This is a conservative heuristic that allows a wide range of programs
+to be verified; however, it precludes verification of programs that
+conjure an imprecise value on the first loop iteration and use it as
+precise on the second.
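+
+For example (this is the same sketch that the patch adds to the
+process_iter_next_call() comment below), a safe program that this
+heuristic prevents from being verified:
+
+ struct bpf_num_iter it;
+ int arr[10];
+ int i = 0, a = 0;
+
+ bpf_iter_num_new(&it, 0, 10);
+ while (bpf_iter_num_next(&it)) {
+         if (a == 0) {
+                 a = 1;
+                 i = 7;       // i changed, so its range is forgotten
+                              // (widened) on the second loop entry
+         } else {
+                 arr[i] = 42; // fails to verify: i is no longer known to be 7
+         }
+ }
+ bpf_iter_num_destroy(&it);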
+
+Test case iter_task_vma_for_each() presents one such case:
+
+ unsigned int seen = 0;
+ ...
+ bpf_for_each(task_vma, vma, task, 0) {
+ if (seen >= 1000)
+ break;
+ ...
+ seen++;
+ }
+
+Here clang generates the following code:
+
+<LBB0_4>:
+ 24: r8 = r6 ; stash current value of
+ ... body ... 'seen'
+ 29: r1 = r10
+ 30: r1 += -0x8
+ 31: call bpf_iter_task_vma_next
+ 32: r6 += 0x1 ; seen++;
+ 33: if r0 == 0x0 goto +0x2 <LBB0_6> ; exit on next() == NULL
+ 34: r7 += 0x10
+ 35: if r8 < 0x3e7 goto -0xc <LBB0_4> ; loop on seen < 1000
+
+<LBB0_6>:
+ ... exit ...
+
+Note that the counter in r6 is copied to r8 and then incremented, while
+the conditional jump is done using r8. Because of this, the precision
+mark for r6 lags one state behind the precision mark on r8 and the
+widening logic kicks in.
+
+Adding barrier_var(seen) after the conditional is sufficient to force
+clang to use the same register for both counting and the conditional jump.
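+
+A sketch of that workaround (barrier_var() is the inline-asm barrier
+macro from the selftests' bpf_misc.h; the loop body is elided as in the
+fragment above):
+
+ unsigned int seen = 0;
+ ...
+ bpf_for_each(task_vma, vma, task, 0) {
+         if (seen >= 1000)
+                 break;
+         barrier_var(seen);  /* keep 'seen' in a single register across
+                              * the bound check and the increment */
+         ...
+         seen++;
+ }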
+
+This issue was discussed in the thread [1] which was started by
+Andrew Werner <awerner32@gmail.com> demonstrating a similar bug
+in callback functions handling. The callbacks would be addressed
+in a followup patch.
+
+[1] https://lore.kernel.org/bpf/97a90da09404c65c8e810cf83c94ac703705dc0e.camel@gmail.com/
+
+Co-developed-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
+Co-developed-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-4-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf_verifier.h | 1
+ kernel/bpf/verifier.c | 218 ++++++++++++++++++++++++++++++++++++-------
+ 2 files changed, 188 insertions(+), 31 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -383,6 +383,7 @@ struct bpf_verifier_state {
+ */
+ struct bpf_idx_pair *jmp_history;
+ u32 jmp_history_cnt;
++ u32 dfs_depth;
+ };
+
+ #define bpf_get_spilled_reg(slot, frame) \
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1771,6 +1771,7 @@ static int copy_verifier_state(struct bp
+ dst_state->parent = src->parent;
+ dst_state->first_insn_idx = src->first_insn_idx;
+ dst_state->last_insn_idx = src->last_insn_idx;
++ dst_state->dfs_depth = src->dfs_depth;
+ for (i = 0; i <= src->curframe; i++) {
+ dst = dst_state->frame[i];
+ if (!dst) {
+@@ -7554,6 +7555,81 @@ static int process_iter_arg(struct bpf_v
+ return 0;
+ }
+
++/* Look for a previous loop entry at insn_idx: nearest parent state
++ * stopped at insn_idx with callsites matching those in cur->frame.
++ */
++static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
++ struct bpf_verifier_state *cur,
++ int insn_idx)
++{
++ struct bpf_verifier_state_list *sl;
++ struct bpf_verifier_state *st;
++
++ /* Explored states are pushed in stack order, most recent states come first */
++ sl = *explored_state(env, insn_idx);
++ for (; sl; sl = sl->next) {
++ /* If st->branches != 0, the state is a part of the current DFS verification path,
++ * hence cur & st form a loop.
++ */
++ st = &sl->state;
++ if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
++ st->dfs_depth < cur->dfs_depth)
++ return st;
++ }
++
++ return NULL;
++}
++
++static void reset_idmap_scratch(struct bpf_verifier_env *env);
++static bool regs_exact(const struct bpf_reg_state *rold,
++ const struct bpf_reg_state *rcur,
++ struct bpf_idmap *idmap);
++
++static void maybe_widen_reg(struct bpf_verifier_env *env,
++ struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
++ struct bpf_idmap *idmap)
++{
++ if (rold->type != SCALAR_VALUE)
++ return;
++ if (rold->type != rcur->type)
++ return;
++ if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
++ return;
++ __mark_reg_unknown(env, rcur);
++}
++
++static int widen_imprecise_scalars(struct bpf_verifier_env *env,
++ struct bpf_verifier_state *old,
++ struct bpf_verifier_state *cur)
++{
++ struct bpf_func_state *fold, *fcur;
++ int i, fr;
++
++ reset_idmap_scratch(env);
++ for (fr = old->curframe; fr >= 0; fr--) {
++ fold = old->frame[fr];
++ fcur = cur->frame[fr];
++
++ for (i = 0; i < MAX_BPF_REG; i++)
++ maybe_widen_reg(env,
++ &fold->regs[i],
++ &fcur->regs[i],
++ &env->idmap_scratch);
++
++ for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
++ if (!is_spilled_reg(&fold->stack[i]) ||
++ !is_spilled_reg(&fcur->stack[i]))
++ continue;
++
++ maybe_widen_reg(env,
++ &fold->stack[i].spilled_ptr,
++ &fcur->stack[i].spilled_ptr,
++ &env->idmap_scratch);
++ }
++ }
++ return 0;
++}
++
+ /* process_iter_next_call() is called when verifier gets to iterator's next
+ * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
+ * to it as just "iter_next()" in comments below.
+@@ -7595,25 +7671,47 @@ static int process_iter_arg(struct bpf_v
+ * is some statically known limit on number of iterations (e.g., if there is
+ * an explicit `if n > 100 then break;` statement somewhere in the loop).
+ *
+- * One very subtle but very important aspect is that we *always* simulate NULL
+- * condition first (as the current state) before we simulate non-NULL case.
+- * This has to do with intricacies of scalar precision tracking. By simulating
+- * "exit condition" of iter_next() returning NULL first, we make sure all the
+- * relevant precision marks *that will be set **after** we exit iterator loop*
+- * are propagated backwards to common parent state of NULL and non-NULL
+- * branches. Thanks to that, state equivalence checks done later in forked
+- * state, when reaching iter_next() for ACTIVE iterator, can assume that
+- * precision marks are finalized and won't change. Because simulating another
+- * ACTIVE iterator iteration won't change them (because given same input
+- * states we'll end up with exactly same output states which we are currently
+- * comparing; and verification after the loop already propagated back what
+- * needs to be **additionally** tracked as precise). It's subtle, grok
+- * precision tracking for more intuitive understanding.
++ * Iteration convergence logic in is_state_visited() relies on exact
++ * states comparison, which ignores read and precision marks.
++ * This is necessary because read and precision marks are not finalized
++ * while in the loop. Exact comparison might preclude convergence for
++ * simple programs like below:
++ *
++ * i = 0;
++ * while(iter_next(&it))
++ * i++;
++ *
++ * At each iteration step i++ would produce a new distinct state and
++ * eventually instruction processing limit would be reached.
++ *
++ * To avoid such behavior speculatively forget (widen) range for
++ * imprecise scalar registers, if those registers were not precise at the
++ * end of the previous iteration and do not match exactly.
++ *
++ * This is a conservative heuristic that allows to verify wide range of programs,
++ * however it precludes verification of programs that conjure an
++ * imprecise value on the first loop iteration and use it as precise on a second.
++ * For example, the following safe program would fail to verify:
++ *
++ * struct bpf_num_iter it;
++ * int arr[10];
++ * int i = 0, a = 0;
++ * bpf_iter_num_new(&it, 0, 10);
++ * while (bpf_iter_num_next(&it)) {
++ * if (a == 0) {
++ * a = 1;
++ * i = 7; // Because i changed, the verifier would forget
++ * // its range on the second loop entry.
++ * } else {
++ * arr[i] = 42; // This would fail to verify.
++ * }
++ * }
++ * bpf_iter_num_destroy(&it);
+ */
+ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
+ {
+- struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
++ struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
+ struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
+ struct bpf_reg_state *cur_iter, *queued_iter;
+ int iter_frameno = meta->iter.frameno;
+@@ -7631,6 +7729,19 @@ static int process_iter_next_call(struct
+ }
+
+ if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
++ /* Because the iter_next() call is a checkpoint, is_state_visited()
++ * should guarantee a parent state with the same call sites and insn_idx.
++ */
++ if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
++ !same_callsites(cur_st->parent, cur_st)) {
++ verbose(env, "bug: bad parent state for iter next call");
++ return -EFAULT;
++ }
++ /* Note cur_st->parent in the call below; it is necessary to skip
++ * the checkpoint created for cur_st by is_state_visited()
++ * right at this instruction.
++ */
++ prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
+ /* branch out active iter state */
+ queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
+ if (!queued_st)
+@@ -7639,6 +7750,8 @@ static int process_iter_next_call(struct
+ queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+ queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
+ queued_iter->iter.depth++;
++ if (prev_st)
++ widen_imprecise_scalars(env, prev_st, queued_st);
+
+ queued_fr = queued_st->frame[queued_st->curframe];
+ mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
+@@ -15560,8 +15673,11 @@ static bool regs_exact(const struct bpf_
+
+ /* Returns true if (rold safe implies rcur safe) */
+ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+- struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
++ struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
+ {
++ if (exact)
++ return regs_exact(rold, rcur, idmap);
++
+ if (!(rold->live & REG_LIVE_READ))
+ /* explored state didn't use this */
+ return true;
+@@ -15678,7 +15794,7 @@ static bool regsafe(struct bpf_verifier_
+ }
+
+ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+- struct bpf_func_state *cur, struct bpf_idmap *idmap)
++ struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
+ {
+ int i, spi;
+
+@@ -15691,7 +15807,12 @@ static bool stacksafe(struct bpf_verifie
+
+ spi = i / BPF_REG_SIZE;
+
+- if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
++ if (exact &&
++ old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
++ cur->stack[spi].slot_type[i % BPF_REG_SIZE])
++ return false;
++
++ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
+ i += BPF_REG_SIZE - 1;
+ /* explored state didn't use this */
+ continue;
+@@ -15741,7 +15862,7 @@ static bool stacksafe(struct bpf_verifie
+ * return false to continue verification of this path
+ */
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+- &cur->stack[spi].spilled_ptr, idmap))
++ &cur->stack[spi].spilled_ptr, idmap, exact))
+ return false;
+ break;
+ case STACK_DYNPTR:
+@@ -15823,16 +15944,16 @@ static bool refsafe(struct bpf_func_stat
+ * the current state will reach 'bpf_exit' instruction safely
+ */
+ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
+- struct bpf_func_state *cur)
++ struct bpf_func_state *cur, bool exact)
+ {
+ int i;
+
+ for (i = 0; i < MAX_BPF_REG; i++)
+ if (!regsafe(env, &old->regs[i], &cur->regs[i],
+- &env->idmap_scratch))
++ &env->idmap_scratch, exact))
+ return false;
+
+- if (!stacksafe(env, old, cur, &env->idmap_scratch))
++ if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
+ return false;
+
+ if (!refsafe(old, cur, &env->idmap_scratch))
+@@ -15841,17 +15962,23 @@ static bool func_states_equal(struct bpf
+ return true;
+ }
+
++static void reset_idmap_scratch(struct bpf_verifier_env *env)
++{
++ env->idmap_scratch.tmp_id_gen = env->id_gen;
++ memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
++}
++
+ static bool states_equal(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *old,
+- struct bpf_verifier_state *cur)
++ struct bpf_verifier_state *cur,
++ bool exact)
+ {
+ int i;
+
+ if (old->curframe != cur->curframe)
+ return false;
+
+- env->idmap_scratch.tmp_id_gen = env->id_gen;
+- memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
++ reset_idmap_scratch(env);
+
+ /* Verification state from speculative execution simulation
+ * must never prune a non-speculative execution one.
+@@ -15881,7 +16008,7 @@ static bool states_equal(struct bpf_veri
+ for (i = 0; i <= old->curframe; i++) {
+ if (old->frame[i]->callsite != cur->frame[i]->callsite)
+ return false;
+- if (!func_states_equal(env, old->frame[i], cur->frame[i]))
++ if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
+ return false;
+ }
+ return true;
+@@ -16136,7 +16263,7 @@ static int is_state_visited(struct bpf_v
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl, **pprev;
+ struct bpf_verifier_state *cur = env->cur_state, *new;
+- int i, j, err, states_cnt = 0;
++ int i, j, n, err, states_cnt = 0;
+ bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
+ bool add_new_state = force_new_state;
+
+@@ -16191,9 +16318,33 @@ static int is_state_visited(struct bpf_v
+ * It's safe to assume that iterator loop will finish, taking into
+ * account iter_next() contract of eventually returning
+ * sticky NULL result.
++ *
++ * Note that states have to be compared exactly in this case because
++ * read and precision marks might not be finalized inside the loop.
++ * E.g. as in the program below:
++ *
++ * 1. r7 = -16
++ * 2. r6 = bpf_get_prandom_u32()
++ * 3. while (bpf_iter_num_next(&fp[-8])) {
++ * 4. if (r6 != 42) {
++ * 5. r7 = -32
++ * 6. r6 = bpf_get_prandom_u32()
++ * 7. continue
++ * 8. }
++ * 9. r0 = r10
++ * 10. r0 += r7
++ * 11. r8 = *(u64 *)(r0 + 0)
++ * 12. r6 = bpf_get_prandom_u32()
++ * 13. }
++ *
++ * Here verifier would first visit path 1-3, create a checkpoint at 3
++ * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
++ * not have read or precision mark for r7 yet, thus inexact states
++ * comparison would discard current state with r7=-32
++ * => unsafe memory access at 11 would not be caught.
+ */
+ if (is_iter_next_insn(env, insn_idx)) {
+- if (states_equal(env, &sl->state, cur)) {
++ if (states_equal(env, &sl->state, cur, true)) {
+ struct bpf_func_state *cur_frame;
+ struct bpf_reg_state *iter_state, *iter_reg;
+ int spi;
+@@ -16216,7 +16367,7 @@ static int is_state_visited(struct bpf_v
+ }
+ /* attempt to detect infinite loop to avoid unnecessary doomed work */
+ if (states_maybe_looping(&sl->state, cur) &&
+- states_equal(env, &sl->state, cur) &&
++ states_equal(env, &sl->state, cur, false) &&
+ !iter_active_depths_differ(&sl->state, cur)) {
+ verbose_linfo(env, insn_idx, "; ");
+ verbose(env, "infinite loop detected at insn %d\n", insn_idx);
+@@ -16241,7 +16392,7 @@ skip_inf_loop_check:
+ add_new_state = false;
+ goto miss;
+ }
+- if (states_equal(env, &sl->state, cur)) {
++ if (states_equal(env, &sl->state, cur, false)) {
+ hit:
+ sl->hit_cnt++;
+ /* reached equivalent register/stack state,
+@@ -16280,8 +16431,12 @@ miss:
+ * to keep checking from state equivalence point of view.
+ * Higher numbers increase max_states_per_insn and verification time,
+ * but do not meaningfully decrease insn_processed.
++ * 'n' controls how many times state could miss before eviction.
++ * Use bigger 'n' for checkpoints because evicting checkpoint states
++ * too early would hinder iterator convergence.
+ */
+- if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
++ n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
++ if (sl->miss_cnt > sl->hit_cnt * n + n) {
+ /* the state is unlikely to be useful. Remove it to
+ * speed up verification
+ */
+@@ -16355,6 +16510,7 @@ next:
+
+ cur->parent = new;
+ cur->first_insn_idx = insn_idx;
++ cur->dfs_depth = new->dfs_depth + 1;
+ clear_jmp_history(cur);
+ new_sl->next = *explored_state(env, insn_idx);
+ *explored_state(env, insn_idx) = new_sl;
--- /dev/null
+From 683b96f9606ab7308ffb23c46ab43cecdef8a241 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:54 +0200
+Subject: bpf: extract __check_reg_arg() utility function
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 683b96f9606ab7308ffb23c46ab43cecdef8a241 upstream.
+
+Split check_reg_arg() into two utility functions:
+- check_reg_arg() operating on registers from current verifier state;
+- __check_reg_arg() operating on a specific set of registers passed as
+ a parameter;
+
+The __check_reg_arg() function would be used by a follow-up change for
+callbacks handling.
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-5-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -3321,13 +3321,11 @@ static void mark_insn_zext(struct bpf_ve
+ reg->subreg_def = DEF_NOT_SUBREG;
+ }
+
+-static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
+- enum reg_arg_type t)
++static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
++ enum reg_arg_type t)
+ {
+- struct bpf_verifier_state *vstate = env->cur_state;
+- struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
+- struct bpf_reg_state *reg, *regs = state->regs;
++ struct bpf_reg_state *reg;
+ bool rw64;
+
+ if (regno >= MAX_BPF_REG) {
+@@ -3368,6 +3366,15 @@ static int check_reg_arg(struct bpf_veri
+ return 0;
+ }
+
++static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
++ enum reg_arg_type t)
++{
++ struct bpf_verifier_state *vstate = env->cur_state;
++ struct bpf_func_state *state = vstate->frame[vstate->curframe];
++
++ return __check_reg_arg(env, state->regs, regno, t);
++}
++
+ static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
+ {
+ env->insn_aux_data[idx].jmp_point = true;
+@@ -9147,7 +9154,7 @@ static void clear_caller_saved_regs(stru
+ /* after the call registers r0 - r5 were scratched */
+ for (i = 0; i < CALLER_SAVED_REGS; i++) {
+ mark_reg_not_init(env, regs, caller_saved[i]);
+- check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
++ __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
+ }
+ }
+
--- /dev/null
+From 4c97259abc9bc8df7712f76f58ce385581876857 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:12 +0300
+Subject: bpf: extract same_callsites() as utility function
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 4c97259abc9bc8df7712f76f58ce385581876857 upstream.
+
+Extract same_callsites() from clean_live_states() as a utility function.
+This function would be used by the next patch in the set.
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-3-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1799,6 +1799,20 @@ static struct bpf_verifier_state_list **
+ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
+ }
+
++static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
++{
++ int fr;
++
++ if (a->curframe != b->curframe)
++ return false;
++
++ for (fr = a->curframe; fr >= 0; fr--)
++ if (a->frame[fr]->callsite != b->frame[fr]->callsite)
++ return false;
++
++ return true;
++}
++
+ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+ {
+ while (st) {
+@@ -15521,18 +15535,14 @@ static void clean_live_states(struct bpf
+ struct bpf_verifier_state *cur)
+ {
+ struct bpf_verifier_state_list *sl;
+- int i;
+
+ sl = *explored_state(env, insn);
+ while (sl) {
+ if (sl->state.branches)
+ goto next;
+ if (sl->state.insn_idx != insn ||
+- sl->state.curframe != cur->curframe)
++ !same_callsites(&sl->state, cur))
+ goto next;
+- for (i = 0; i <= cur->curframe; i++)
+- if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
+- goto next;
+ clean_verifier_state(env, &sl->state);
+ next:
+ sl = sl->next;
--- /dev/null
+From 58124a98cb8eda69d248d7f1de954c8b2767c945 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:55 +0200
+Subject: bpf: extract setup_func_entry() utility function
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 58124a98cb8eda69d248d7f1de954c8b2767c945 upstream.
+
+Move code for simulated stack frame creation to a separate utility
+function. This function would be used in the follow-up change for
+callbacks handling.
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-6-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 84 ++++++++++++++++++++++++++++----------------------
+ 1 file changed, 48 insertions(+), 36 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -9167,11 +9167,10 @@ static int set_callee_state(struct bpf_v
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee, int insn_idx);
+
+-static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+- int *insn_idx, int subprog,
+- set_callee_state_fn set_callee_state_cb)
++static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
++ set_callee_state_fn set_callee_state_cb,
++ struct bpf_verifier_state *state)
+ {
+- struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_func_state *caller, *callee;
+ int err;
+
+@@ -9181,13 +9180,53 @@ static int __check_func_call(struct bpf_
+ return -E2BIG;
+ }
+
+- caller = state->frame[state->curframe];
+ if (state->frame[state->curframe + 1]) {
+ verbose(env, "verifier bug. Frame %d already allocated\n",
+ state->curframe + 1);
+ return -EFAULT;
+ }
+
++ caller = state->frame[state->curframe];
++ callee = kzalloc(sizeof(*callee), GFP_KERNEL);
++ if (!callee)
++ return -ENOMEM;
++ state->frame[state->curframe + 1] = callee;
++
++ /* callee cannot access r0, r6 - r9 for reading and has to write
++ * into its own stack before reading from it.
++ * callee can read/write into caller's stack
++ */
++ init_func_state(env, callee,
++ /* remember the callsite, it will be used by bpf_exit */
++ callsite,
++ state->curframe + 1 /* frameno within this callchain */,
++ subprog /* subprog number within this prog */);
++ /* Transfer references to the callee */
++ err = copy_reference_state(callee, caller);
++ err = err ?: set_callee_state_cb(env, caller, callee, callsite);
++ if (err)
++ goto err_out;
++
++ /* only increment it after check_reg_arg() finished */
++ state->curframe++;
++
++ return 0;
++
++err_out:
++ free_func_state(callee);
++ state->frame[state->curframe + 1] = NULL;
++ return err;
++}
++
++static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
++ int *insn_idx, int subprog,
++ set_callee_state_fn set_callee_state_cb)
++{
++ struct bpf_verifier_state *state = env->cur_state;
++ struct bpf_func_state *caller, *callee;
++ int err;
++
++ caller = state->frame[state->curframe];
+ err = btf_check_subprog_call(env, subprog, caller->regs);
+ if (err == -EFAULT)
+ return err;
+@@ -9256,35 +9295,12 @@ static int __check_func_call(struct bpf_
+ return 0;
+ }
+
+- callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+- if (!callee)
+- return -ENOMEM;
+- state->frame[state->curframe + 1] = callee;
+-
+- /* callee cannot access r0, r6 - r9 for reading and has to write
+- * into its own stack before reading from it.
+- * callee can read/write into caller's stack
+- */
+- init_func_state(env, callee,
+- /* remember the callsite, it will be used by bpf_exit */
+- *insn_idx /* callsite */,
+- state->curframe + 1 /* frameno within this callchain */,
+- subprog /* subprog number within this prog */);
+-
+- /* Transfer references to the callee */
+- err = copy_reference_state(callee, caller);
++ err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state);
+ if (err)
+- goto err_out;
+-
+- err = set_callee_state_cb(env, caller, callee, *insn_idx);
+- if (err)
+- goto err_out;
++ return err;
+
+ clear_caller_saved_regs(env, caller->regs);
+
+- /* only increment it after check_reg_arg() finished */
+- state->curframe++;
+-
+ /* and go analyze first insn of the callee */
+ *insn_idx = env->subprog_info[subprog].start - 1;
+
+@@ -9292,14 +9308,10 @@ static int __check_func_call(struct bpf_
+ verbose(env, "caller:\n");
+ print_verifier_state(env, caller, true);
+ verbose(env, "callee:\n");
+- print_verifier_state(env, callee, true);
++ print_verifier_state(env, state->frame[state->curframe], true);
+ }
+- return 0;
+
+-err_out:
+- free_func_state(callee);
+- state->frame[state->curframe + 1] = NULL;
+- return err;
++ return 0;
+ }
+
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
--- /dev/null
+From bb124da69c47dd98d69361ec13244ece50bec63e Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:07:00 +0200
+Subject: bpf: keep track of max number of bpf_loop callback iterations
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit bb124da69c47dd98d69361ec13244ece50bec63e upstream.
+
+In some cases verifier can't infer convergence of the bpf_loop()
+iteration. E.g. for the following program:
+
+ static int cb(__u32 idx, struct num_context* ctx)
+ {
+ ctx->i++;
+ return 0;
+ }
+
+ SEC("?raw_tp")
+ int prog(void *_)
+ {
+ struct num_context ctx = { .i = 0 };
+ __u8 choice_arr[2] = { 0, 1 };
+
+ bpf_loop(2, cb, &ctx, 0);
+ return choice_arr[ctx.i];
+ }
+
+Each 'cb' simulation would eventually return to 'prog' and reach
+'return choice_arr[ctx.i]' statement. At which point ctx.i would be
+marked precise, thus forcing verifier to track multitude of separate
+states with {.i=0}, {.i=1}, ... at bpf_loop() callback entry.
+
+This commit allows "brute force" handling for such cases by limiting
+the number of callback body simulations using the 'umax' value of the
+first bpf_loop() parameter.
+
+For this, extend bpf_func_state with 'callback_depth' field.
+Increment this field when a callback-visiting state is pushed to the
+states traversal stack. For frame #N, its 'callback_depth' field counts
+how many times a callback with frame depth N+1 has been executed.
+Use bpf_func_state specifically to allow independent tracking of
+callback depths when multiple nested bpf_loop() calls are present.
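+
+A hypothetical sketch of the nested case (struct ctx_t, the callback
+names and the section name are illustrative, not taken from this patch):
+
+ struct ctx_t { __u64 i; };
+
+ static int inner_cb(__u32 idx, struct ctx_t *ctx)
+ {
+         ctx->i++;
+         return 0;
+ }
+
+ static int outer_cb(__u32 idx, struct ctx_t *ctx)
+ {
+         /* runs in frame #1; inner_cb bodies run in frame #2, so their
+          * count lives in frame #1's callback_depth, independent of the
+          * outer bpf_loop() tracked in frame #0's callback_depth
+          */
+         bpf_loop(3, inner_cb, ctx, 0);
+         return 0;
+ }
+
+ SEC("?raw_tp")
+ int prog(void *unused)
+ {
+         struct ctx_t ctx = { .i = 0 };
+
+         bpf_loop(2, outer_cb, &ctx, 0);
+         return 0;
+ }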
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-11-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf_verifier.h | 11 +++
+ kernel/bpf/verifier.c | 19 ++++-
+ tools/testing/selftests/bpf/progs/verifier_subprog_precision.c | 35 +++++++---
+ 3 files changed, 53 insertions(+), 12 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -300,6 +300,17 @@ struct bpf_func_state {
+ bool in_callback_fn;
+ struct tnum callback_ret_range;
+ bool in_async_callback_fn;
++ /* For callback calling functions that limit the number of possible
++ * callback executions (e.g. bpf_loop), this keeps track of the current
++ * simulated iteration number.
++ * Value in frame N refers to number of times callback with frame
++ * N+1 was simulated, e.g. for the following call:
++ *
++ * bpf_loop(..., fn, ...); | suppose current frame is N
++ * | fn would be simulated in frame N+1
++ * | number of simulations is tracked in frame N
++ */
++ u32 callback_depth;
+
+ /* The following fields should be last. See copy_func_state() */
+ int acquired_refs;
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -9301,6 +9301,8 @@ static int push_callback_call(struct bpf
+ return err;
+
+ callback_state->callback_unroll_depth++;
++ callback_state->frame[callback_state->curframe - 1]->callback_depth++;
++ caller->callback_depth = 0;
+ return 0;
+ }
+
+@@ -10090,8 +10092,21 @@ static int check_helper_call(struct bpf_
+ break;
+ case BPF_FUNC_loop:
+ update_loop_inline_state(env, meta.subprogno);
+- err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+- set_loop_callback_state);
++ /* Verifier relies on R1 value to determine if bpf_loop() iteration
++ * is finished, thus mark it precise.
++ */
++ err = mark_chain_precision(env, BPF_REG_1);
++ if (err)
++ return err;
++ if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_loop_callback_state);
++ } else {
++ cur_func(env)->callback_depth = 0;
++ if (env->log.level & BPF_LOG_LEVEL2)
++ verbose(env, "frame%d bpf_loop iteration limit reached\n",
++ env->cur_state->curframe);
++ }
+ break;
+ case BPF_FUNC_dynptr_from_mem:
+ if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
+--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
++++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+@@ -119,7 +119,23 @@ __naked int global_subprog_result_precis
+
+ SEC("?raw_tp")
+ __success __log_level(2)
+-/* First simulated path does not include callback body */
++/* First simulated path does not include callback body,
++ * r1 and r4 are always precise for bpf_loop() calls.
++ */
++__msg("9: (85) call bpf_loop#181")
++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
++__msg("mark_precise: frame0: parent state regs=r4 stack=:")
++__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
++__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
++__msg("mark_precise: frame0: parent state regs=r1 stack=:")
++__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
++__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
++__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
++__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
++__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
++__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
++/* r6 precision propagation */
+ __msg("14: (0f) r1 += r6")
+ __msg("mark_precise: frame0: last_idx 14 first_idx 9")
+ __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+@@ -134,10 +150,9 @@ __msg("17: (b7) r0 = 0")
+ __msg("18: (95) exit")
+ __msg("returning from callee:")
+ __msg("to caller at 9:")
+-/* r4 (flags) is always precise for bpf_loop() */
+-__msg("frame 0: propagating r4")
++__msg("frame 0: propagating r1,r4")
+ __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+-__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit")
++__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+ __msg("from 18 to 9: safe")
+ __naked int callback_result_precise(void)
+ {
+@@ -264,12 +279,12 @@ __msg("15: (b7) r0 = 0")
+ __msg("16: (95) exit")
+ __msg("returning from callee:")
+ __msg("to caller at 9:")
+-/* r4 (flags) is always precise for bpf_loop(),
++/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+-__msg("frame 0: propagating r4,r6")
++__msg("frame 0: propagating r1,r4,r6")
+ __msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+-__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit")
++__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+ __msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+ __msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+ __msg("mark_precise: frame0: parent state regs= stack=:")
+@@ -422,12 +437,12 @@ __msg("17: (b7) r0 = 0")
+ __msg("18: (95) exit")
+ __msg("returning from callee:")
+ __msg("to caller at 10:")
+-/* r4 (flags) is always precise for bpf_loop(),
++/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+-__msg("frame 0: propagating r4,fp-8")
++__msg("frame 0: propagating r1,r4,fp-8")
+ __msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+-__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit")
++__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+ __msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+ __msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+ __msg("mark_precise: frame0: parent state regs= stack=:")
--- /dev/null
+From 3c4e420cb6536026ddd50eaaff5f30e4f144200d Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:11 +0300
+Subject: bpf: move explored_state() closer to the beginning of verifier.c
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 3c4e420cb6536026ddd50eaaff5f30e4f144200d upstream.
+
+Subsequent patches will make use of the explored_state() function.
+Move it up to avoid adding an unnecessary prototype.
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-2-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 28 +++++++++++++---------------
+ 1 file changed, 13 insertions(+), 15 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1786,6 +1786,19 @@ static int copy_verifier_state(struct bp
+ return 0;
+ }
+
++static u32 state_htab_size(struct bpf_verifier_env *env)
++{
++ return env->prog->len;
++}
++
++static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
++{
++ struct bpf_verifier_state *cur = env->cur_state;
++ struct bpf_func_state *state = cur->frame[cur->curframe];
++
++ return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
++}
++
+ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+ {
+ while (st) {
+@@ -14702,21 +14715,6 @@ enum {
+ BRANCH = 2,
+ };
+
+-static u32 state_htab_size(struct bpf_verifier_env *env)
+-{
+- return env->prog->len;
+-}
+-
+-static struct bpf_verifier_state_list **explored_state(
+- struct bpf_verifier_env *env,
+- int idx)
+-{
+- struct bpf_verifier_state *cur = env->cur_state;
+- struct bpf_func_state *state = cur->frame[cur->curframe];
+-
+- return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
+-}
+-
+ static void mark_prune_point(struct bpf_verifier_env *env, int idx)
+ {
+ env->insn_aux_data[idx].prune_point = true;
--- /dev/null
+From b4d8239534fddc036abe4a0fdbf474d9894d4641 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:17 +0300
+Subject: bpf: print full verifier states on infinite loop detection
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit b4d8239534fddc036abe4a0fdbf474d9894d4641 upstream.
+
+Additional logging in is_state_visited(): if an infinite loop is detected,
+print the full verifier state for both the current and equivalent states.
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-8-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -16540,6 +16540,10 @@ static int is_state_visited(struct bpf_v
+ !iter_active_depths_differ(&sl->state, cur)) {
+ verbose_linfo(env, insn_idx, "; ");
+ verbose(env, "infinite loop detected at insn %d\n", insn_idx);
++ verbose(env, "cur state:");
++ print_verifier_state(env, cur->frame[cur->curframe], true);
++ verbose(env, "old state:");
++ print_verifier_state(env, sl->state.frame[cur->curframe], true);
+ return -EINVAL;
+ }
+ /* if the verifier is processing a loop, avoid adding new state
--- /dev/null
+From ab5cfac139ab8576fb54630d4cca23c3e690ee90 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:56 +0200
+Subject: bpf: verify callbacks as if they are called unknown number of times
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit ab5cfac139ab8576fb54630d4cca23c3e690ee90 upstream.
+
+Prior to this patch, callbacks were handled as regular function calls;
+execution of the callback body was modeled exactly once.
+This patch updates the callback handling logic as follows:
+- introduces a function push_callback_call() that schedules callback
+ body verification in env->head stack;
+- updates prepare_func_exit() to reschedule callback body verification
+ upon BPF_EXIT;
+- as with calls to bpf_*_iter_next(), calls to callback-invoking functions
+  are marked as checkpoints;
+- is_state_visited() is updated to stop callback-based iteration when
+  some identical parent state is found.
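+
+For illustration (a hypothetical program, not taken from this patch):
+with the callback body modeled as running exactly once, a program like
+the one below could be accepted even though the callback may run zero
+times at runtime:
+
+ struct ctx_t { __u64 i; };
+
+ static int cb(__u32 idx, struct ctx_t *ctx)
+ {
+         ctx->i = 0;
+         return 1;                    /* stop after the first iteration */
+ }
+
+ SEC("?raw_tp")
+ int unsafe(void *unused)
+ {
+         struct ctx_t ctx = { .i = 4 };
+         __u8 arr[2] = { 0, 1 };
+         __u32 n = bpf_get_prandom_u32() & 1;   /* may be 0 */
+
+         bpf_loop(n, cb, &ctx, 0);
+         /* when n == 0 the callback never runs and ctx.i stays 4, so the
+          * access below can go out of bounds at runtime; a model that
+          * simulates the callback exactly once only ever sees ctx.i == 0
+          * here, while the zero-iterations path explored first by this
+          * patch sees ctx.i == 4
+          */
+         return arr[ctx.i];
+ }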
+
+Paths with the callback function invoked zero times are now verified first,
+which makes it necessary to modify some selftests:
+- the following negative tests required adding release/unlock/drop
+ calls to avoid previously masked unrelated error reports:
+ - cb_refs.c:underflow_prog
+ - exceptions_fail.c:reject_rbtree_add_throw
+ - exceptions_fail.c:reject_with_cp_reference
+- the following precision tracking selftests needed change in expected
+ log trace:
+ - verifier_subprog_precision.c:callback_result_precise
+ (note: r0 precision is no longer propagated inside callback and
+ I think this is a correct behavior)
+ - verifier_subprog_precision.c:parent_callee_saved_reg_precise_with_callback
+ - verifier_subprog_precision.c:parent_stack_slot_precise_with_callback
+
+Reported-by: Andrew Werner <awerner32@gmail.com>
+Closes: https://lore.kernel.org/bpf/CA+vRuzPChFNXmouzGG+wsy=6eMcfr1mFG0F3g7rbg-sedGKW3w@mail.gmail.com/
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-7-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf_verifier.h | 5
+ kernel/bpf/verifier.c | 272 ++++++----
+ tools/testing/selftests/bpf/progs/cb_refs.c | 1
+ tools/testing/selftests/bpf/progs/verifier_subprog_precision.c | 71 ++
+ 4 files changed, 237 insertions(+), 112 deletions(-)
+
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -399,6 +399,7 @@ struct bpf_verifier_state {
+ struct bpf_idx_pair *jmp_history;
+ u32 jmp_history_cnt;
+ u32 dfs_depth;
++ u32 callback_unroll_depth;
+ };
+
+ #define bpf_get_spilled_reg(slot, frame) \
+@@ -506,6 +507,10 @@ struct bpf_insn_aux_data {
+ * this instruction, regardless of any heuristics
+ */
+ bool force_checkpoint;
++ /* true if instruction is a call to a helper function that
++ * accepts callback function as a parameter.
++ */
++ bool calls_callback;
+ };
+
+ #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -542,12 +542,11 @@ static bool is_dynptr_ref_function(enum
+ return func_id == BPF_FUNC_dynptr_data;
+ }
+
+-static bool is_callback_calling_kfunc(u32 btf_id);
++static bool is_sync_callback_calling_kfunc(u32 btf_id);
+
+-static bool is_callback_calling_function(enum bpf_func_id func_id)
++static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
+ {
+ return func_id == BPF_FUNC_for_each_map_elem ||
+- func_id == BPF_FUNC_timer_set_callback ||
+ func_id == BPF_FUNC_find_vma ||
+ func_id == BPF_FUNC_loop ||
+ func_id == BPF_FUNC_user_ringbuf_drain;
+@@ -558,6 +557,18 @@ static bool is_async_callback_calling_fu
+ return func_id == BPF_FUNC_timer_set_callback;
+ }
+
++static bool is_callback_calling_function(enum bpf_func_id func_id)
++{
++ return is_sync_callback_calling_function(func_id) ||
++ is_async_callback_calling_function(func_id);
++}
++
++static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
++{
++ return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
++ (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
++}
++
+ static bool is_storage_get_function(enum bpf_func_id func_id)
+ {
+ return func_id == BPF_FUNC_sk_storage_get ||
+@@ -1772,6 +1783,7 @@ static int copy_verifier_state(struct bp
+ dst_state->first_insn_idx = src->first_insn_idx;
+ dst_state->last_insn_idx = src->last_insn_idx;
+ dst_state->dfs_depth = src->dfs_depth;
++ dst_state->callback_unroll_depth = src->callback_unroll_depth;
+ dst_state->used_as_loop_entry = src->used_as_loop_entry;
+ for (i = 0; i <= src->curframe; i++) {
+ dst = dst_state->frame[i];
+@@ -3613,6 +3625,8 @@ static void fmt_stack_mask(char *buf, ss
+ }
+ }
+
++static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
++
+ /* For given verifier state backtrack_insn() is called from the last insn to
+ * the first insn. Its purpose is to compute a bitmask of registers and
+ * stack slots that needs precision in the parent verifier state.
+@@ -3788,16 +3802,13 @@ static int backtrack_insn(struct bpf_ver
+ return -EFAULT;
+ return 0;
+ }
+- } else if ((bpf_helper_call(insn) &&
+- is_callback_calling_function(insn->imm) &&
+- !is_async_callback_calling_function(insn->imm)) ||
+- (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) {
+- /* callback-calling helper or kfunc call, which means
+- * we are exiting from subprog, but unlike the subprog
+- * call handling above, we shouldn't propagate
+- * precision of r1-r5 (if any requested), as they are
+- * not actually arguments passed directly to callback
+- * subprogs
++ } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
++ /* exit from callback subprog to callback-calling helper or
++ * kfunc call. Use idx/subseq_idx check to discern it from
++ * straight line code backtracking.
++ * Unlike the subprog call handling above, we shouldn't
++ * propagate precision of r1-r5 (if any requested), as they are
++ * not actually arguments passed directly to callback subprogs
+ */
+ if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
+ verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
+@@ -3832,10 +3843,18 @@ static int backtrack_insn(struct bpf_ver
+ } else if (opcode == BPF_EXIT) {
+ bool r0_precise;
+
++ /* Backtracking to a nested function call: 'idx' is a part of
++ * the inner frame, 'subseq_idx' is a part of the outer frame.
++ * In case of a regular function call, instructions giving
++ * precision to registers R1-R5 should have been found already.
++ * In case of a callback, it is ok to have R1-R5 marked for
++ * backtracking, as these registers are set by the function
++ * invoking callback.
++ */
++ if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
++ for (i = BPF_REG_1; i <= BPF_REG_5; i++)
++ bt_clear_reg(bt, i);
+ if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
+- /* if backtracing was looking for registers R1-R5
+- * they should have been found already.
+- */
+ verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
+ WARN_ONCE(1, "verifier backtracking bug");
+ return -EFAULT;
+@@ -9218,11 +9237,11 @@ err_out:
+ return err;
+ }
+
+-static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+- int *insn_idx, int subprog,
+- set_callee_state_fn set_callee_state_cb)
++static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
++ int insn_idx, int subprog,
++ set_callee_state_fn set_callee_state_cb)
+ {
+- struct bpf_verifier_state *state = env->cur_state;
++ struct bpf_verifier_state *state = env->cur_state, *callback_state;
+ struct bpf_func_state *caller, *callee;
+ int err;
+
+@@ -9230,43 +9249,21 @@ static int __check_func_call(struct bpf_
+ err = btf_check_subprog_call(env, subprog, caller->regs);
+ if (err == -EFAULT)
+ return err;
+- if (subprog_is_global(env, subprog)) {
+- if (err) {
+- verbose(env, "Caller passes invalid args into func#%d\n",
+- subprog);
+- return err;
+- } else {
+- if (env->log.level & BPF_LOG_LEVEL)
+- verbose(env,
+- "Func#%d is global and valid. Skipping.\n",
+- subprog);
+- clear_caller_saved_regs(env, caller->regs);
+-
+- /* All global functions return a 64-bit SCALAR_VALUE */
+- mark_reg_unknown(env, caller->regs, BPF_REG_0);
+- caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+-
+- /* continue with next insn after call */
+- return 0;
+- }
+- }
+
+ /* set_callee_state is used for direct subprog calls, but we are
+ * interested in validating only BPF helpers that can call subprogs as
+ * callbacks
+ */
+- if (set_callee_state_cb != set_callee_state) {
+- if (bpf_pseudo_kfunc_call(insn) &&
+- !is_callback_calling_kfunc(insn->imm)) {
+- verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
+- func_id_name(insn->imm), insn->imm);
+- return -EFAULT;
+- } else if (!bpf_pseudo_kfunc_call(insn) &&
+- !is_callback_calling_function(insn->imm)) { /* helper */
+- verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
+- func_id_name(insn->imm), insn->imm);
+- return -EFAULT;
+- }
++ if (bpf_pseudo_kfunc_call(insn) &&
++ !is_sync_callback_calling_kfunc(insn->imm)) {
++ verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
++ func_id_name(insn->imm), insn->imm);
++ return -EFAULT;
++ } else if (!bpf_pseudo_kfunc_call(insn) &&
++ !is_callback_calling_function(insn->imm)) { /* helper */
++ verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
++ func_id_name(insn->imm), insn->imm);
++ return -EFAULT;
+ }
+
+ if (insn->code == (BPF_JMP | BPF_CALL) &&
+@@ -9277,25 +9274,76 @@ static int __check_func_call(struct bpf_
+ /* there is no real recursion here. timer callbacks are async */
+ env->subprog_info[subprog].is_async_cb = true;
+ async_cb = push_async_cb(env, env->subprog_info[subprog].start,
+- *insn_idx, subprog);
++ insn_idx, subprog);
+ if (!async_cb)
+ return -EFAULT;
+ callee = async_cb->frame[0];
+ callee->async_entry_cnt = caller->async_entry_cnt + 1;
+
+ /* Convert bpf_timer_set_callback() args into timer callback args */
+- err = set_callee_state_cb(env, caller, callee, *insn_idx);
++ err = set_callee_state_cb(env, caller, callee, insn_idx);
+ if (err)
+ return err;
+
++ return 0;
++ }
++
++ /* for callback functions enqueue entry to callback and
++ * proceed with next instruction within current frame.
++ */
++ callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
++ if (!callback_state)
++ return -ENOMEM;
++
++ err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
++ callback_state);
++ if (err)
++ return err;
++
++ callback_state->callback_unroll_depth++;
++ return 0;
++}
++
++static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
++ int *insn_idx)
++{
++ struct bpf_verifier_state *state = env->cur_state;
++ struct bpf_func_state *caller;
++ int err, subprog, target_insn;
++
++ target_insn = *insn_idx + insn->imm + 1;
++ subprog = find_subprog(env, target_insn);
++ if (subprog < 0) {
++ verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
++ return -EFAULT;
++ }
++
++ caller = state->frame[state->curframe];
++ err = btf_check_subprog_call(env, subprog, caller->regs);
++ if (err == -EFAULT)
++ return err;
++ if (subprog_is_global(env, subprog)) {
++ if (err) {
++ verbose(env, "Caller passes invalid args into func#%d\n", subprog);
++ return err;
++ }
++
++ if (env->log.level & BPF_LOG_LEVEL)
++ verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
+ clear_caller_saved_regs(env, caller->regs);
++
++ /* All global functions return a 64-bit SCALAR_VALUE */
+ mark_reg_unknown(env, caller->regs, BPF_REG_0);
+ caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
++
+ /* continue with next insn after call */
+ return 0;
+ }
+
+- err = setup_func_entry(env, subprog, *insn_idx, set_callee_state_cb, state);
++ /* for regular function entry setup new frame and continue
++ * from that frame.
++ */
++ err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
+ if (err)
+ return err;
+
+@@ -9355,22 +9403,6 @@ static int set_callee_state(struct bpf_v
+ return 0;
+ }
+
+-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+- int *insn_idx)
+-{
+- int subprog, target_insn;
+-
+- target_insn = *insn_idx + insn->imm + 1;
+- subprog = find_subprog(env, target_insn);
+- if (subprog < 0) {
+- verbose(env, "verifier bug. No program starts at insn %d\n",
+- target_insn);
+- return -EFAULT;
+- }
+-
+- return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
+-}
+-
+ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+@@ -9601,6 +9633,11 @@ static int prepare_func_exit(struct bpf_
+ verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
+ return -EINVAL;
+ }
++ if (!calls_callback(env, callee->callsite)) {
++ verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
++ *insn_idx, callee->callsite);
++ return -EFAULT;
++ }
+ } else {
+ /* return to the caller whatever r0 had in the callee */
+ caller->regs[BPF_REG_0] = *r0;
+@@ -9618,7 +9655,15 @@ static int prepare_func_exit(struct bpf_
+ return err;
+ }
+
+- *insn_idx = callee->callsite + 1;
++ /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
++ * where function call logic would reschedule callback visit. If iteration
++ * converges is_state_visited() would prune that visit eventually.
++ */
++ if (callee->in_callback_fn)
++ *insn_idx = callee->callsite;
++ else
++ *insn_idx = callee->callsite + 1;
++
+ if (env->log.level & BPF_LOG_LEVEL) {
+ verbose(env, "returning from callee:\n");
+ print_verifier_state(env, callee, true);
+@@ -10009,24 +10054,24 @@ static int check_helper_call(struct bpf_
+ }
+ break;
+ case BPF_FUNC_for_each_map_elem:
+- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+- set_map_elem_callback_state);
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_map_elem_callback_state);
+ break;
+ case BPF_FUNC_timer_set_callback:
+- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+- set_timer_callback_state);
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_timer_callback_state);
+ break;
+ case BPF_FUNC_find_vma:
+- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+- set_find_vma_callback_state);
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_find_vma_callback_state);
+ break;
+ case BPF_FUNC_snprintf:
+ err = check_bpf_snprintf_call(env, regs);
+ break;
+ case BPF_FUNC_loop:
+ update_loop_inline_state(env, meta.subprogno);
+- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+- set_loop_callback_state);
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_loop_callback_state);
+ break;
+ case BPF_FUNC_dynptr_from_mem:
+ if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
+@@ -10105,8 +10150,8 @@ static int check_helper_call(struct bpf_
+ break;
+ }
+ case BPF_FUNC_user_ringbuf_drain:
+- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+- set_user_ringbuf_callback_state);
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_user_ringbuf_callback_state);
+ break;
+ }
+
+@@ -10956,7 +11001,7 @@ static bool is_bpf_graph_api_kfunc(u32 b
+ btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
+ }
+
+-static bool is_callback_calling_kfunc(u32 btf_id)
++static bool is_sync_callback_calling_kfunc(u32 btf_id)
+ {
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
+ }
+@@ -11660,6 +11705,21 @@ static int check_kfunc_call(struct bpf_v
+ return -EACCES;
+ }
+
++ /* Check the arguments */
++ err = check_kfunc_args(env, &meta, insn_idx);
++ if (err < 0)
++ return err;
++
++ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
++ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
++ set_rbtree_add_callback_state);
++ if (err) {
++ verbose(env, "kfunc %s#%d failed callback verification\n",
++ func_name, meta.func_id);
++ return err;
++ }
++ }
++
+ rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
+ rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
+
+@@ -11694,10 +11754,6 @@ static int check_kfunc_call(struct bpf_v
+ return -EINVAL;
+ }
+
+- /* Check the arguments */
+- err = check_kfunc_args(env, &meta, insn_idx);
+- if (err < 0)
+- return err;
+ /* In case of release function, we get register number of refcounted
+ * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
+ */
+@@ -11731,16 +11787,6 @@ static int check_kfunc_call(struct bpf_v
+ }
+ }
+
+- if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+- set_rbtree_add_callback_state);
+- if (err) {
+- verbose(env, "kfunc %s#%d failed callback verification\n",
+- func_name, meta.func_id);
+- return err;
+- }
+- }
+-
+ for (i = 0; i < CALLER_SAVED_REGS; i++)
+ mark_reg_not_init(env, regs, caller_saved[i]);
+
+@@ -15047,6 +15093,15 @@ static bool is_force_checkpoint(struct b
+ return env->insn_aux_data[insn_idx].force_checkpoint;
+ }
+
++static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
++{
++ env->insn_aux_data[idx].calls_callback = true;
++}
++
++static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
++{
++ return env->insn_aux_data[insn_idx].calls_callback;
++}
+
+ enum {
+ DONE_EXPLORING = 0,
+@@ -15160,6 +15215,21 @@ static int visit_insn(int t, struct bpf_
+ * async state will be pushed for further exploration.
+ */
+ mark_prune_point(env, t);
++ /* For functions that invoke callbacks it is not known how many times
++ * callback would be called. Verifier models callback calling functions
++ * by repeatedly visiting callback bodies and returning to origin call
++ * instruction.
++ * In order to stop such iteration the verifier needs to identify when a
++ * state identical to some state from a previous iteration is reached.
++ * Check below forces creation of checkpoint before callback calling
++ * instruction to allow search for such identical states.
++ */
++ if (is_sync_callback_calling_insn(insn)) {
++ mark_calls_callback(env, t);
++ mark_force_checkpoint(env, t);
++ mark_prune_point(env, t);
++ mark_jmp_point(env, t);
++ }
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ struct bpf_kfunc_call_arg_meta meta;
+
+@@ -16553,10 +16623,16 @@ static int is_state_visited(struct bpf_v
+ }
+ goto skip_inf_loop_check;
+ }
++ if (calls_callback(env, insn_idx)) {
++ if (states_equal(env, &sl->state, cur, true))
++ goto hit;
++ goto skip_inf_loop_check;
++ }
+ /* attempt to detect infinite loop to avoid unnecessary doomed work */
+ if (states_maybe_looping(&sl->state, cur) &&
+ states_equal(env, &sl->state, cur, false) &&
+- !iter_active_depths_differ(&sl->state, cur)) {
++ !iter_active_depths_differ(&sl->state, cur) &&
++ sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
+ verbose_linfo(env, insn_idx, "; ");
+ verbose(env, "infinite loop detected at insn %d\n", insn_idx);
+ verbose(env, "cur state:");
+--- a/tools/testing/selftests/bpf/progs/cb_refs.c
++++ b/tools/testing/selftests/bpf/progs/cb_refs.c
+@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
+ if (!p)
+ return 0;
+ bpf_for_each_map_elem(&array_map, cb1, &p, 0);
++ bpf_kfunc_call_test_release(p);
+ return 0;
+ }
+
+--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
++++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+@@ -119,15 +119,26 @@ __naked int global_subprog_result_precis
+
+ SEC("?raw_tp")
+ __success __log_level(2)
++/* First simulated path does not include callback body */
+ __msg("14: (0f) r1 += r6")
+-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
++__msg("mark_precise: frame0: last_idx 14 first_idx 9")
+ __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+ __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+ __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
+ __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
+-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
+-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
+-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
++__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
++/* State entering callback body popped from states stack */
++__msg("from 9 to 17: frame1:")
++__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
++__msg("17: (b7) r0 = 0")
++__msg("18: (95) exit")
++__msg("returning from callee:")
++__msg("to caller at 9:")
++/* r4 (flags) is always precise for bpf_loop() */
++__msg("frame 0: propagating r4")
++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
++__msg("mark_precise: frame0: regs=r4 stack= before 18: (95) exit")
++__msg("from 18 to 9: safe")
+ __naked int callback_result_precise(void)
+ {
+ asm volatile (
+@@ -233,20 +244,36 @@ __naked int parent_callee_saved_reg_prec
+
+ SEC("?raw_tp")
+ __success __log_level(2)
++/* First simulated path does not include callback body */
+ __msg("12: (0f) r1 += r6")
+-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
++__msg("mark_precise: frame0: last_idx 12 first_idx 9")
+ __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
+ __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
++__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
+ __msg("mark_precise: frame0: parent state regs=r6 stack=:")
+-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
+-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
+-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
++__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+ __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
+ __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
+ __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
+ __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
+ __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
++/* State entering callback body popped from states stack */
++__msg("from 9 to 15: frame1:")
++__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
++__msg("15: (b7) r0 = 0")
++__msg("16: (95) exit")
++__msg("returning from callee:")
++__msg("to caller at 9:")
++/* r4 (flags) is always precise for bpf_loop(),
++ * r6 was marked before backtracking to callback body.
++ */
++__msg("frame 0: propagating r4,r6")
++__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
++__msg("mark_precise: frame0: regs=r4,r6 stack= before 16: (95) exit")
++__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
++__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
++__msg("mark_precise: frame0: parent state regs= stack=:")
++__msg("from 16 to 9: safe")
+ __naked int parent_callee_saved_reg_precise_with_callback(void)
+ {
+ asm volatile (
+@@ -373,22 +400,38 @@ __naked int parent_stack_slot_precise_gl
+
+ SEC("?raw_tp")
+ __success __log_level(2)
++/* First simulated path does not include callback body */
+ __msg("14: (0f) r1 += r6")
+-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
++__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+ __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+ __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+ __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
++__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
+ __msg("mark_precise: frame0: parent state regs= stack=-8:")
+-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
+-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
+-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
++__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
+ __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
+ __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
+ __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
+ __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
+ __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
+ __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
++/* State entering callback body popped from states stack */
++__msg("from 10 to 17: frame1:")
++__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
++__msg("17: (b7) r0 = 0")
++__msg("18: (95) exit")
++__msg("returning from callee:")
++__msg("to caller at 10:")
++/* r4 (flags) is always precise for bpf_loop(),
++ * fp-8 was marked before backtracking to callback body.
++ */
++__msg("frame 0: propagating r4,fp-8")
++__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
++__msg("mark_precise: frame0: regs=r4 stack=-8 before 18: (95) exit")
++__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
++__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
++__msg("mark_precise: frame0: parent state regs= stack=:")
++__msg("from 18 to 10: safe")
+ __naked int parent_stack_slot_precise_with_callback(void)
+ {
+ asm volatile (
--- /dev/null
+From cafe2c21508a38cdb3ed22708842e957b2572c3e Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:58 +0200
+Subject: bpf: widening for callback iterators
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit cafe2c21508a38cdb3ed22708842e957b2572c3e upstream.
+
+Callbacks are similar to open coded iterators, so add imprecise
+widening logic for callback body processing. This makes callback-based
+loops behave identically to open coded iterators, e.g. making it
+possible to verify programs like the one below:
+
+ struct ctx { u32 i; };
+ int cb(u32 idx, struct ctx* ctx)
+ {
+ ++ctx->i;
+ return 0;
+ }
+ ...
+ struct ctx ctx = { .i = 0 };
+ bpf_loop(100, cb, &ctx, 0);
+ ...
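+
+The effect of widening is that, on paths where the callback body was
+simulated, ctx.i is treated as an unknown scalar after the loop, so any
+bounded use of it still needs an explicit range check. An illustrative
+sketch (the array 'arr' and its size are hypothetical, not part of this
+patch):
+
+ int arr[16];
+ ...
+ struct ctx ctx = { .i = 0 };
+ bpf_loop(100, cb, &ctx, 0);
+ if (ctx.i < 16)
+     return arr[ctx.i]; /* bounds check required: i is not known exactly */
+ return 0;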
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-9-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 24 ++++++++++++++++++++++--
+ 1 file changed, 22 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -9595,9 +9595,10 @@ static bool in_rbtree_lock_required_cb(s
+
+ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ {
+- struct bpf_verifier_state *state = env->cur_state;
++ struct bpf_verifier_state *state = env->cur_state, *prev_st;
+ struct bpf_func_state *caller, *callee;
+ struct bpf_reg_state *r0;
++ bool in_callback_fn;
+ int err;
+
+ callee = state->frame[state->curframe];
+@@ -9659,7 +9660,8 @@ static int prepare_func_exit(struct bpf_
+ * where function call logic would reschedule callback visit. If iteration
+ * converges is_state_visited() would prune that visit eventually.
+ */
+- if (callee->in_callback_fn)
++ in_callback_fn = callee->in_callback_fn;
++ if (in_callback_fn)
+ *insn_idx = callee->callsite;
+ else
+ *insn_idx = callee->callsite + 1;
+@@ -9673,6 +9675,24 @@ static int prepare_func_exit(struct bpf_
+ /* clear everything in the callee */
+ free_func_state(callee);
+ state->frame[state->curframe--] = NULL;
++
++ /* for callbacks widen imprecise scalars to make programs like below verify:
++ *
++ * struct ctx { int i; }
++ * void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
++ * ...
++ * struct ctx ctx = { .i = 0 };
++ * bpf_loop(100, cb, &ctx, 0);
++ *
++ * This is similar to what is done in process_iter_next_call() for open
++ * coded iterators.
++ */
++ prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
++ if (prev_st) {
++ err = widen_imprecise_scalars(env, prev_st, state);
++ if (err)
++ return err;
++ }
+ return 0;
+ }
+
--- /dev/null
+From 57e2a52deeb12ab84c15c6d0fb93638b5b94001b Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:07:01 +0200
+Subject: selftests/bpf: check if max number of bpf_loop iterations is tracked
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 57e2a52deeb12ab84c15c6d0fb93638b5b94001b upstream.
+
+Check that even if bpf_loop() callback simulation does not converge to
+a specific state, verification can still proceed via "brute force"
+simulation of the maximal number of callback calls.
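+
+The nested case added below exercises this with two levels of
+bpf_loop(); its shape is roughly as follows (excerpted from the new
+selftest):
+
+ static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+ {
+     ctx->i += 1;
+     bpf_loop(1, iter_limit_level2a_cb, ctx, 0); /* adds 100 */
+     bpf_loop(1, iter_limit_level2b_cb, ctx, 0); /* adds 10 */
+     return 0;
+ }
+ ...
+ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);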
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-12-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c | 75 ++++++++++
+ 1 file changed, 75 insertions(+)
+
+--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
++++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+@@ -164,4 +164,79 @@ int unsafe_find_vma(void *unused)
+ return choice_arr[loop_ctx.i];
+ }
+
++static int iter_limit_cb(__u32 idx, struct num_context *ctx)
++{
++ ctx->i++;
++ return 0;
++}
++
++SEC("?raw_tp")
++__success
++int bpf_loop_iter_limit_ok(void *unused)
++{
++ struct num_context ctx = { .i = 0 };
++
++ bpf_loop(1, iter_limit_cb, &ctx, 0);
++ return choice_arr[ctx.i];
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
++int bpf_loop_iter_limit_overflow(void *unused)
++{
++ struct num_context ctx = { .i = 0 };
++
++ bpf_loop(2, iter_limit_cb, &ctx, 0);
++ return choice_arr[ctx.i];
++}
++
++static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
++{
++ ctx->i += 100;
++ return 0;
++}
++
++static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
++{
++ ctx->i += 10;
++ return 0;
++}
++
++static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
++{
++ ctx->i += 1;
++ bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
++ bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
++ return 0;
++}
++
++/* Check that a path visiting every callback function once has been
++ * reached by the verifier. Variables 'ctx{1,2}.i' below serve as flags,
++ * with each decimal digit corresponding to a callback visit marker.
++ */
++SEC("socket")
++__success __retval(111111)
++int bpf_loop_iter_limit_nested(void *unused)
++{
++ struct num_context ctx1 = { .i = 0 };
++ struct num_context ctx2 = { .i = 0 };
++ __u64 a, b, c;
++
++ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
++ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
++ a = ctx1.i;
++ b = ctx2.i;
++ /* Force 'ctx1.i' and 'ctx2.i' precise. */
++ c = choice_arr[(a + b) % 2];
++ /* This makes 'c' zero, but neither clang nor verifier know it. */
++ c /= 10;
++ /* Make sure that verifier does not visit 'impossible' states:
++ * enumerate all possible callback visit masks.
++ */
++ if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
++ b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
++ asm volatile ("r0 /= 0;" ::: "r0");
++ return 1000 * a + b + c;
++}
++
+ char _license[] SEC("license") = "GPL";
--- /dev/null
+From 64870feebecb7130291a55caf0ce839a87405a70 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:16 +0300
+Subject: selftests/bpf: test if state loops are detected in a tricky case
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 64870feebecb7130291a55caf0ce839a87405a70 upstream.
+
+A convoluted test case for the iterators convergence logic that
+demonstrates that states with a branch count equal to 0 might still be
+a part of a not yet fully explored loop.
+
+E.g. consider the following state diagram:
+
+ initial Here state 'succ' was processed first,
+ | it was eventually tracked to produce a
+ V state identical to 'hdr'.
+ .---------> hdr All branches from 'succ' had been explored
+ | | and thus 'succ' has its .branches == 0.
+ | V
+ | .------... Suppose states 'cur' and 'succ' correspond
+ | | | to the same instruction + callsites.
+ | V V In such case it is necessary to check
+ | ... ... whether 'succ' and 'cur' are identical.
+ | | | If 'succ' and 'cur' are a part of the same loop
+ | V V they have to be compared exactly.
+ | succ <- cur
+ | |
+ | V
+ | ...
+ | |
+ '----'
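+
+The test program added below has roughly the following shape (its full
+C equivalent is given in the comments of the new loop_state_deps2
+test):
+
+ j = iter_new();               // outer loop, c starts at -24
+ while (iter_next(j)) {
+     /* first inner loop: may execute *(r10 + c) = 7,
+      * which is unsafe once c == -25
+      */
+     /* second inner loop: may set c = -25 */
+ }
+ iter_destroy(j);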
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-7-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/progs/iters.c | 177 ++++++++++++++++++++++++++++++
+ 1 file changed, 177 insertions(+)
+
+--- a/tools/testing/selftests/bpf/progs/iters.c
++++ b/tools/testing/selftests/bpf/progs/iters.c
+@@ -999,6 +999,183 @@ __naked int loop_state_deps1(void)
+ }
+
+ SEC("?raw_tp")
++__failure
++__msg("math between fp pointer and register with unbounded")
++__flag(BPF_F_TEST_STATE_FREQ)
++__naked int loop_state_deps2(void)
++{
++ /* This is equivalent to C program below.
++ *
++ * The case turns out to be tricky in a sense that:
++ * - states with read+precise mark on c are explored only on a second
++ * iteration of the first inner loop and in a state which is pushed to
++ * states stack first.
++ * - states with c=-25 are explored only on a second iteration of the
++ * second inner loop and in a state which is pushed to states stack
++ * first.
++ *
++ * Depending on the details of iterator convergence logic
++ * verifier might stop states traversal too early and miss
++ * unsafe c=-25 memory access.
++ *
++ * j = iter_new(); // fp[-16]
++ * a = 0; // r6
++ * b = 0; // r7
++ * c = -24; // r8
++ * while (iter_next(j)) {
++ * i = iter_new(); // fp[-8]
++ * a = 0; // r6
++ * b = 0; // r7
++ * while (iter_next(i)) {
++ * if (a == 1) {
++ * a = 0;
++ * b = 1;
++ * } else if (a == 0) {
++ * a = 1;
++ * if (random() == 42)
++ * continue;
++ * if (b == 1) {
++ * *(r10 + c) = 7; // this is not safe
++ * iter_destroy(i);
++ * iter_destroy(j);
++ * return;
++ * }
++ * }
++ * }
++ * iter_destroy(i);
++ * i = iter_new(); // fp[-8]
++ * a = 0; // r6
++ * b = 0; // r7
++ * while (iter_next(i)) {
++ * if (a == 1) {
++ * a = 0;
++ * b = 1;
++ * } else if (a == 0) {
++ * a = 1;
++ * if (random() == 42)
++ * continue;
++ * if (b == 1) {
++ * a = 0;
++ * c = -25;
++ * }
++ * }
++ * }
++ * iter_destroy(i);
++ * }
++ * iter_destroy(j);
++ * return;
++ */
++ asm volatile (
++ "r1 = r10;"
++ "r1 += -16;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "r6 = 0;"
++ "r7 = 0;"
++ "r8 = -24;"
++ "j_loop_%=:"
++ "r1 = r10;"
++ "r1 += -16;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto j_loop_end_%=;"
++
++ /* first inner loop */
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "r6 = 0;"
++ "r7 = 0;"
++ "i_loop_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto i_loop_end_%=;"
++ "check_one_r6_%=:"
++ "if r6 != 1 goto check_zero_r6_%=;"
++ "r6 = 0;"
++ "r7 = 1;"
++ "goto i_loop_%=;"
++ "check_zero_r6_%=:"
++ "if r6 != 0 goto i_loop_%=;"
++ "r6 = 1;"
++ "call %[bpf_get_prandom_u32];"
++ "if r0 != 42 goto check_one_r7_%=;"
++ "goto i_loop_%=;"
++ "check_one_r7_%=:"
++ "if r7 != 1 goto i_loop_%=;"
++ "r0 = r10;"
++ "r0 += r8;"
++ "r1 = 7;"
++ "*(u64 *)(r0 + 0) = r1;"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r1 = r10;"
++ "r1 += -16;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ "i_loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++
++ /* second inner loop */
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "r6 = 0;"
++ "r7 = 0;"
++ "i2_loop_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto i2_loop_end_%=;"
++ "check2_one_r6_%=:"
++ "if r6 != 1 goto check2_zero_r6_%=;"
++ "r6 = 0;"
++ "r7 = 1;"
++ "goto i2_loop_%=;"
++ "check2_zero_r6_%=:"
++ "if r6 != 0 goto i2_loop_%=;"
++ "r6 = 1;"
++ "call %[bpf_get_prandom_u32];"
++ "if r0 != 42 goto check2_one_r7_%=;"
++ "goto i2_loop_%=;"
++ "check2_one_r7_%=:"
++ "if r7 != 1 goto i2_loop_%=;"
++ "r6 = 0;"
++ "r8 = -25;"
++ "goto i2_loop_%=;"
++ "i2_loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++
++ "r6 = 0;"
++ "r7 = 0;"
++ "goto j_loop_%=;"
++ "j_loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -16;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ :
++ : __imm(bpf_get_prandom_u32),
++ __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy)
++ : __clobber_all
++ );
++}
++
++SEC("?raw_tp")
+ __success
+ __naked int triple_continue(void)
+ {
--- /dev/null
+From 9f3330aa644d6d979eb064c46e85c62d4b4eac75 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:59 +0200
+Subject: selftests/bpf: test widening for iterating callbacks
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 9f3330aa644d6d979eb064c46e85c62d4b4eac75 upstream.
+
+A test case to verify that widening of imprecise scalars is applied to
+the state entering the callback when the callback call is simulated
+repeatedly.
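+
+The new test below checks both that widening is applied to the field
+mutated by the callback and that it is not applied to a field the
+callback never touches; the program is roughly of the following shape
+(a sketch mirroring the test):
+
+ struct num_context { __u64 i; __u64 j; };
+
+ static int widening_cb(__u32 idx, struct num_context *ctx)
+ {
+     ++ctx->i; /* changed in the loop: gets widened */
+     return 0;
+ }
+ ...
+ struct num_context loop_ctx = { .i = 0, .j = 1 };
+ bpf_loop(100, widening_cb, &loop_ctx, 0);
+ /* j is never written by the callback, so it must stay precise and
+  * the access below must remain provably in bounds.
+  */
+ return choice_arr[loop_ctx.j];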
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-10-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c | 20 ++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
++++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+@@ -25,6 +25,7 @@ struct buf_context {
+
+ struct num_context {
+ __u64 i;
++ __u64 j;
+ };
+
+ __u8 choice_arr[2] = { 0, 1 };
+@@ -69,6 +70,25 @@ int unsafe_on_zero_iter(void *unused)
+ return choice_arr[loop_ctx.i];
+ }
+
++static int widening_cb(__u32 idx, struct num_context *ctx)
++{
++ ++ctx->i;
++ return 0;
++}
++
++SEC("?raw_tp")
++__success
++int widening(void *unused)
++{
++ struct num_context loop_ctx = { .i = 0, .j = 1 };
++
++ bpf_loop(100, widening_cb, &loop_ctx, 0);
++ /* loop_ctx.j is not changed during callback iteration,
++ * verifier should not apply widening to it.
++ */
++ return choice_arr[loop_ctx.j];
++}
++
+ static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+ {
+ for (;;) {}
--- /dev/null
+From 958465e217dbf5fc6677d42d8827fb3073d86afd Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:57 +0200
+Subject: selftests/bpf: tests for iterating callbacks
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 958465e217dbf5fc6677d42d8827fb3073d86afd upstream.
+
+A set of test cases to check the behavior of the callback handling
+logic; check that the verifier catches the following situations:
+- program not safe on second callback iteration;
+- program not safe on zero callback iterations;
+- infinite loop inside a callback.
+
+Verify that callback logic works for bpf_loop, bpf_for_each_map_elem,
+bpf_user_ringbuf_drain, bpf_find_vma.
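+
+For example, the "not safe on zero callback iterations" pattern looks
+as follows (a sketch mirroring the unsafe_on_zero_iter test added
+below):
+
+ static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+ {
+     ctx->i = 0;
+     return 0;
+ }
+ ...
+ struct num_context loop_ctx = { .i = 32 };
+ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+ /* must be rejected: the callback might never run, leaving i == 32,
+  * which is out of bounds for the two-element choice_arr.
+  */
+ return choice_arr[loop_ctx.i];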
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-8-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/prog_tests/verifier.c | 2
+ tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c | 147 ++++++++++
+ 2 files changed, 149 insertions(+)
+ create mode 100644 tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+
+--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
++++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
+@@ -31,6 +31,7 @@
+ #include "verifier_helper_restricted.skel.h"
+ #include "verifier_helper_value_access.skel.h"
+ #include "verifier_int_ptr.skel.h"
++#include "verifier_iterating_callbacks.skel.h"
+ #include "verifier_jeq_infer_not_null.skel.h"
+ #include "verifier_ld_ind.skel.h"
+ #include "verifier_ldsx.skel.h"
+@@ -138,6 +139,7 @@ void test_verifier_helper_packet_access(
+ void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
+ void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
+ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
++void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
+ void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
+ void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
+ void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
+--- /dev/null
++++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+@@ -0,0 +1,147 @@
++// SPDX-License-Identifier: GPL-2.0
++
++#include <linux/bpf.h>
++#include <bpf/bpf_helpers.h>
++#include "bpf_misc.h"
++
++struct {
++ __uint(type, BPF_MAP_TYPE_ARRAY);
++ __uint(max_entries, 8);
++ __type(key, __u32);
++ __type(value, __u64);
++} map SEC(".maps");
++
++struct {
++ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
++ __uint(max_entries, 8);
++} ringbuf SEC(".maps");
++
++struct vm_area_struct;
++struct bpf_map;
++
++struct buf_context {
++ char *buf;
++};
++
++struct num_context {
++ __u64 i;
++};
++
++__u8 choice_arr[2] = { 0, 1 };
++
++static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
++{
++ if (idx == 0) {
++ ctx->buf = (char *)(0xDEAD);
++ return 0;
++ }
++
++ if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
++ return 1;
++
++ return 0;
++}
++
++SEC("?raw_tp")
++__failure __msg("R1 type=scalar expected=fp")
++int unsafe_on_2nd_iter(void *unused)
++{
++ char buf[4];
++ struct buf_context loop_ctx = { .buf = buf };
++
++ bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
++ return 0;
++}
++
++static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
++{
++ ctx->i = 0;
++ return 0;
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_on_zero_iter(void *unused)
++{
++ struct num_context loop_ctx = { .i = 32 };
++
++ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
++ return choice_arr[loop_ctx.i];
++}
++
++static int loop_detection_cb(__u32 idx, struct num_context *ctx)
++{
++ for (;;) {}
++ return 0;
++}
++
++SEC("?raw_tp")
++__failure __msg("infinite loop detected")
++int loop_detection(void *unused)
++{
++ struct num_context loop_ctx = { .i = 0 };
++
++ bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
++ return 0;
++}
++
++static __always_inline __u64 oob_state_machine(struct num_context *ctx)
++{
++ switch (ctx->i) {
++ case 0:
++ ctx->i = 1;
++ break;
++ case 1:
++ ctx->i = 32;
++ break;
++ }
++ return 0;
++}
++
++static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
++{
++ return oob_state_machine(data);
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_for_each_map_elem(void *unused)
++{
++ struct num_context loop_ctx = { .i = 0 };
++
++ bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
++ return choice_arr[loop_ctx.i];
++}
++
++static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
++{
++ return oob_state_machine(data);
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_ringbuf_drain(void *unused)
++{
++ struct num_context loop_ctx = { .i = 0 };
++
++ bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
++ return choice_arr[loop_ctx.i];
++}
++
++static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
++{
++ return oob_state_machine(data);
++}
++
++SEC("?raw_tp")
++__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
++int unsafe_find_vma(void *unused)
++{
++ struct task_struct *task = bpf_get_current_task_btf();
++ struct num_context loop_ctx = { .i = 0 };
++
++ bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
++ return choice_arr[loop_ctx.i];
++}
++
++char _license[] SEC("license") = "GPL";
--- /dev/null
+From 389ede06c2974b2f878a7ebff6b0f4f707f9db74 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 24 Oct 2023 03:09:14 +0300
+Subject: selftests/bpf: tests with delayed read/precision marks in loop body
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 389ede06c2974b2f878a7ebff6b0f4f707f9db74 upstream.
+
+These test cases try to hide read and precision marks from the loop
+convergence logic: marks would only be assigned on subsequent loop
+iterations or after exploring states pushed to the env->head stack first.
+Without the verifier fix to use exact states comparison logic for
+iterators convergence these tests (except 'triple_continue') would be
+erroneously marked as safe.
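+
+For example, the delayed_read_mark test below boils down to the
+following (condensed from the C equivalent given in the test's
+comment):
+
+ r7 = &fp[-16];
+ bpf_iter_num_new(&fp[-8], 0, 10);
+ while (bpf_iter_num_next(&fp[-8])) {
+     r6++;
+     if (r6 != 42) {
+         r7 = 0xdead; /* r7 gets its read mark only on a later path */
+         continue;
+     }
+     bpf_probe_read_user(r7, 8, 0xdeadbeef); /* this is not safe */
+ }
+ bpf_iter_num_destroy(&fp[-8]);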
+
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231024000917.12153-5-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/progs/iters.c | 518 ++++++++++++++++++++++++++++++
+ 1 file changed, 518 insertions(+)
+
+--- a/tools/testing/selftests/bpf/progs/iters.c
++++ b/tools/testing/selftests/bpf/progs/iters.c
+@@ -14,6 +14,13 @@ int my_pid;
+ int arr[256];
+ int small_arr[16] SEC(".data.small_arr");
+
++struct {
++ __uint(type, BPF_MAP_TYPE_HASH);
++ __uint(max_entries, 10);
++ __type(key, int);
++ __type(value, int);
++} amap SEC(".maps");
++
+ #ifdef REAL_TEST
+ #define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
+ #else
+@@ -716,4 +723,515 @@ int iter_pass_iter_ptr_to_subprog(const
+ return 0;
+ }
+
++SEC("?raw_tp")
++__failure
++__msg("R1 type=scalar expected=fp")
++__naked int delayed_read_mark(void)
++{
++ /* This is equivalent to C program below.
++ * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
++ * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
++ * At this point iterator next() call is reached with r7 that has no read mark.
++ * Loop body with r7=0xdead would only be visited if verifier would decide to continue
++ * with second loop iteration. Absence of read mark on r7 might affect state
++ * equivalence logic used for iterator convergence tracking.
++ *
++ * r7 = &fp[-16]
++ * fp[-16] = 0
++ * r6 = bpf_get_prandom_u32()
++ * bpf_iter_num_new(&fp[-8], 0, 10)
++ * while (bpf_iter_num_next(&fp[-8])) {
++ * r6++
++ * if (r6 != 42) {
++ * r7 = 0xdead
++ * continue;
++ * }
++ * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe
++ * }
++ * bpf_iter_num_destroy(&fp[-8])
++ * return 0
++ */
++ asm volatile (
++ "r7 = r10;"
++ "r7 += -16;"
++ "r0 = 0;"
++ "*(u64 *)(r7 + 0) = r0;"
++ "call %[bpf_get_prandom_u32];"
++ "r6 = r0;"
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "1:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto 2f;"
++ "r6 += 1;"
++ "if r6 != 42 goto 3f;"
++ "r7 = 0xdead;"
++ "goto 1b;"
++ "3:"
++ "r1 = r7;"
++ "r2 = 8;"
++ "r3 = 0xdeadbeef;"
++ "call %[bpf_probe_read_user];"
++ "goto 1b;"
++ "2:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ :
++ : __imm(bpf_get_prandom_u32),
++ __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy),
++ __imm(bpf_probe_read_user)
++ : __clobber_all
++ );
++}
++
++SEC("?raw_tp")
++__failure
++__msg("math between fp pointer and register with unbounded")
++__naked int delayed_precision_mark(void)
++{
++ /* This is equivalent to C program below.
++ * The test is similar to delayed_read_mark but verifies that incomplete
++ * precision marks don't fool the verifier.
++ * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32.
++ * State with r7=-16 is visited first and follows r6 != 42 ... continue branch.
++ * At this point iterator next() call is reached with r7 that has no read
++ * and precision marks.
++ * Loop body with r7=-32 would only be visited if verifier would decide to continue
++ * with second loop iteration. Absence of precision mark on r7 might affect state
++ * equivalence logic used for iterator convergence tracking.
++ *
++ * r8 = 0
++ * fp[-16] = 0
++ * r7 = -16
++ * r6 = bpf_get_prandom_u32()
++ * bpf_iter_num_new(&fp[-8], 0, 10)
++ * while (bpf_iter_num_next(&fp[-8])) {
++ * if (r6 != 42) {
++ * r7 = -32
++ * r6 = bpf_get_prandom_u32()
++ * continue;
++ * }
++ * r0 = r10
++ * r0 += r7
++ * r8 = *(u64 *)(r0 + 0) // this is not safe
++ * r6 = bpf_get_prandom_u32()
++ * }
++ * bpf_iter_num_destroy(&fp[-8])
++ * return r8
++ */
++ asm volatile (
++ "r8 = 0;"
++ "*(u64 *)(r10 - 16) = r8;"
++ "r7 = -16;"
++ "call %[bpf_get_prandom_u32];"
++ "r6 = r0;"
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "1:"
++ "r1 = r10;"
++ "r1 += -8;\n"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto 2f;"
++ "if r6 != 42 goto 3f;"
++ "r7 = -32;"
++ "call %[bpf_get_prandom_u32];"
++ "r6 = r0;"
++ "goto 1b;\n"
++ "3:"
++ "r0 = r10;"
++ "r0 += r7;"
++ "r8 = *(u64 *)(r0 + 0);"
++ "call %[bpf_get_prandom_u32];"
++ "r6 = r0;"
++ "goto 1b;\n"
++ "2:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = r8;"
++ "exit;"
++ :
++ : __imm(bpf_get_prandom_u32),
++ __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy),
++ __imm(bpf_probe_read_user)
++ : __clobber_all
++ );
++}
++
++SEC("?raw_tp")
++__failure
++__msg("math between fp pointer and register with unbounded")
++__flag(BPF_F_TEST_STATE_FREQ)
++__naked int loop_state_deps1(void)
++{
++ /* This is equivalent to C program below.
++ *
++ * The case turns out to be tricky in a sense that:
++ * - states with c=-25 are explored only on a second iteration
++ * of the outer loop;
++ * - states with read+precise mark on c are explored only on
++ * second iteration of the inner loop and in a state which
++ * is pushed to states stack first.
++ *
++ * Depending on the details of iterator convergence logic
++ * verifier might stop states traversal too early and miss
++ * unsafe c=-25 memory access.
++ *
++ * j = iter_new(); // fp[-16]
++ * a = 0; // r6
++ * b = 0; // r7
++ * c = -24; // r8
++ * while (iter_next(j)) {
++ * i = iter_new(); // fp[-8]
++ * a = 0; // r6
++ * b = 0; // r7
++ * while (iter_next(i)) {
++ * if (a == 1) {
++ * a = 0;
++ * b = 1;
++ * } else if (a == 0) {
++ * a = 1;
++ * if (random() == 42)
++ * continue;
++ * if (b == 1) {
++ * *(r10 + c) = 7; // this is not safe
++ * iter_destroy(i);
++ * iter_destroy(j);
++ * return;
++ * }
++ * }
++ * }
++ * iter_destroy(i);
++ * a = 0;
++ * b = 0;
++ * c = -25;
++ * }
++ * iter_destroy(j);
++ * return;
++ */
++ asm volatile (
++ "r1 = r10;"
++ "r1 += -16;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "r6 = 0;"
++ "r7 = 0;"
++ "r8 = -24;"
++ "j_loop_%=:"
++ "r1 = r10;"
++ "r1 += -16;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto j_loop_end_%=;"
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "r6 = 0;"
++ "r7 = 0;"
++ "i_loop_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto i_loop_end_%=;"
++ "check_one_r6_%=:"
++ "if r6 != 1 goto check_zero_r6_%=;"
++ "r6 = 0;"
++ "r7 = 1;"
++ "goto i_loop_%=;"
++ "check_zero_r6_%=:"
++ "if r6 != 0 goto i_loop_%=;"
++ "r6 = 1;"
++ "call %[bpf_get_prandom_u32];"
++ "if r0 != 42 goto check_one_r7_%=;"
++ "goto i_loop_%=;"
++ "check_one_r7_%=:"
++ "if r7 != 1 goto i_loop_%=;"
++ "r0 = r10;"
++ "r0 += r8;"
++ "r1 = 7;"
++ "*(u64 *)(r0 + 0) = r1;"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r1 = r10;"
++ "r1 += -16;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ "i_loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r6 = 0;"
++ "r7 = 0;"
++ "r8 = -25;"
++ "goto j_loop_%=;"
++ "j_loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -16;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ :
++ : __imm(bpf_get_prandom_u32),
++ __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy)
++ : __clobber_all
++ );
++}
++
++SEC("?raw_tp")
++__success
++__naked int triple_continue(void)
++{
++ /* This is equivalent to C program below.
++ * High branching factor of the loop body turned out to be
++ * problematic for one of the iterator convergence tracking
++ * algorithms explored.
++ *
++ * r6 = bpf_get_prandom_u32()
++ * bpf_iter_num_new(&fp[-8], 0, 10)
++ * while (bpf_iter_num_next(&fp[-8])) {
++ * if (bpf_get_prandom_u32() != 42)
++ * continue;
++ * if (bpf_get_prandom_u32() != 42)
++ * continue;
++ * if (bpf_get_prandom_u32() != 42)
++ * continue;
++ * r0 += 0;
++ * }
++ * bpf_iter_num_destroy(&fp[-8])
++ * return 0
++ */
++ asm volatile (
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "loop_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto loop_end_%=;"
++ "call %[bpf_get_prandom_u32];"
++ "if r0 != 42 goto loop_%=;"
++ "call %[bpf_get_prandom_u32];"
++ "if r0 != 42 goto loop_%=;"
++ "call %[bpf_get_prandom_u32];"
++ "if r0 != 42 goto loop_%=;"
++ "r0 += 0;"
++ "goto loop_%=;"
++ "loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ :
++ : __imm(bpf_get_prandom_u32),
++ __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy)
++ : __clobber_all
++ );
++}
++
++SEC("?raw_tp")
++__success
++__naked int widen_spill(void)
++{
++ /* This is equivalent to C program below.
++ * The counter is stored in fp[-16]; if this counter is not widened
++ * verifier states representing loop iterations would never converge.
++ *
++ * fp[-16] = 0
++ * bpf_iter_num_new(&fp[-8], 0, 10)
++ * while (bpf_iter_num_next(&fp[-8])) {
++ * r0 = fp[-16];
++ * r0 += 1;
++ * fp[-16] = r0;
++ * }
++ * bpf_iter_num_destroy(&fp[-8])
++ * return 0
++ */
++ asm volatile (
++ "r0 = 0;"
++ "*(u64 *)(r10 - 16) = r0;"
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "loop_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto loop_end_%=;"
++ "r0 = *(u64 *)(r10 - 16);"
++ "r0 += 1;"
++ "*(u64 *)(r10 - 16) = r0;"
++ "goto loop_%=;"
++ "loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ :
++ : __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy)
++ : __clobber_all
++ );
++}
++
++SEC("raw_tp")
++__success
++__naked int checkpoint_states_deletion(void)
++{
++ /* This is equivalent to C program below.
++ *
++ * int *a, *b, *c, *d, *e, *f;
++ * int i, sum = 0;
++ * bpf_for(i, 0, 10) {
++ * a = bpf_map_lookup_elem(&amap, &i);
++ * b = bpf_map_lookup_elem(&amap, &i);
++ * c = bpf_map_lookup_elem(&amap, &i);
++ * d = bpf_map_lookup_elem(&amap, &i);
++ * e = bpf_map_lookup_elem(&amap, &i);
++ * f = bpf_map_lookup_elem(&amap, &i);
++ * if (a) sum += 1;
++ * if (b) sum += 1;
++ * if (c) sum += 1;
++ * if (d) sum += 1;
++ * if (e) sum += 1;
++ * if (f) sum += 1;
++ * }
++ * return 0;
++ *
++ * The body of the loop spawns multiple simulation paths
++ * with different combination of NULL/non-NULL information for a/b/c/d/e/f.
++ * Each combination is unique from states_equal() point of view.
++ * Explored states checkpoint is created after each iterator next call.
++ * Iterator convergence logic expects that eventually current state
++ * would get equal to one of the explored states and thus loop
++ * exploration would be finished (at least for a specific path).
++ * Verifier evicts explored states with high miss to hit ratio
++ * to avoid comparing current state with too many explored
++ * states per instruction.
++ * This test is designed to "stress test" eviction policy defined using formula:
++ *
++ * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted
++ *
++ * Currently N is set to 64, which allows for 6 variables in this test.
++ */
++ asm volatile (
++ "r6 = 0;" /* a */
++ "r7 = 0;" /* b */
++ "r8 = 0;" /* c */
++ "*(u64 *)(r10 - 24) = r6;" /* d */
++ "*(u64 *)(r10 - 32) = r6;" /* e */
++ "*(u64 *)(r10 - 40) = r6;" /* f */
++ "r9 = 0;" /* sum */
++ "r1 = r10;"
++ "r1 += -8;"
++ "r2 = 0;"
++ "r3 = 10;"
++ "call %[bpf_iter_num_new];"
++ "loop_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_next];"
++ "if r0 == 0 goto loop_end_%=;"
++
++ "*(u64 *)(r10 - 16) = r0;"
++
++ "r1 = %[amap] ll;"
++ "r2 = r10;"
++ "r2 += -16;"
++ "call %[bpf_map_lookup_elem];"
++ "r6 = r0;"
++
++ "r1 = %[amap] ll;"
++ "r2 = r10;"
++ "r2 += -16;"
++ "call %[bpf_map_lookup_elem];"
++ "r7 = r0;"
++
++ "r1 = %[amap] ll;"
++ "r2 = r10;"
++ "r2 += -16;"
++ "call %[bpf_map_lookup_elem];"
++ "r8 = r0;"
++
++ "r1 = %[amap] ll;"
++ "r2 = r10;"
++ "r2 += -16;"
++ "call %[bpf_map_lookup_elem];"
++ "*(u64 *)(r10 - 24) = r0;"
++
++ "r1 = %[amap] ll;"
++ "r2 = r10;"
++ "r2 += -16;"
++ "call %[bpf_map_lookup_elem];"
++ "*(u64 *)(r10 - 32) = r0;"
++
++ "r1 = %[amap] ll;"
++ "r2 = r10;"
++ "r2 += -16;"
++ "call %[bpf_map_lookup_elem];"
++ "*(u64 *)(r10 - 40) = r0;"
++
++ "if r6 == 0 goto +1;"
++ "r9 += 1;"
++ "if r7 == 0 goto +1;"
++ "r9 += 1;"
++ "if r8 == 0 goto +1;"
++ "r9 += 1;"
++ "r0 = *(u64 *)(r10 - 24);"
++ "if r0 == 0 goto +1;"
++ "r9 += 1;"
++ "r0 = *(u64 *)(r10 - 32);"
++ "if r0 == 0 goto +1;"
++ "r9 += 1;"
++ "r0 = *(u64 *)(r10 - 40);"
++ "if r0 == 0 goto +1;"
++ "r9 += 1;"
++
++ "goto loop_%=;"
++ "loop_end_%=:"
++ "r1 = r10;"
++ "r1 += -8;"
++ "call %[bpf_iter_num_destroy];"
++ "r0 = 0;"
++ "exit;"
++ :
++ : __imm(bpf_map_lookup_elem),
++ __imm(bpf_iter_num_new),
++ __imm(bpf_iter_num_next),
++ __imm(bpf_iter_num_destroy),
++ __imm_addr(amap)
++ : __clobber_all
++ );
++}
++
+ char _license[] SEC("license") = "GPL";
--- /dev/null
+From 87eb0152bcc102ecbda866978f4e54db5a3be1ef Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:52 +0200
+Subject: selftests/bpf: track string payload offset as scalar in strobemeta
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 87eb0152bcc102ecbda866978f4e54db5a3be1ef upstream.
+
+This change prepares strobemeta for an update in callback verification
+logic. To allow bpf_loop() verification to converge when multiple
+callback iterations are considered:
+- track offset inside strobemeta_payload->payload directly as scalar
+ value;
+- at each iteration make sure that remaining
+ strobemeta_payload->payload capacity is sufficient for execution of
+ read_{map,str}_var functions;
+- make sure that offset is tracked as unbound scalar between
+ iterations, otherwise the verifier won't be able to infer that the
+ bpf_loop callback reaches identical states (see the sketch after
+ this list).
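+
+The trick used in the callback to keep the offset unbound between
+iterations looks as follows (barrier_var() is the selftests' compiler
+barrier helper):
+
+ /* lose precision info for ctx->payload_off; the double xor is a
+  * no-op, barrier_var() forces clang to keep both xors.
+  */
+ ctx->payload_off ^= index;
+ barrier_var(ctx->payload_off);
+ ctx->payload_off ^= index;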
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-3-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/progs/strobemeta.h | 78 +++++++++++++++----------
+ 1 file changed, 48 insertions(+), 30 deletions(-)
+
+--- a/tools/testing/selftests/bpf/progs/strobemeta.h
++++ b/tools/testing/selftests/bpf/progs/strobemeta.h
+@@ -24,9 +24,11 @@ struct task_struct {};
+ #define STACK_TABLE_EPOCH_SHIFT 20
+ #define STROBE_MAX_STR_LEN 1
+ #define STROBE_MAX_CFGS 32
++#define READ_MAP_VAR_PAYLOAD_CAP \
++ ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+ #define STROBE_MAX_PAYLOAD \
+ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
+- STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
++ STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
+
+ struct strobe_value_header {
+ /*
+@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+- void *payload)
++ size_t off)
+ {
+ void *location;
+ uint64_t len;
+@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str
+ return 0;
+
+ bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
+ /*
+ * if bpf_probe_read_user_str returns error (<0), due to casting to
+ * unsinged int, it will become big number, so next check is
+@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str
+ return 0;
+
+ data->str_lens[idx] = len;
+- return len;
++ return off + len;
+ }
+
+-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
+- size_t idx, void *tls_base,
+- struct strobe_value_generic *value,
+- struct strobemeta_payload *data,
+- void *payload)
++static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
++ size_t idx, void *tls_base,
++ struct strobe_value_generic *value,
++ struct strobemeta_payload *data,
++ size_t off)
+ {
+ struct strobe_map_descr* descr = &data->map_descrs[idx];
+ struct strobe_map_raw map;
+@@ -397,11 +399,11 @@ static __always_inline void *read_map_va
+
+ location = calc_location(&cfg->map_locs[idx], tls_base);
+ if (!location)
+- return payload;
++ return off;
+
+ bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
+ if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
+- return payload;
++ return off;
+
+ descr->id = map.id;
+ descr->cnt = map.cnt;
+@@ -410,10 +412,10 @@ static __always_inline void *read_map_va
+ data->req_meta_valid = 1;
+ }
+
+- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->tag_len = len;
+- payload += len;
++ off += len;
+ }
+
+ #ifdef NO_UNROLL
+@@ -426,22 +428,22 @@ static __always_inline void *read_map_va
+ break;
+
+ descr->key_lens[i] = 0;
+- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
+ map.entries[i].key);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->key_lens[i] = len;
+- payload += len;
++ off += len;
+ }
+ descr->val_lens[i] = 0;
+- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
++ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
+ map.entries[i].val);
+ if (len <= STROBE_MAX_STR_LEN) {
+ descr->val_lens[i] = len;
+- payload += len;
++ off += len;
+ }
+ }
+
+- return payload;
++ return off;
+ }
+
+ #ifdef USE_BPF_LOOP
+@@ -455,14 +457,20 @@ struct read_var_ctx {
+ struct strobemeta_payload *data;
+ void *tls_base;
+ struct strobemeta_cfg *cfg;
+- void *payload;
++ size_t payload_off;
+ /* value gets mutated */
+ struct strobe_value_generic *value;
+ enum read_type type;
+ };
+
+-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
++static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
+ {
++ /* Lose precision info for ctx->payload_off: the verifier won't track the
++ * double xor, so barrier_var() is needed to force clang to keep both xors.
++ */
++ ctx->payload_off ^= index;
++ barrier_var(ctx->payload_off);
++ ctx->payload_off ^= index;
+ switch (ctx->type) {
+ case READ_INT_VAR:
+ if (index >= STROBE_MAX_INTS)
+@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index
+ case READ_MAP_VAR:
+ if (index >= STROBE_MAX_MAPS)
+ return 1;
+- ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
+- ctx->value, ctx->data, ctx->payload);
++ if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
++ return 1;
++ ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
++ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ case READ_STR_VAR:
+ if (index >= STROBE_MAX_STRS)
+ return 1;
+- ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
+- ctx->value, ctx->data, ctx->payload);
++ if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
++ return 1;
++ ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
++ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ }
+ return 0;
+@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct tas
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ struct strobe_value_generic value = {0};
+ struct strobemeta_cfg *cfg;
+- void *tls_base, *payload;
++ size_t payload_off;
++ void *tls_base;
+
+ cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
+ if (!cfg)
+@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct tas
+
+ data->int_vals_set_mask = 0;
+ data->req_meta_valid = 0;
+- payload = data->payload;
++ payload_off = 0;
+ /*
+ * we don't have struct task_struct definition, it should be:
+ * tls_base = (void *)task->thread.fsbase;
+@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct tas
+ .tls_base = tls_base,
+ .value = &value,
+ .data = data,
+- .payload = payload,
++ .payload_off = 0,
+ };
+ int err;
+
+@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct tas
+ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_MAPS)
+ return NULL;
++
++ payload_off = ctx.payload_off;
++ /* this should not really happen, here only to satisfy the verifier */
++ if (payload_off > sizeof(data->payload))
++ payload_off = sizeof(data->payload);
+ #else
+ #ifdef NO_UNROLL
+ #pragma clang loop unroll(disable)
+@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct tas
+ #pragma unroll
+ #endif /* NO_UNROLL */
+ for (int i = 0; i < STROBE_MAX_STRS; ++i) {
+- payload += read_str_var(cfg, i, tls_base, &value, data, payload);
++ payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
+ }
+ #ifdef NO_UNROLL
+ #pragma clang loop unroll(disable)
+@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct tas
+ #pragma unroll
+ #endif /* NO_UNROLL */
+ for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
+- payload = read_map_var(cfg, i, tls_base, &value, data, payload);
++ payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
+ }
+ #endif /* USE_BPF_LOOP */
+
+@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct tas
+ * return pointer right after end of payload, so it's possible to
+ * calculate exact amount of useful data that needs to be sent
+ */
+- return payload;
++ return &data->payload[payload_off];
+ }
+
+ SEC("raw_tracepoint/kfree_skb")
--- /dev/null
+From 977bc146d4eb7070118d8a974919b33bb52732b4 Mon Sep 17 00:00:00 2001
+From: Eduard Zingerman <eddyz87@gmail.com>
+Date: Tue, 21 Nov 2023 04:06:51 +0200
+Subject: selftests/bpf: track tcp payload offset as scalar in xdp_synproxy
+
+From: Eduard Zingerman <eddyz87@gmail.com>
+
+commit 977bc146d4eb7070118d8a974919b33bb52732b4 upstream.
+
+This change prepares syncookie_{tc,xdp} for an update in the callbacks
+verification logic. To allow bpf_loop() verification to converge when
+multiple callback iterations are considered:
+- track the offset inside the TCP payload explicitly, not as a part of
+ the pointer;
+- make sure that the offset does not exceed the MAX_PACKET_OFF limit
+ enforced by the verifier;
+- make sure that the offset is tracked as an unbound scalar between
+ iterations, otherwise the verifier won't be able to infer that the
+ bpf_loop callback reaches identical states (see the sketch below).
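+
+The pattern (mirroring the next() helper added in the diff below) can
+be summarized with a short standalone C sketch. This is an
+illustration only, not part of the patch: the struct and helper names
+are made up, MAX_PACKET_OFF is the same limit as above, and the usual
+BPF selftest includes (vmlinux.h, bpf_helpers.h) are assumed.
+
+  struct parse_ctx {
+          void *data;             /* packet start */
+          void *data_end;         /* packet end as seen by the verifier */
+          __u32 off;              /* offset kept as a plain scalar */
+  };
+
+  /* Return a pointer to the next 'sz' bytes of the packet, or NULL if
+   * the scalar offset would exceed MAX_PACKET_OFF or the packet is too
+   * short. Advancing 'off' instead of carrying a derived packet pointer
+   * keeps the loop state comparable across bpf_loop() callback
+   * iterations.
+   */
+  static __always_inline __u8 *next_bytes(struct parse_ctx *ctx, __u32 sz)
+  {
+          __u64 off = ctx->off;
+          __u8 *p;
+
+          if (off > MAX_PACKET_OFF - sz)
+                  return NULL;
+          p = (__u8 *)ctx->data + off;
+          barrier_var(p);         /* compiler barrier, as in the real helper */
+          if (p + sz > ctx->data_end)
+                  return NULL;
+          ctx->off += sz;
+          return p;
+  }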
+
+Acked-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
+Link: https://lore.kernel.org/r/20231121020701.26440-2-eddyz87@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 84 +++++++++++-------
+ 1 file changed, 52 insertions(+), 32 deletions(-)
+
+--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
++++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+@@ -53,6 +53,8 @@
+ #define DEFAULT_TTL 64
+ #define MAX_ALLOWED_PORTS 8
+
++#define MAX_PACKET_OFF 0xffff
++
+ #define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+@@ -183,63 +185,76 @@ static __always_inline __u32 tcp_time_st
+ }
+
+ struct tcpopt_context {
+- __u8 *ptr;
+- __u8 *end;
++ void *data;
+ void *data_end;
+ __be32 *tsecr;
+ __u8 wscale;
+ bool option_timestamp;
+ bool option_sack;
++ __u32 off;
+ };
+
+-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
++static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
+ {
+- __u8 opcode, opsize;
++ __u64 off = ctx->off;
++ __u8 *data;
+
+- if (ctx->ptr >= ctx->end)
+- return 1;
+- if (ctx->ptr >= ctx->data_end)
+- return 1;
++ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
++ if (off > MAX_PACKET_OFF - sz)
++ return NULL;
++
++ data = ctx->data + off;
++ barrier_var(data);
++ if (data + sz >= ctx->data_end)
++ return NULL;
+
+- opcode = ctx->ptr[0];
++ ctx->off += sz;
++ return data;
++}
+
+- if (opcode == TCPOPT_EOL)
+- return 1;
+- if (opcode == TCPOPT_NOP) {
+- ++ctx->ptr;
+- return 0;
+- }
++static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
++{
++ __u8 *opcode, *opsize, *wscale, *tsecr;
++ __u32 off = ctx->off;
+
+- if (ctx->ptr + 1 >= ctx->end)
++ opcode = next(ctx, 1);
++ if (!opcode)
+ return 1;
+- if (ctx->ptr + 1 >= ctx->data_end)
+- return 1;
+- opsize = ctx->ptr[1];
+- if (opsize < 2)
++
++ if (*opcode == TCPOPT_EOL)
+ return 1;
++ if (*opcode == TCPOPT_NOP)
++ return 0;
+
+- if (ctx->ptr + opsize > ctx->end)
++ opsize = next(ctx, 1);
++ if (!opsize || *opsize < 2)
+ return 1;
+
+- switch (opcode) {
++ switch (*opcode) {
+ case TCPOPT_WINDOW:
+- if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
+- ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
++ wscale = next(ctx, 1);
++ if (!wscale)
++ return 1;
++ if (*opsize == TCPOLEN_WINDOW)
++ ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
+ break;
+ case TCPOPT_TIMESTAMP:
+- if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
++ tsecr = next(ctx, 4);
++ if (!tsecr)
++ return 1;
++ if (*opsize == TCPOLEN_TIMESTAMP) {
+ ctx->option_timestamp = true;
+ /* Client's tsval becomes our tsecr. */
+- *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
++ *ctx->tsecr = get_unaligned((__be32 *)tsecr);
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+- if (opsize == TCPOLEN_SACK_PERM)
++ if (*opsize == TCPOLEN_SACK_PERM)
+ ctx->option_sack = true;
+ break;
+ }
+
+- ctx->ptr += opsize;
++ ctx->off = off + *opsize;
+
+ return 0;
+ }
+@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(_
+
+ static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
+ __u16 tcp_len, __be32 *tsval,
+- __be32 *tsecr, void *data_end)
++ __be32 *tsecr, void *data, void *data_end)
+ {
+ struct tcpopt_context loop_ctx = {
+- .ptr = (__u8 *)(tcp_header + 1),
+- .end = (__u8 *)tcp_header + tcp_len,
++ .data = data,
+ .data_end = data_end,
+ .tsecr = tsecr,
+ .wscale = TS_OPT_WSCALE_MASK,
+ .option_timestamp = false,
+ .option_sack = false,
++ /* Note: currently the verifier tracks .off as an unbound scalar.
++ * If the verifier at some point gets smarter and computes a
++ * bounded value for this var, beware that it might hinder
++ * bpf_loop() convergence validation.
++ */
++ .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
+ };
+ u32 cookie;
+
+@@ -635,7 +655,7 @@ static __always_inline int syncookie_han
+ cookie = (__u32)value;
+
+ if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
+- &tsopt_buf[0], &tsopt_buf[1], data_end))
++ &tsopt_buf[0], &tsopt_buf[1], data, data_end))
+ tsopt = tsopt_buf;
+
+ /* Check that there is enough space for a SYNACK. It also covers
ksmbd-send-lease-break-notification-on-file_rename_information.patch
ksmbd-add-missing-set_freezable-for-freezable-kthread.patch
dt-bindings-net-snps-dwmac-tx-coe-unsupported.patch
+bpf-move-explored_state-closer-to-the-beginning-of-verifier.c.patch
+bpf-extract-same_callsites-as-utility-function.patch
+bpf-exact-states-comparison-for-iterator-convergence-checks.patch
+selftests-bpf-tests-with-delayed-read-precision-makrs-in-loop-body.patch
+bpf-correct-loop-detection-for-iterators-convergence.patch
+selftests-bpf-test-if-state-loops-are-detected-in-a-tricky-case.patch
+bpf-print-full-verifier-states-on-infinite-loop-detection.patch
+selftests-bpf-track-tcp-payload-offset-as-scalar-in-xdp_synproxy.patch
+selftests-bpf-track-string-payload-offset-as-scalar-in-strobemeta.patch
+bpf-extract-__check_reg_arg-utility-function.patch
+bpf-extract-setup_func_entry-utility-function.patch
+bpf-verify-callbacks-as-if-they-are-called-unknown-number-of-times.patch
+selftests-bpf-tests-for-iterating-callbacks.patch
+bpf-widening-for-callback-iterators.patch
+selftests-bpf-test-widening-for-iterating-callbacks.patch
+bpf-keep-track-of-max-number-of-bpf_loop-callback-iterations.patch
+selftests-bpf-check-if-max-number-of-bpf_loop-iterations-is-tracked.patch