git.ipfire.org Git - thirdparty/linux.git/commitdiff
bpf: compute SCCs in program control flow graph
author: Eduard Zingerman <eddyz87@gmail.com>
Wed, 11 Jun 2025 20:08:27 +0000 (13:08 -0700)
committer: Alexei Starovoitov <ast@kernel.org>
Thu, 12 Jun 2025 23:52:42 +0000 (16:52 -0700)
Compute strongly connected components in the program CFG.
Assign an SCC number to each instruction, recorded in
env->insn_aux_data[*].scc. Use Tarjan's algorithm for SCC computation
adapted to run non-recursively.

For debug purposes print out computed SCCs as a part of full program
dump in compute_live_registers() at log level 2, e.g.:

  func#0 @0
  Live regs before insn:
        0: .......... (b4) w6 = 10
    2   1: ......6... (18) r1 = 0xffff88810bbb5565
    2   3: .1....6... (b4) w2 = 2
    2   4: .12...6... (85) call bpf_trace_printk#6
    2   5: ......6... (04) w6 += -1
    2   6: ......6... (56) if w6 != 0x0 goto pc-6
        7: .......... (b4) w6 = 5
    1   8: ......6... (18) r1 = 0xffff88810bbb5567
    1  10: .1....6... (b4) w2 = 2
    1  11: .12...6... (85) call bpf_trace_printk#6
    1  12: ......6... (04) w6 += -1
    1  13: ......6... (56) if w6 != 0x0 goto pc-6
       14: .......... (b4) w0 = 0
       15: 0......... (95) exit
   ^^^
  SCC number for the instruction

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20250611200836.4135542-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf_verifier.h
kernel/bpf/verifier.c

index 3e77befdbc4ba91519ed13dae048d2f9f1fc6bad..95f5211610f459b90cd1fce26056d073240576a6 100644 (file)
@@ -609,6 +609,11 @@ struct bpf_insn_aux_data {
         * accepts callback function as a parameter.
         */
        bool calls_callback;
+       /*
+        * CFG strongly connected component this instruction belongs to,
+        * zero if it is a singleton SCC.
+        */
+       u32 scc;
        /* registers alive before this instruction. */
        u16 live_regs_before;
 };
index 92f2dad5f45370ed0a7ed23320a87f1446236e46..75e4f6544b2adaf890a1e67cd77a8a33ca294887 100644 (file)
@@ -24013,6 +24013,10 @@ static int compute_live_registers(struct bpf_verifier_env *env)
        if (env->log.level & BPF_LOG_LEVEL2) {
                verbose(env, "Live regs before insn:\n");
                for (i = 0; i < insn_cnt; ++i) {
+                       if (env->insn_aux_data[i].scc)
+                               verbose(env, "%3d ", env->insn_aux_data[i].scc);
+                       else
+                               verbose(env, "    ");
                        verbose(env, "%3d: ", i);
                        for (j = BPF_REG_0; j < BPF_REG_10; ++j)
                                if (insn_aux[i].live_regs_before & BIT(j))
@@ -24034,6 +24038,180 @@ out:
        return err;
 }
 
+/*
+ * Compute strongly connected components (SCCs) on the CFG.
+ * Assign an SCC number to each instruction, recorded in
+ * env->insn_aux_data[*].scc.
+ * If instruction is a sole member of its SCC and there are no self edges,
+ * no number is assigned to it, i.e. its 'scc' field is left untouched
+ * (expected to still be zero at this point).
+ * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
+ *
+ * Returns 0 on success, -ENOMEM if one of the scratch arrays cannot be
+ * allocated. All scratch arrays are freed before returning.
+ */
+static int compute_scc(struct bpf_verifier_env *env)
+{
+       const u32 NOT_ON_STACK = U32_MAX;
+
+       struct bpf_insn_aux_data *aux = env->insn_aux_data;
+       const u32 insn_cnt = env->prog->len;
+       int stack_sz, dfs_sz, err = 0;
+       u32 *stack, *pre, *low, *dfs;
+       u32 succ_cnt, i, j, t, w;
+       u32 next_preorder_num;
+       u32 next_scc_id;
+       bool assign_scc;
+       u32 succ[2];
+
+       next_preorder_num = 1;
+       next_scc_id = 1;
+       /*
+        * - 'stack' accumulates vertices in DFS order, see invariant comment below;
+        * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
+        *   preorder numbers start at 1, so 'pre[t] == 0' means 't' has not
+        *   been visited yet (the arrays come zeroed from kvcalloc());
+        * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
+        * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
+        *
+        * Each array has one slot per instruction.
+        */
+       stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+       pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+       low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+       dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL);
+       if (!stack || !pre || !low || !dfs) {
+               err = -ENOMEM;
+               goto exit;
+       }
+       /*
+        * References:
+        * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+        * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+        *
+        * The algorithm maintains the following invariant:
+        * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+        * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+        *
+        * Consequently:
+        * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+        *   such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+        *   and thus there is an SCC (loop) containing both 'u' and 'v'.
+        * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+        *   and 'v' can be considered the root of some SCC.
+        *
+        * Here is a pseudo-code for an explicitly recursive version of the algorithm
+        * (the C code below uses U32_MAX for NOT_ON_STACK instead of 'insn_cnt + 1';
+        * it only has to be larger than every preorder number):
+        *
+        *    NOT_ON_STACK = insn_cnt + 1
+        *    pre = [0] * insn_cnt
+        *    low = [0] * insn_cnt
+        *    scc = [0] * insn_cnt
+        *    stack = []
+        *
+        *    next_preorder_num = 1
+        *    next_scc_id = 1
+        *
+        *    def recur(w):
+        *        nonlocal next_preorder_num
+        *        nonlocal next_scc_id
+        *
+        *        pre[w] = next_preorder_num
+        *        low[w] = next_preorder_num
+        *        next_preorder_num += 1
+        *        stack.append(w)
+        *        for s in successors(w):
+        *            # Note: for classic algorithm the block below should look as:
+        *            #
+        *            # if pre[s] == 0:
+        *            #     recur(s)
+        *            #     low[w] = min(low[w], low[s])
+        *            # elif low[s] != NOT_ON_STACK:
+        *            #     low[w] = min(low[w], pre[s])
+        *            #
+        *            # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+        *            # does not break the invariant and makes iterative version of the algorithm
+        *            # simpler. See 'Algorithm #3' from [2].
+        *
+        *            # 's' not yet visited
+        *            if pre[s] == 0:
+        *                recur(s)
+        *            # if 's' is on stack, pick lowest reachable preorder number from it;
+        *            # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+        *            # so 'min' would be a noop.
+        *            low[w] = min(low[w], low[s])
+        *
+        *        if low[w] == pre[w]:
+        *            # 'w' is the root of an SCC, pop all vertices
+        *            # below 'w' on stack and assign same SCC to them.
+        *            while True:
+        *                t = stack.pop()
+        *                low[t] = NOT_ON_STACK
+        *                scc[t] = next_scc_id
+        *                if t == w:
+        *                    break
+        *            next_scc_id += 1
+        *
+        *    for i in range(0, insn_cnt):
+        *        if pre[i] == 0:
+        *            recur(i)
+        *
+        * Below implementation replaces explicit recursion with array 'dfs'.
+        * Unlike the pseudo-code, it assigns an SCC number only to components
+        * that have two or more members or a self edge.
+        */
+       for (i = 0; i < insn_cnt; i++) {
+               if (pre[i])
+                       continue;
+               stack_sz = 0;
+               dfs_sz = 1;
+               dfs[0] = i;
+dfs_continue:
+               while (dfs_sz) {
+                       w = dfs[dfs_sz - 1];
+                       if (pre[w] == 0) {
+                               low[w] = next_preorder_num;
+                               pre[w] = next_preorder_num;
+                               next_preorder_num++;
+                               stack[stack_sz++] = w;
+                       }
+                       /*
+                        * Visit 'w' successors. An unvisited successor is pushed
+                        * onto 'dfs' and processed first; once 'w' is back on top
+                        * of 'dfs' the scan restarts from the first successor, so
+                        * the already visited ones only refresh 'low[w]'.
+                        */
+                       succ_cnt = insn_successors(env->prog, w, succ);
+                       for (j = 0; j < succ_cnt; ++j) {
+                               if (pre[succ[j]]) {
+                                       low[w] = min(low[w], low[succ[j]]);
+                               } else {
+                                       dfs[dfs_sz++] = succ[j];
+                                       goto dfs_continue;
+                               }
+                       }
+                       /*
+                        * Preserve the invariant: if some vertex above in the stack
+                        * is reachable from 'w', keep 'w' on the stack.
+                        * 'w' is only removed from 'dfs' here; it gets popped from
+                        * 'stack' later, together with the root of its SCC.
+                        */
+                       if (low[w] < pre[w]) {
+                               dfs_sz--;
+                               goto dfs_continue;
+                       }
+                       /*
+                        * Assign SCC number only if component has two or more elements,
+                        * or if component has a self reference.
+                        * 'w' not being the top of 'stack' means other members of
+                        * the component were pushed after it.
+                        */
+                       assign_scc = stack[stack_sz - 1] != w;
+                       for (j = 0; j < succ_cnt; ++j) {
+                               if (succ[j] == w) {
+                                       assign_scc = true;
+                                       break;
+                               }
+                       }
+                       /*
+                        * Pop component elements from stack, down to and including
+                        * 'w'. Marking them NOT_ON_STACK turns later
+                        * 'min(low[x], low[t])' updates into no-ops.
+                        */
+                       do {
+                               t = stack[--stack_sz];
+                               low[t] = NOT_ON_STACK;
+                               if (assign_scc)
+                                       aux[t].scc = next_scc_id;
+                       } while (t != w);
+                       if (assign_scc)
+                               next_scc_id++;
+                       dfs_sz--;
+               }
+       }
+exit:
+       kvfree(stack);
+       kvfree(pre);
+       kvfree(low);
+       kvfree(dfs);
+       return err;
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
        u64 start_time = ktime_get_ns();
@@ -24155,6 +24333,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
        if (ret)
                goto skip_full_check;
 
+       ret = compute_scc(env);
+       if (ret < 0)
+               goto skip_full_check;
+
        ret = compute_live_registers(env);
        if (ret < 0)
                goto skip_full_check;