bpf: introduce forward arg-tracking dataflow analysis
author Eduard Zingerman <eddyz87@gmail.com>
Fri, 10 Apr 2026 20:55:58 +0000 (13:55 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 10 Apr 2026 22:05:05 +0000 (15:05 -0700)
The analysis is the basis for the static liveness tracking mechanism
introduced by the next two commits.

Introduce a forward fixed-point analysis that tracks which frame's FP
each register value is derived from, and at what byte offset. This is
needed because a callee can receive a pointer to its caller's stack
frame (e.g. r1 = fp-16 at the call site), then do *(u64 *)(r1 + 0)
inside the callee — a cross-frame stack access that the callee's local
liveness must attribute to the caller's stack.
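
For example, a schematic program exhibiting the pattern (illustrative
only, not taken from the patch):

    ; caller, frame #0
    r1 = r10
    r1 += -16               ; r1 = fp0-16
    call callee             ; callee runs in frame #1
    ...
    ; callee, frame #1
    r0 = *(u64 *)(r1 + 0)   ; actually reads fp0-16..fp0-9 in frame #0

The analysis enters the callee with r1 = {frame=0, off=[-16]}, so the
load is attributed to frame #0 rather than the callee's own stack.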

Each register holds an arg_track value from a three-level lattice:
- Precise {frame=N, off=[o1,o2,...]} — known frame index and
  up to 4 concrete byte offsets
- Offset-imprecise {frame=N, off_cnt=0} — known frame, unknown offset
- Fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask} — unknown frame,
  mask says which frames might be involved

At CFG merge points the lattice moves toward imprecision: same
frame+offset stays precise; same frame with different offsets merges
the offset sets or becomes offset-imprecise; different frames become
fully-imprecise with OR'd bitmask.
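
For example, joining {frame=1, off=[-8]} with itself stays precise;
joining it with {frame=1, off=[-16]} yields {frame=1, off=[-16,-8]}
(offset-imprecise once more than 4 offsets accumulate); joining
{frame=0, off=[-8]} with {frame=1, off=[-16]} yields
{frame=ARG_IMPRECISE, mask=0b11}.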

The analysis also tracks spills and fills to the callee's own stack
(at_stack_in/out), so FP-derived values keep their identity when
spilled and reloaded.
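
E.g. (illustrative):

    *(u64 *)(r10 - 8) = r1    ; slot fp-8 inherits r1's arg_track
    ...                       ; (no clobbering store in between)
    r2 = *(u64 *)(r10 - 8)    ; r2 regains the spilled identity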

This pass is run recursively per call site: when subprog A calls B
with specific FP-derived arguments, B is re-analyzed with those entry
args. The recursion follows analyze_subprog -> compute_subprog_args ->
(for each call insn) -> analyze_subprog. Subprogs that receive no
FP-derived args are skipped during recursion and analyzed
independently at depth 0.
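
Schematically, for a hypothetical chain main -> A -> B:

    analyze_subprog(main, entry=NULL, depth=0)
      compute_subprog_args(main)      ; finds call site passing fp0-16
        analyze_subprog(A, entry={r1=fp0-16}, depth=1)
          compute_subprog_args(A)     ; A forwards the pointer to B
            analyze_subprog(B, entry={r1=fp0-16}, depth=2)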

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260410-patch-set-v4-7-5d4eecb343db@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf_verifier.h
kernel/bpf/liveness.c
kernel/bpf/verifier.c

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7b31d8024c616a613a9a6dd8b7c2eb7e65b59ac3..49b19118c326db9748c9fc3b395f21756ff80504 100644
@@ -226,6 +226,7 @@ enum bpf_stack_slot_type {
 
 /* 4-byte stack slot granularity for liveness analysis */
 #define BPF_HALF_REG_SIZE      4
+#define STACK_SLOT_SZ          4
 #define STACK_SLOTS            (MAX_BPF_STACK / BPF_HALF_REG_SIZE)     /* 128 */
 
 typedef struct {
@@ -886,6 +887,8 @@ struct bpf_verifier_env {
        } cfg;
        struct backtrack_state bt;
        struct bpf_jmp_history_entry *cur_hist_ent;
+       /* Per-callsite copy of parent's converged at_stack_in for cross-frame fills. */
+       struct arg_track **callsite_at_stack;
        u32 pass_cnt; /* number of times do_check() was called */
        u32 subprog_cnt;
        /* number of instructions analyzed by the verifier */
@@ -1213,6 +1216,7 @@ s64 bpf_helper_stack_access_bytes(struct bpf_verifier_env *env,
 s64 bpf_kfunc_stack_access_bytes(struct bpf_verifier_env *env,
                                 struct bpf_insn *insn, int arg,
                                 int insn_idx);
+int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env);
 
 int bpf_stack_liveness_init(struct bpf_verifier_env *env);
 void bpf_stack_liveness_free(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
index a3af5972520f3c19d4d5fc003d2ca8088d6b4489..c5d6760454d69d84d3909eed9146d5bb99cfe381 100644
@@ -2,10 +2,13 @@
 /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
 
 #include <linux/bpf_verifier.h>
+#include <linux/btf.h>
 #include <linux/hashtable.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
 
+#define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
+
 /*
  * This file implements live stack slots analysis. After accumulating
  * stack usage data, the analysis answers queries about whether a
@@ -107,6 +110,7 @@ struct per_frame_masks {
 struct func_instance {
        struct hlist_node hl_node;
        struct callchain callchain;
+       u32 subprog;            /* subprog index */
        u32 insn_cnt;           /* cached number of insns in the function */
        bool updated;
        bool must_write_dropped;
@@ -133,6 +137,7 @@ struct bpf_liveness {
         */
        spis_t write_masks_acc[MAX_CALL_FRAMES];
        u32 write_insn_idx;
+       u32 subprog_calls;                              /* analyze_subprog() invocations */
 };
 
 /* Compute callchain corresponding to state @st at depth @frameno */
@@ -200,11 +205,30 @@ static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
                return ERR_PTR(-ENOMEM);
        }
        memcpy(&result->callchain, callchain, sizeof(*callchain));
+       result->subprog = subprog - env->subprog_info;
        result->insn_cnt = subprog_sz;
        hash_add(liveness->func_instances, &result->hl_node, key);
        return result;
 }
 
+static struct func_instance *call_instance(struct bpf_verifier_env *env,
+                                          struct func_instance *caller,
+                                          u32 callsite, int subprog)
+{
+       struct callchain cc;
+
+       if (caller) {
+               cc = caller->callchain;
+               cc.callsites[cc.curframe] = callsite;
+               cc.curframe++;
+       } else {
+               memset(&cc, 0, sizeof(cc));
+       }
+       cc.sp_starts[cc.curframe] = env->subprog_info[subprog].start;
+       cc.callsites[cc.curframe] = cc.sp_starts[cc.curframe];
+       return __lookup_instance(env, &cc);
+}
+
 static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
                                             struct bpf_verifier_state *st,
                                             u32 frameno)
@@ -786,3 +810,1038 @@ bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 half_sp
 
        return false;
 }
+
+/*
+ * Per-register tracking state for compute_subprog_args().
+ * Tracks which frame's FP a value is derived from
+ * and the byte offset from that frame's FP.
+ *
+ * The .frame field forms a lattice with three levels of precision:
+ *
+ *   precise {frame=N, off=V}      -- known absolute frame index and byte offset
+ *        |
+ *   offset-imprecise {frame=N, off=OFF_IMPRECISE}
+ *        |                        -- known frame identity, unknown offset
+ *   fully-imprecise {frame=ARG_IMPRECISE, mask=bitmask}
+ *                                 -- unknown frame identity; .mask is a
+ *                                    bitmask of which frame indices might be
+ *                                    involved
+ *
+ * At CFG merge points, arg_track_join() moves down the lattice:
+ *   - same frame + same offset  -> precise
+ *   - same frame + different offset -> offset-imprecise
+ *   - different frames          -> fully-imprecise (bitmask OR)
+ *
+ * At memory access sites (LDX/STX/ST), offset-imprecise marks only
+ * the known frame's access mask as SPIS_ALL, while fully-imprecise
+ * iterates bits in the bitmask and routes each frame to its target.
+ */
+#define MAX_ARG_OFFSETS 4
+
+struct arg_track {
+       union {
+               s16 off[MAX_ARG_OFFSETS]; /* byte offsets; off_cnt says how many */
+               u16 mask;       /* frame bitmask when frame == ARG_IMPRECISE */
+       };
+       s8 frame;       /* absolute frame index, or enum arg_track_state */
+       s8 off_cnt;     /* 0 = offset-imprecise, 1-4 = # of precise offsets */
+};
+
+enum arg_track_state {
+       ARG_NONE        = -1,   /* not derived from any argument */
+       ARG_UNVISITED   = -2,   /* not yet reached by dataflow */
+       ARG_IMPRECISE   = -3,   /* lost identity; .mask is a frame bitmask */
+};
+
+#define OFF_IMPRECISE  S16_MIN /* frame identity known but offset unknown */
+
+/* Track callee stack slots fp-8 through fp-512 (64 slots of 8 bytes each) */
+#define MAX_ARG_SPILL_SLOTS 64
+
+static bool arg_is_visited(const struct arg_track *at)
+{
+       return at->frame != ARG_UNVISITED;
+}
+
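+/* True when the value derives from some frame's FP, however imprecisely */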
+static bool arg_is_fp(const struct arg_track *at)
+{
+       return at->frame >= 0 || at->frame == ARG_IMPRECISE;
+}
+
+/*
+ * Clear all tracked callee stack slots overlapping the byte range
+ * [off, off+sz-1] where off is a negative FP-relative offset.
+ */
+static void clear_overlapping_stack_slots(struct arg_track *at_stack, s16 off, u32 sz)
+{
+       struct arg_track none = { .frame = ARG_NONE };
+
+       if (off == OFF_IMPRECISE) {
+               for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++)
+                       at_stack[i] = none;
+               return;
+       }
+       for (int i = 0; i < MAX_ARG_SPILL_SLOTS; i++) {
+               int slot_start = -((i + 1) * 8);
+               int slot_end = slot_start + 8;
+
+               if (slot_start < off + (int)sz && slot_end > off)
+                       at_stack[i] = none;
+       }
+}
+
+static void verbose_arg_track(struct bpf_verifier_env *env, struct arg_track *at)
+{
+       int i;
+
+       switch (at->frame) {
+       case ARG_NONE:      verbose(env, "_");                          break;
+       case ARG_UNVISITED: verbose(env, "?");                          break;
+       case ARG_IMPRECISE: verbose(env, "IMP%x", at->mask);            break;
+       default:
+               /* frame >= 0: absolute frame index */
+               if (at->off_cnt == 0) {
+                       verbose(env, "fp%d ?", at->frame);
+               } else {
+                       for (i = 0; i < at->off_cnt; i++) {
+                               if (i)
+                                       verbose(env, "|");
+                               verbose(env, "fp%d%+d", at->frame, at->off[i]);
+                       }
+               }
+               break;
+       }
+}
+
+static bool arg_track_eq(const struct arg_track *a, const struct arg_track *b)
+{
+       int i;
+
+       if (a->frame != b->frame)
+               return false;
+       if (a->frame == ARG_IMPRECISE)
+               return a->mask == b->mask;
+       if (a->frame < 0)
+               return true;
+       if (a->off_cnt != b->off_cnt)
+               return false;
+       for (i = 0; i < a->off_cnt; i++)
+               if (a->off[i] != b->off[i])
+                       return false;
+       return true;
+}
+
+static struct arg_track arg_single(s8 arg, s16 off)
+{
+       struct arg_track at = {};
+
+       at.frame = arg;
+       at.off[0] = off;
+       at.off_cnt = 1;
+       return at;
+}
+
+/*
+ * Merge two sorted offset arrays, deduplicate.
+ * Returns off_cnt=0 if the result exceeds MAX_ARG_OFFSETS.
+ * Both args must have the same frame and off_cnt > 0.
+ */
+static struct arg_track arg_merge_offsets(struct arg_track a, struct arg_track b)
+{
+       struct arg_track result = { .frame = a.frame };
+       struct arg_track imp = { .frame = a.frame };
+       int i = 0, j = 0, k = 0;
+
+       while (i < a.off_cnt && j < b.off_cnt) {
+               s16 v;
+
+               if (a.off[i] <= b.off[j]) {
+                       v = a.off[i++];
+                       if (v == b.off[j])
+                               j++;
+               } else {
+                       v = b.off[j++];
+               }
+               if (k > 0 && result.off[k - 1] == v)
+                       continue;
+               if (k >= MAX_ARG_OFFSETS)
+                       return imp;
+               result.off[k++] = v;
+       }
+       while (i < a.off_cnt) {
+               if (k >= MAX_ARG_OFFSETS)
+                       return imp;
+               result.off[k++] = a.off[i++];
+       }
+       while (j < b.off_cnt) {
+               if (k >= MAX_ARG_OFFSETS)
+                       return imp;
+               result.off[k++] = b.off[j++];
+       }
+       result.off_cnt = k;
+       return result;
+}
+
+/*
+ * Merge two arg_tracks into ARG_IMPRECISE, collecting the frame
+ * bits from both operands. Precise frame indices (frame >= 0)
+ * contribute a single bit; existing ARG_IMPRECISE values
+ * contribute their full bitmask.
+ */
+static struct arg_track arg_join_imprecise(struct arg_track a, struct arg_track b)
+{
+       u32 m = 0;
+
+       if (a.frame >= 0)
+               m |= BIT(a.frame);
+       else if (a.frame == ARG_IMPRECISE)
+               m |= a.mask;
+
+       if (b.frame >= 0)
+               m |= BIT(b.frame);
+       else if (b.frame == ARG_IMPRECISE)
+               m |= b.mask;
+
+       return (struct arg_track){ .mask = m, .frame = ARG_IMPRECISE };
+}
+
+/* Join two arg_track values at merge points */
+static struct arg_track __arg_track_join(struct arg_track a, struct arg_track b)
+{
+       if (!arg_is_visited(&b))
+               return a;
+       if (!arg_is_visited(&a))
+               return b;
+       if (a.frame == b.frame && a.frame >= 0) {
+               /* Both offset-imprecise: stay imprecise */
+               if (a.off_cnt == 0 || b.off_cnt == 0)
+                       return (struct arg_track){ .frame = a.frame };
+               /* Merge offset sets; falls back to off_cnt=0 if >4 */
+               return arg_merge_offsets(a, b);
+       }
+
+       /*
+        * The frames differ; handle the cases where one side is ARG_NONE:
+        *   none + none -> none
+        *   arg  + none -> arg
+        *   none + arg  -> arg
+        */
+       if (a.frame == ARG_NONE && b.frame == ARG_NONE)
+               return a;
+       if (a.frame >= 0 && b.frame == ARG_NONE) {
+               /*
+                * When joining a single precise fp-N with ARG_NONE, add a
+                * fake fp+0 offset to keep the stack_use and prevent a
+                * false stack_def in the later liveness passes
+                */
+               if (a.off_cnt == 1)
+                       return arg_merge_offsets(a, arg_single(a.frame, 0));
+               return a;
+       }
+       if (b.frame >= 0 && a.frame == ARG_NONE) {
+               if (b.off_cnt == 1)
+                       return arg_merge_offsets(b, arg_single(b.frame, 0));
+               return b;
+       }
+
+       return arg_join_imprecise(a, b);
+}
+
+static bool arg_track_join(struct bpf_verifier_env *env, int idx, int target, int r,
+                          struct arg_track *in, struct arg_track out)
+{
+       struct arg_track old = *in;
+       struct arg_track new_val = __arg_track_join(old, out);
+
+       if (arg_track_eq(&new_val, &old))
+               return false;
+
+       *in = new_val;
+       if (!(env->log.level & BPF_LOG_LEVEL2) || !arg_is_visited(&old))
+               return true;
+
+       verbose(env, "arg JOIN insn %d -> %d ", idx, target);
+       if (r >= 0)
+               verbose(env, "r%d: ", r);
+       else
+               verbose(env, "fp%+d: ", r * 8);
+       verbose_arg_track(env, &old);
+       verbose(env, " + ");
+       verbose_arg_track(env, &out);
+       verbose(env, " => ");
+       verbose_arg_track(env, &new_val);
+       verbose(env, "\n");
+       return true;
+}
+
+/*
+ * Compute the result when an ALU op destroys offset precision.
+ * If a single frame is identifiable, keep it and drop offset precision
+ * (off_cnt = 0).  If two different frames are involved, or one operand
+ * is already ARG_IMPRECISE, the result is fully ARG_IMPRECISE.
+ */
+static void arg_track_alu64(struct arg_track *dst, const struct arg_track *src)
+{
+       WARN_ON_ONCE(!arg_is_visited(dst));
+       WARN_ON_ONCE(!arg_is_visited(src));
+
+       if (dst->frame >= 0 && (src->frame == ARG_NONE || src->frame == dst->frame)) {
+               /*
+                * rX += rY where rY is not arg derived
+                * rX += rX
+                */
+               dst->off_cnt = 0;
+               return;
+       }
+       if (src->frame >= 0 && dst->frame == ARG_NONE) {
+               /*
+                * rX += rY where rX is not arg derived
+                * rY identity leaks into rX
+                */
+               dst->off_cnt = 0;
+               dst->frame = src->frame;
+               return;
+       }
+
+       if (dst->frame == ARG_NONE && src->frame == ARG_NONE)
+               return;
+
+       *dst = arg_join_imprecise(*dst, *src);
+}
+
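+/*
+ * Add @delta to byte offset @off.  Returns OFF_IMPRECISE if @off is
+ * already imprecise or the result does not fit the valid s16 range.
+ */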
+static s16 arg_add(s16 off, s64 delta)
+{
+       s64 res;
+
+       if (off == OFF_IMPRECISE)
+               return OFF_IMPRECISE;
+       res = (s64)off + delta;
+       if (res < S16_MIN + 1 || res > S16_MAX)
+               return OFF_IMPRECISE;
+       return res;
+}
+
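+/* Shift every tracked offset by @delta; drop to offset-imprecise on overflow */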
+static void arg_padd(struct arg_track *at, s64 delta)
+{
+       int i;
+
+       if (at->off_cnt == 0)
+               return;
+       for (i = 0; i < at->off_cnt; i++) {
+               s16 new_off = arg_add(at->off[i], delta);
+
+               if (new_off == OFF_IMPRECISE) {
+                       at->off_cnt = 0;
+                       return;
+               }
+               at->off[i] = new_off;
+       }
+}
+
+/*
+ * Convert a byte offset from FP to a callee stack slot index.
+ * Returns -1 if out of range or not 8-byte aligned.
+ * Slot 0 = fp-8, slot 1 = fp-16, ..., slot 63 = fp-512.
+ */
+static int fp_off_to_slot(s16 off)
+{
+       if (off == OFF_IMPRECISE)
+               return -1;
+       if (off >= 0 || off < -(int)(MAX_ARG_SPILL_SLOTS * 8))
+               return -1;
+       if (off % 8)
+               return -1;
+       return (-off) / 8 - 1;
+}
+
+static struct arg_track fill_from_stack(struct bpf_insn *insn,
+                                       struct arg_track *at_out, int reg,
+                                       struct arg_track *at_stack_out,
+                                       int depth)
+{
+       struct arg_track imp = {
+               .mask = (1u << (depth + 1)) - 1,
+               .frame = ARG_IMPRECISE
+       };
+       struct arg_track result = { .frame = ARG_NONE };
+       int cnt, i;
+
+       if (reg == BPF_REG_FP) {
+               int slot = fp_off_to_slot(insn->off);
+
+               return slot >= 0 ? at_stack_out[slot] : imp;
+       }
+       cnt = at_out[reg].off_cnt;
+       if (cnt == 0)
+               return imp;
+
+       for (i = 0; i < cnt; i++) {
+               s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+               int slot = fp_off_to_slot(fp_off);
+
+               if (slot < 0)
+                       return imp;
+               result = __arg_track_join(result, at_stack_out[slot]);
+       }
+       return result;
+}
+
+/*
+ * Spill @val to all possible stack slots indicated by the FP offsets in @reg.
+ * For an 8-byte store a single candidate slot gets @val directly; with
+ * several candidates each slot is joined with @val.  A sub-8-byte store
+ * stores/joins ARG_NONE instead, as it clobbers any tracked spill.
+ * When the exact offset is unknown, conservatively join @val into
+ * every slot of at_stack_out.
+ */
+static void spill_to_stack(struct bpf_insn *insn, struct arg_track *at_out,
+                          int reg, struct arg_track *at_stack_out,
+                          struct arg_track *val, u32 sz)
+{
+       struct arg_track none = { .frame = ARG_NONE };
+       struct arg_track new_val = sz == 8 ? *val : none;
+       int cnt, i;
+
+       if (reg == BPF_REG_FP) {
+               int slot = fp_off_to_slot(insn->off);
+
+               if (slot >= 0)
+                       at_stack_out[slot] = new_val;
+               return;
+       }
+       cnt = at_out[reg].off_cnt;
+       if (cnt == 0) {
+               for (int slot = 0; slot < MAX_ARG_SPILL_SLOTS; slot++)
+                       at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val);
+               return;
+       }
+       for (i = 0; i < cnt; i++) {
+               s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+               int slot = fp_off_to_slot(fp_off);
+
+               if (slot < 0)
+                       continue;
+               if (cnt == 1)
+                       at_stack_out[slot] = new_val;
+               else
+                       at_stack_out[slot] = __arg_track_join(at_stack_out[slot], new_val);
+       }
+}
+
+/*
+ * Clear stack slots overlapping all possible FP offsets in @reg.
+ */
+static void clear_stack_for_all_offs(struct bpf_insn *insn,
+                                    struct arg_track *at_out, int reg,
+                                    struct arg_track *at_stack_out, u32 sz)
+{
+       int cnt, i;
+
+       if (reg == BPF_REG_FP) {
+               clear_overlapping_stack_slots(at_stack_out, insn->off, sz);
+               return;
+       }
+       cnt = at_out[reg].off_cnt;
+       if (cnt == 0) {
+               clear_overlapping_stack_slots(at_stack_out, OFF_IMPRECISE, sz);
+               return;
+       }
+       for (i = 0; i < cnt; i++) {
+               s16 fp_off = arg_add(at_out[reg].off[i], insn->off);
+
+               clear_overlapping_stack_slots(at_stack_out, fp_off, sz);
+       }
+}
+
+static void arg_track_log(struct bpf_verifier_env *env, struct bpf_insn *insn, int idx,
+                         struct arg_track *at_in, struct arg_track *at_stack_in,
+                         struct arg_track *at_out, struct arg_track *at_stack_out)
+{
+       bool printed = false;
+       int i;
+
+       if (!(env->log.level & BPF_LOG_LEVEL2))
+               return;
+       for (i = 0; i < MAX_BPF_REG; i++) {
+               if (arg_track_eq(&at_out[i], &at_in[i]))
+                       continue;
+               if (!printed) {
+                       verbose(env, "%3d: ", idx);
+                       bpf_verbose_insn(env, insn);
+                       bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+                       printed = true;
+               }
+               verbose(env, "\tr%d: ", i); verbose_arg_track(env, &at_in[i]);
+               verbose(env, " -> "); verbose_arg_track(env, &at_out[i]);
+       }
+       for (i = 0; i < MAX_ARG_SPILL_SLOTS; i++) {
+               if (arg_track_eq(&at_stack_out[i], &at_stack_in[i]))
+                       continue;
+               if (!printed) {
+                       verbose(env, "%3d: ", idx);
+                       bpf_verbose_insn(env, insn);
+                       bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+                       printed = true;
+               }
+               verbose(env, "\tfp%+d: ", -(i + 1) * 8); verbose_arg_track(env, &at_stack_in[i]);
+               verbose(env, " -> "); verbose_arg_track(env, &at_stack_out[i]);
+       }
+       if (printed)
+               verbose(env, "\n");
+}
+
+/*
+ * Pure dataflow transfer function for arg_track state.
+ * Updates at_out[] based on how the instruction modifies registers.
+ * Tracks spill/fill, but not other memory accesses.
+ */
+static void arg_track_xfer(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                          int insn_idx,
+                          struct arg_track *at_out, struct arg_track *at_stack_out,
+                          struct func_instance *instance,
+                          u32 *callsites)
+{
+       int depth = instance->callchain.curframe;
+       u8 class = BPF_CLASS(insn->code);
+       u8 code = BPF_OP(insn->code);
+       struct arg_track *dst = &at_out[insn->dst_reg];
+       struct arg_track *src = &at_out[insn->src_reg];
+       struct arg_track none = { .frame = ARG_NONE };
+       int r;
+
+       if (class == BPF_ALU64 && BPF_SRC(insn->code) == BPF_K) {
+               if (code == BPF_MOV) {
+                       *dst = none;
+               } else if (dst->frame >= 0) {
+                       if (code == BPF_ADD)
+                               arg_padd(dst, insn->imm);
+                       else if (code == BPF_SUB)
+                               arg_padd(dst, -(s64)insn->imm);
+                       else
+                               /* Any other 64-bit alu on the pointer makes it imprecise */
+                               dst->off_cnt = 0;
+               } /* else if dst->frame is imprecise it stays so */
+       } else if (class == BPF_ALU64 && BPF_SRC(insn->code) == BPF_X) {
+               if (code == BPF_MOV) {
+                       if (insn->off == 0) {
+                               *dst = *src;
+                       } else {
+                               /* addr_space_cast destroys a pointer */
+                               *dst = none;
+                       }
+               } else {
+                       arg_track_alu64(dst, src);
+               }
+       } else if (class == BPF_ALU) {
+               /*
+                * 32-bit alu destroys the pointer.
+                * If src was a pointer it cannot leak into dst
+                */
+               *dst = none;
+       } else if (class == BPF_JMP && code == BPF_CALL) {
+               /*
+                * at_stack_out[] is not cleared by helper and subprog calls,
+                * so a later fill_from_stack() may return a stale FP-derived
+                * identity for a slot the call has scratched.  Such a phantom
+                * pointer only sets extra liveness bits for stack accesses
+                * that never happen, i.e. liveness is over-reported, which is
+                * safe (conservative).
+                *
+                * Helpers can scratch stack slots, but they won't forge a
+                * valid FP-derived pointer; subprogs may write into parent
+                * slots, but never a valid FP-derived pointer (their own or
+                * a parent's).
+                */
+               for (r = BPF_REG_0; r <= BPF_REG_5; r++)
+                       at_out[r] = none;
+       } else if (class == BPF_LDX) {
+               u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+               bool src_is_local_fp = insn->src_reg == BPF_REG_FP || src->frame == depth ||
+                                      (src->frame == ARG_IMPRECISE && (src->mask & BIT(depth)));
+
+               /*
+                * Reload from callee stack: if src is current-frame FP-derived
+                * and the load is an 8-byte BPF_MEM, try to restore the spill
+                * identity.  For imprecise sources fill_from_stack() returns
+                * ARG_IMPRECISE (off_cnt == 0).
+                */
+               if (src_is_local_fp && BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+                       *dst = fill_from_stack(insn, at_out, insn->src_reg, at_stack_out, depth);
+               } else if (src->frame >= 0 && src->frame < depth &&
+                          BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+                       struct arg_track *parent_stack =
+                               env->callsite_at_stack[callsites[src->frame]];
+
+                       *dst = fill_from_stack(insn, at_out, insn->src_reg,
+                                              parent_stack, src->frame);
+               } else if (src->frame == ARG_IMPRECISE &&
+                          !(src->mask & BIT(depth)) && src->mask &&
+                          BPF_MODE(insn->code) == BPF_MEM && sz == 8) {
+                       /*
+                        * Imprecise src with only parent-frame bits:
+                        * conservative fallback.
+                        */
+                       *dst = *src;
+               } else {
+                       *dst = none;
+               }
+       } else if (class == BPF_LD && BPF_MODE(insn->code) == BPF_IMM) {
+               *dst = none;
+       } else if (class == BPF_STX) {
+               u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+               bool dst_is_local_fp;
+
+               /* Track spills to current-frame FP-derived callee stack */
+               dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth;
+               if (dst_is_local_fp && BPF_MODE(insn->code) == BPF_MEM)
+                       spill_to_stack(insn, at_out, insn->dst_reg,
+                                      at_stack_out, src, sz);
+
+               if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+                       if (dst_is_local_fp && insn->imm != BPF_LOAD_ACQ)
+                               clear_stack_for_all_offs(insn, at_out, insn->dst_reg,
+                                                        at_stack_out, sz);
+
+                       if (insn->imm == BPF_CMPXCHG)
+                               at_out[BPF_REG_0] = none;
+                       else if (insn->imm == BPF_LOAD_ACQ)
+                               *dst = none;
+                       else if (insn->imm & BPF_FETCH)
+                               *src = none;
+               }
+       } else if (class == BPF_ST && BPF_MODE(insn->code) == BPF_MEM) {
+               u32 sz = bpf_size_to_bytes(BPF_SIZE(insn->code));
+               bool dst_is_local_fp = insn->dst_reg == BPF_REG_FP || dst->frame == depth;
+
+               /* BPF_ST to FP-derived dst: clear overlapping stack slots */
+               if (dst_is_local_fp)
+                       clear_stack_for_all_offs(insn, at_out, insn->dst_reg,
+                                                at_stack_out, sz);
+       }
+}
+
+/*
+ * For a calls_callback helper, find the callback subprog and determine
+ * which caller register maps to which callback register for FP passthrough.
+ */
+static int find_callback_subprog(struct bpf_verifier_env *env,
+                                struct bpf_insn *insn, int insn_idx,
+                                int *caller_reg, int *callee_reg)
+{
+       struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+       int cb_reg = -1;
+
+       *caller_reg = -1;
+       *callee_reg = -1;
+
+       if (!bpf_helper_call(insn))
+               return -1;
+       switch (insn->imm) {
+       case BPF_FUNC_loop:
+               /* bpf_loop(nr, cb, ctx, flags): cb=R2, R3->cb R2 */
+               cb_reg = BPF_REG_2;
+               *caller_reg = BPF_REG_3;
+               *callee_reg = BPF_REG_2;
+               break;
+       case BPF_FUNC_for_each_map_elem:
+               /* for_each_map_elem(map, cb, ctx, flags): cb=R2, R3->cb R4 */
+               cb_reg = BPF_REG_2;
+               *caller_reg = BPF_REG_3;
+               *callee_reg = BPF_REG_4;
+               break;
+       case BPF_FUNC_find_vma:
+               /* find_vma(task, addr, cb, ctx, flags): cb=R3, R4->cb R3 */
+               cb_reg = BPF_REG_3;
+               *caller_reg = BPF_REG_4;
+               *callee_reg = BPF_REG_3;
+               break;
+       case BPF_FUNC_user_ringbuf_drain:
+               /* user_ringbuf_drain(map, cb, ctx, flags): cb=R2, R3->cb R2 */
+               cb_reg = BPF_REG_2;
+               *caller_reg = BPF_REG_3;
+               *callee_reg = BPF_REG_2;
+               break;
+       default:
+               return -1;
+       }
+
+       if (!(aux->const_reg_subprog_mask & BIT(cb_reg)))
+               return -2;
+
+       return aux->const_reg_vals[cb_reg];
+}
+
+/* Per-subprog intermediate state kept alive across analysis phases */
+struct subprog_at_info {
+       struct arg_track (*at_in)[MAX_BPF_REG];
+       int len;
+};
+
+static void print_subprog_arg_access(struct bpf_verifier_env *env,
+                                    int subprog,
+                                    struct subprog_at_info *info,
+                                    struct arg_track (*at_stack_in)[MAX_ARG_SPILL_SLOTS])
+{
+       struct bpf_insn *insns = env->prog->insnsi;
+       int start = env->subprog_info[subprog].start;
+       int len = info->len;
+       int i, r;
+
+       if (!(env->log.level & BPF_LOG_LEVEL2))
+               return;
+
+       verbose(env, "subprog#%d %s:\n", subprog,
+               env->prog->aux->func_info
+               ? btf_name_by_offset(env->prog->aux->btf,
+                                    btf_type_by_id(env->prog->aux->btf,
+                                                   env->prog->aux->func_info[subprog].type_id)->name_off)
+               : "");
+       for (i = 0; i < len; i++) {
+               int idx = start + i;
+               bool has_extra = false;
+               u8 cls = BPF_CLASS(insns[idx].code);
+               bool is_ldx_stx_call = cls == BPF_LDX || cls == BPF_STX ||
+                                      insns[idx].code == (BPF_JMP | BPF_CALL);
+
+               verbose(env, "%3d: ", idx);
+               bpf_verbose_insn(env, &insns[idx]);
+
+               /* Collect what needs printing */
+               if (is_ldx_stx_call &&
+                   arg_is_visited(&info->at_in[i][0])) {
+                       for (r = 0; r < MAX_BPF_REG - 1; r++)
+                               if (arg_is_fp(&info->at_in[i][r]))
+                                       has_extra = true;
+               }
+               if (is_ldx_stx_call) {
+                       for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+                               if (arg_is_fp(&at_stack_in[i][r]))
+                                       has_extra = true;
+               }
+
+               if (!has_extra) {
+                       if (bpf_is_ldimm64(&insns[idx]))
+                               i++;
+                       continue;
+               }
+
+               bpf_vlog_reset(&env->log, env->log.end_pos - 1);
+               verbose(env, " //");
+
+               if (is_ldx_stx_call && info->at_in &&
+                   arg_is_visited(&info->at_in[i][0])) {
+                       for (r = 0; r < MAX_BPF_REG - 1; r++) {
+                               if (!arg_is_fp(&info->at_in[i][r]))
+                                       continue;
+                               verbose(env, " r%d=", r);
+                               verbose_arg_track(env, &info->at_in[i][r]);
+                       }
+               }
+
+               if (is_ldx_stx_call) {
+                       for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++) {
+                               if (!arg_is_fp(&at_stack_in[i][r]))
+                                       continue;
+                               verbose(env, " fp%+d=", -(r + 1) * 8);
+                               verbose_arg_track(env, &at_stack_in[i][r]);
+                       }
+               }
+
+               verbose(env, "\n");
+               if (bpf_is_ldimm64(&insns[idx]))
+                       i++;
+       }
+}
+
+/*
+ * Compute arg tracking dataflow for a single subprog.
+ * Runs forward fixed-point with arg_track_xfer(), then records
+ * memory accesses in a single linear pass over converged state.
+ *
+ * @callee_entry: pre-populated entry state for R1-R5
+ *                NULL for main (subprog 0).
+ * @info:         stores at_in, len for debug printing.
+ */
+static int compute_subprog_args(struct bpf_verifier_env *env,
+                               struct subprog_at_info *info,
+                               struct arg_track *callee_entry,
+                               struct func_instance *instance,
+                               u32 *callsites)
+{
+       int subprog = instance->subprog;
+       struct bpf_insn *insns = env->prog->insnsi;
+       int depth = instance->callchain.curframe;
+       int start = env->subprog_info[subprog].start;
+       int po_start = env->subprog_info[subprog].postorder_start;
+       int end = env->subprog_info[subprog + 1].start;
+       int po_end = env->subprog_info[subprog + 1].postorder_start;
+       int len = end - start;
+       struct arg_track (*at_in)[MAX_BPF_REG] = NULL;
+       struct arg_track at_out[MAX_BPF_REG];
+       struct arg_track (*at_stack_in)[MAX_ARG_SPILL_SLOTS] = NULL;
+       struct arg_track *at_stack_out = NULL;
+       struct arg_track unvisited = { .frame = ARG_UNVISITED };
+       struct arg_track none = { .frame = ARG_NONE };
+       bool changed;
+       int i, p, r, err = -ENOMEM;
+
+       at_in = kvmalloc_objs(*at_in, len, GFP_KERNEL_ACCOUNT);
+       if (!at_in)
+               goto err_free;
+
+       at_stack_in = kvmalloc_objs(*at_stack_in, len, GFP_KERNEL_ACCOUNT);
+       if (!at_stack_in)
+               goto err_free;
+
+       at_stack_out = kvmalloc_objs(*at_stack_out, MAX_ARG_SPILL_SLOTS, GFP_KERNEL_ACCOUNT);
+       if (!at_stack_out)
+               goto err_free;
+
+       for (i = 0; i < len; i++) {
+               for (r = 0; r < MAX_BPF_REG; r++)
+                       at_in[i][r] = unvisited;
+               for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+                       at_stack_in[i][r] = unvisited;
+       }
+
+       for (r = 0; r < MAX_BPF_REG; r++)
+               at_in[0][r] = none;
+
+       /* Entry: R10 is always precisely the current frame's FP */
+       at_in[0][BPF_REG_FP] = arg_single(depth, 0);
+
+       /* R1-R5: from caller or ARG_NONE for main */
+       if (callee_entry) {
+               for (r = BPF_REG_1; r <= BPF_REG_5; r++)
+                       at_in[0][r] = callee_entry[r];
+       }
+
+       /* Entry: all stack slots are ARG_NONE */
+       for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+               at_stack_in[0][r] = none;
+
+       if (env->log.level & BPF_LOG_LEVEL2)
+               verbose(env, "subprog#%d: analyzing (depth %d)...\n", subprog, depth);
+
+       /* Forward fixed-point iteration in reverse post order */
+redo:
+       changed = false;
+       for (p = po_end - 1; p >= po_start; p--) {
+               int idx = env->cfg.insn_postorder[p];
+               int i = idx - start;
+               struct bpf_insn *insn = &insns[idx];
+               struct bpf_iarray *succ;
+
+               if (!arg_is_visited(&at_in[i][0]) && !arg_is_visited(&at_in[i][1]))
+                       continue;
+
+               memcpy(at_out, at_in[i], sizeof(at_out));
+               memcpy(at_stack_out, at_stack_in[i], MAX_ARG_SPILL_SLOTS * sizeof(*at_stack_out));
+
+               arg_track_xfer(env, insn, idx, at_out, at_stack_out, instance, callsites);
+               arg_track_log(env, insn, idx, at_in[i], at_stack_in[i], at_out, at_stack_out);
+
+               /* Propagate to successors within this subprogram */
+               succ = bpf_insn_successors(env, idx);
+               for (int s = 0; s < succ->cnt; s++) {
+                       int target = succ->items[s];
+                       int ti;
+
+                       /* Filter: stay within the subprogram's range */
+                       if (target < start || target >= end)
+                               continue;
+                       ti = target - start;
+
+                       for (r = 0; r < MAX_BPF_REG; r++)
+                               changed |= arg_track_join(env, idx, target, r,
+                                                         &at_in[ti][r], at_out[r]);
+
+                       for (r = 0; r < MAX_ARG_SPILL_SLOTS; r++)
+                               changed |= arg_track_join(env, idx, target, -r - 1,
+                                                         &at_stack_in[ti][r], at_stack_out[r]);
+               }
+       }
+       if (changed)
+               goto redo;
+
+       info->at_in = at_in;
+       at_in = NULL;
+       info->len = len;
+       print_subprog_arg_access(env, subprog, info, at_stack_in);
+       err = 0;
+
+err_free:
+       kvfree(at_stack_out);
+       kvfree(at_stack_in);
+       kvfree(at_in);
+       return err;
+}
+
+/*
+ * Recursively analyze a subprog with specific 'entry_args'.
+ * Each callee is analyzed with the exact args from its call site.
+ *
+ * Args are recomputed for each call because the dataflow result at_in[]
+ * depends on the entry args and frame depth.  Consider A->C->D and
+ * B->C->D: the callsites in A and B pass different args into C, so C is
+ * recomputed; then, within C, the same callsite passes different args
+ * into D.
+ */
+static int analyze_subprog(struct bpf_verifier_env *env,
+                          struct arg_track *entry_args,
+                          struct subprog_at_info *info,
+                          struct func_instance *instance,
+                          u32 *callsites)
+{
+       int subprog = instance->subprog;
+       int depth = instance->callchain.curframe;
+       struct bpf_insn *insns = env->prog->insnsi;
+       int start = env->subprog_info[subprog].start;
+       int po_start = env->subprog_info[subprog].postorder_start;
+       int po_end = env->subprog_info[subprog + 1].postorder_start;
+       int j, err;
+
+       if (++env->liveness->subprog_calls > 10000) {
+               verbose(env, "liveness analysis exceeded complexity limit (%d calls)\n",
+                       env->liveness->subprog_calls);
+               return -E2BIG;
+       }
+
+       if (need_resched())
+               cond_resched();
+
+       /* Free prior analysis if this subprog was already visited */
+       kvfree(info[subprog].at_in);
+       info[subprog].at_in = NULL;
+
+       err = compute_subprog_args(env, &info[subprog], entry_args, instance, callsites);
+       if (err)
+               return err;
+
+       /* For each reachable call site in the subprog, recurse into callees */
+       for (int p = po_start; p < po_end; p++) {
+               int idx = env->cfg.insn_postorder[p];
+               struct arg_track callee_args[BPF_REG_5 + 1];
+               struct arg_track none = { .frame = ARG_NONE };
+               struct bpf_insn *insn = &insns[idx];
+               struct func_instance *callee_instance;
+               int callee, target;
+               int caller_reg, cb_callee_reg;
+
+               j = idx - start; /* relative index within this subprog */
+
+               if (bpf_pseudo_call(insn)) {
+                       target = idx + insn->imm + 1;
+                       callee = bpf_find_subprog(env, target);
+                       if (callee < 0)
+                               continue;
+
+                       /* Build entry args: R1-R5 from at_in at call site */
+                       for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+                               callee_args[r] = info[subprog].at_in[j][r];
+               } else if (bpf_calls_callback(env, idx)) {
+                       callee = find_callback_subprog(env, insn, idx, &caller_reg, &cb_callee_reg);
+                       if (callee == -2) {
+                               /*
+                                * The callback is not a known constant, e.g.
+                                * the same bpf_loop() call site invokes two
+                                * different callbacks.  If a stack pointer is
+                                * passed, conservatively mark every frame as
+                                * fully read.
+                                */
+                               if (info[subprog].at_in[j][caller_reg].frame == ARG_NONE)
+                                       continue;
+                               for (int f = 0; f <= depth; f++) {
+                                       err = mark_stack_read(instance, f, idx, SPIS_ALL);
+                                       if (err)
+                                               return err;
+                               }
+                               continue;
+                       }
+                       if (callee < 0)
+                               continue;
+
+                       for (int r = BPF_REG_1; r <= BPF_REG_5; r++)
+                               callee_args[r] = none;
+                       callee_args[cb_callee_reg] = info[subprog].at_in[j][caller_reg];
+               } else {
+                       continue;
+               }
+
+               if (depth == MAX_CALL_FRAMES - 1)
+                       return -EINVAL;
+
+               callee_instance = call_instance(env, instance, idx, callee);
+               if (IS_ERR(callee_instance))
+                       return PTR_ERR(callee_instance);
+               callsites[depth] = idx;
+               err = analyze_subprog(env, callee_args, info, callee_instance, callsites);
+               if (err)
+                       return err;
+       }
+
+       return update_instance(env, instance);
+}
+
+int bpf_compute_subprog_arg_access(struct bpf_verifier_env *env)
+{
+       u32 callsites[MAX_CALL_FRAMES] = {};
+       int insn_cnt = env->prog->len;
+       struct func_instance *instance;
+       struct subprog_at_info *info;
+       int k, err = 0;
+
+       info = kvzalloc_objs(*info, env->subprog_cnt, GFP_KERNEL_ACCOUNT);
+       if (!info)
+               return -ENOMEM;
+
+       env->callsite_at_stack = kvzalloc_objs(*env->callsite_at_stack, insn_cnt,
+                                              GFP_KERNEL_ACCOUNT);
+       if (!env->callsite_at_stack) {
+               kvfree(info);
+               return -ENOMEM;
+       }
+
+       instance = call_instance(env, NULL, 0, 0);
+       if (IS_ERR(instance)) {
+               err = PTR_ERR(instance);
+               goto out;
+       }
+       err = analyze_subprog(env, NULL, info, instance, callsites);
+       if (err)
+               goto out;
+
+       /*
+        * Subprogs and callbacks that don't receive FP-derived arguments
+        * cannot access ancestor stack frames, so they were skipped during
+        * the recursive walk above.  Async callbacks (timer, workqueue) are
+        * also not reachable from the main program's call graph.  Analyze
+        * all unvisited subprogs as independent roots at depth 0.
+        *
+        * Use reverse topological order (callers before callees) so that
+        * each subprog is analyzed before its callees, allowing the
+        * recursive walk inside analyze_subprog() to naturally
+        * reach nested callees that also lack FP-derived args.
+        */
+       for (k = env->subprog_cnt - 1; k >= 0; k--) {
+               int sub = env->subprog_topo_order[k];
+
+               if (info[sub].at_in && !bpf_subprog_is_global(env, sub))
+                       continue;
+               instance = call_instance(env, NULL, 0, sub);
+               if (IS_ERR(instance)) {
+                       err = PTR_ERR(instance);
+                       goto out;
+               }
+               err = analyze_subprog(env, NULL, info, instance, callsites);
+               if (err)
+                       goto out;
+       }
+
+out:
+       for (k = 0; k < insn_cnt; k++)
+               kvfree(env->callsite_at_stack[k]);
+       kvfree(env->callsite_at_stack);
+       env->callsite_at_stack = NULL;
+       for (k = 0; k < env->subprog_cnt; k++)
+               kvfree(info[k].at_in);
+       kvfree(info);
+       return err;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e5f4bd5f260954efb93ee2acb82af7f1fff39b15..4c3db8e1c7a403eca3ce510989b782464b4e07b3 100644
@@ -26414,6 +26414,11 @@ static int compute_live_registers(struct bpf_verifier_env *env)
        for (i = 0; i < insn_cnt; ++i)
                compute_insn_live_regs(env, &insns[i], &state[i]);
 
+       /* Forward pass: resolve stack access through FP-derived pointers */
+       err = bpf_compute_subprog_arg_access(env);
+       if (err)
+               goto out;
+
        changed = true;
        while (changed) {
                changed = false;