This patch implements bitwise tracking (tnum analysis) for BPF_END
(byte swap) operation.
Currently, the BPF verifier does not track value for BPF_END operation,
treating the result as completely unknown. This limits the verifier's
ability to prove safety of programs that perform endianness conversions,
which are common in networking code.
For example, the following code pattern for port number validation:
int test(struct pt_regs *ctx) {
__u64 x = bpf_get_prandom_u32();
x &= 0x3f00; // Range: [0, 0x3f00], var_off: (0x0; 0x3f00)
x = bswap16(x); // Should swap to range [0, 0x3f], var_off: (0x0; 0x3f)
if (x > 0x3f) goto trap;
return 0;
trap:
return *(u64 *)NULL; // Should be unreachable
}
Currently generates verifier output:
1: (54) w0 &= 16128 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=16128,var_off=(0x0; 0x3f00))
2: (d7) r0 = bswap16 r0 ; R0=scalar()
3: (25) if r0 > 0x3f goto pc+2 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f))
Without this patch, even though the verifier knows `x` has certain bits
set, after bswap16, it loses all tracking information and treats port
as having a completely unknown value [0, 65535].
According to the BPF instruction set[1], there are 3 kinds of BPF_END:
1. `bswap(16|32|64)`: opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
- do unconditional swap
2. `le(16|32|64)`: opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
- on big-endian: do swap
- on little-endian: truncation (16/32-bit) or no-op (64-bit)
3. `be(16|32|64)`: opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
- on little-endian: do swap
- on big-endian: truncation (16/32-bit) or no-op (64-bit)
Since BPF_END operations are inherently bit-wise permutations, tnum
(bitwise tracking) offers the most efficient and precise mechanism
for value analysis. By implementing `tnum_bswap16`, `tnum_bswap32`,
and `tnum_bswap64`, we can derive exact `var_off` values concisely,
directly reflecting the bit-level changes.
Here is the overview of changes:
1. In `tnum_bswap(16|32|64)` (kernel/bpf/tnum.c):
Call `swab(16|32|64)` function on the value and mask of `var_off`, and
do truncation for 16/32-bit cases.
2. In `adjust_scalar_min_max_vals` (kernel/bpf/verifier.c):
Call helper function `scalar_byte_swap`.
- Only do byte swap when
* alu64 (unconditional swap) OR
* switching between big-endian and little-endian machines.
- If need do byte swap:
* Firstly call `tnum_bswap(16|32|64)` to update `var_off`.
* Then reset the bound since byte swap scrambles the range.
- For 16/32-bit cases, truncate dst register to match the swapped size.
This enables better verification of networking code that frequently uses
byte swaps for protocol processing, reducing false positive rejections.
[1] https://www.kernel.org/doc/Documentation/bpf/standardization/instruction-set.rst
Co-developed-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Co-developed-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Tianci Cao <ziye@zju.edu.cn>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20260204111503.77871-2-ziye@zju.edu.cn
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
/* Return @a with all but the lowest @size bytes cleared */
struct tnum tnum_cast(struct tnum a, u8 size);
+/* Swap the bytes of a tnum */
+struct tnum tnum_bswap16(struct tnum a);
+struct tnum tnum_bswap32(struct tnum a);
+struct tnum tnum_bswap64(struct tnum a);
+
/* Returns true if @a is a known constant */
static inline bool tnum_is_const(struct tnum a)
{
*/
#include <linux/kernel.h>
#include <linux/tnum.h>
+#include <linux/swab.h>
#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m}
/* A completely unknown value */
{
return tnum_with_subreg(a, tnum_const(value));
}
+
+struct tnum tnum_bswap16(struct tnum a)
+{
+ return TNUM(swab16(a.value & 0xFFFF), swab16(a.mask & 0xFFFF));
+}
+
+struct tnum tnum_bswap32(struct tnum a)
+{
+ return TNUM(swab32(a.value & 0xFFFFFFFF), swab32(a.mask & 0xFFFFFFFF));
+}
+
+struct tnum tnum_bswap64(struct tnum a)
+{
+ return TNUM(swab64(a.value), swab64(a.mask));
+}
__update_reg_bounds(dst_reg);
}
+static void scalar_byte_swap(struct bpf_reg_state *dst_reg, struct bpf_insn *insn)
+{
+ /*
+ * Byte swap operation - update var_off using tnum_bswap.
+ * Three cases:
+ * 1. bswap(16|32|64): opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE)
+ * unconditional swap
+ * 2. to_le(16|32|64): opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE)
+ * swap on big-endian, truncation or no-op on little-endian
+ * 3. to_be(16|32|64): opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE)
+ * swap on little-endian, truncation or no-op on big-endian
+ */
+
+ bool alu64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ bool to_le = BPF_SRC(insn->code) == BPF_TO_LE;
+ bool is_big_endian;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ is_big_endian = true;
+#else
+ is_big_endian = false;
+#endif
+ /* Apply bswap if alu64 or switch between big-endian and little-endian machines */
+ bool need_bswap = alu64 || (to_le == is_big_endian);
+
+ if (need_bswap) {
+ if (insn->imm == 16)
+ dst_reg->var_off = tnum_bswap16(dst_reg->var_off);
+ else if (insn->imm == 32)
+ dst_reg->var_off = tnum_bswap32(dst_reg->var_off);
+ else if (insn->imm == 64)
+ dst_reg->var_off = tnum_bswap64(dst_reg->var_off);
+ /*
+ * Byteswap scrambles the range, so we must reset bounds.
+ * Bounds will be re-derived from the new tnum later.
+ */
+ __mark_reg_unbounded(dst_reg);
+ }
+ /* For bswap16/32, truncate dst register to match the swapped size */
+ if (insn->imm == 16 || insn->imm == 32)
+ coerce_reg_to_size(dst_reg, insn->imm / 8);
+}
+
static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
const struct bpf_reg_state *src_reg)
{
case BPF_XOR:
case BPF_OR:
case BPF_MUL:
+ case BPF_END:
return true;
/*
else
scalar_min_max_arsh(dst_reg, &src_reg);
break;
+ case BPF_END:
+ scalar_byte_swap(dst_reg, insn);
+ break;
default:
break;
}
- /* ALU32 ops are zero extended into 64bit register */
- if (alu32)
+ /*
+ * ALU32 ops are zero extended into 64bit register.
+ *
+ * BPF_END is already handled inside the helper (truncation),
+ * so skip zext here to avoid unexpected zero extension.
+ * e.g., le64: opcode=(BPF_END|BPF_ALU|BPF_TO_LE), imm=0x40
+ * This is a 64bit byte swap operation with alu32==true,
+ * but we should not zero extend the result.
+ */
+ if (alu32 && opcode != BPF_END)
zext_32_to_64(dst_reg);
reg_bounds_sync(dst_reg);
return 0;
}
/* check dest operand */
- if (opcode == BPF_NEG &&
+ if ((opcode == BPF_NEG || opcode == BPF_END) &&
regs[insn->dst_reg].type == SCALAR_VALUE) {
err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
err = err ?: adjust_scalar_min_max_vals(env, insn,