From: Greg Kroah-Hartman Date: Fri, 26 Jan 2018 10:36:29 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.4.114~25 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3efd9f3362f8521039595517b26d6f17ff4b60b6;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch arm-net-bpf-clarify-tail_call-index.patch arm-net-bpf-correct-stack-layout-documentation.patch arm-net-bpf-fix-ldx-instructions.patch arm-net-bpf-fix-register-saving.patch arm-net-bpf-fix-stack-alignment.patch arm-net-bpf-fix-tail-call-jumps.patch arm-net-bpf-move-stack-documentation.patch --- diff --git a/queue-4.14/arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch b/queue-4.14/arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch new file mode 100644 index 00000000000..e34fbbadfd0 --- /dev/null +++ b/queue-4.14/arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch @@ -0,0 +1,75 @@ +From e9062481824384f00299971f923fecf6b3668001 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 11:35:15 +0000 +Subject: ARM: net: bpf: avoid 'bx' instruction on non-Thumb capable CPUs + +From: Russell King + +commit e9062481824384f00299971f923fecf6b3668001 upstream. + +Avoid the 'bx' instruction on CPUs that have no support for Thumb and +thus do not implement this instruction by moving the generation of this +opcode to a separate function that selects between: + + bx reg + +and + + mov pc, reg + +according to the capabilities of the CPU. 
+ +Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -285,16 +285,20 @@ static inline void emit_mov_i(const u8 r + emit_mov_i_no8m(rd, val, ctx); + } + +-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) ++static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) + { +- ctx->seen |= SEEN_CALL; +-#if __LINUX_ARM_ARCH__ < 5 +- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); +- + if (elf_hwcap & HWCAP_THUMB) + emit(ARM_BX(tgt_reg), ctx); + else + emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); ++} ++ ++static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) ++{ ++ ctx->seen |= SEEN_CALL; ++#if __LINUX_ARM_ARCH__ < 5 ++ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); ++ emit_bx_r(tgt_reg, ctx); + #else + emit(ARM_BLX_R(tgt_reg), ctx); + #endif +@@ -997,7 +1001,7 @@ static int emit_bpf_tail_call(struct jit + emit_a32_mov_i(tmp2[1], off, false, ctx); + emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); + emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); +- emit(ARM_BX(tmp[1]), ctx); ++ emit_bx_r(tmp[1], ctx); + + /* out: */ + if (out_offset == -1) +@@ -1166,7 +1170,7 @@ static void build_epilogue(struct jit_ct + emit(ARM_POP(reg_set), ctx); + /* Return back to the callee function */ + if (!(ctx->seen & SEEN_CALL)) +- emit(ARM_BX(ARM_LR), ctx); ++ emit_bx_r(ARM_LR, ctx); + #endif + } + diff --git a/queue-4.14/arm-net-bpf-clarify-tail_call-index.patch b/queue-4.14/arm-net-bpf-clarify-tail_call-index.patch new file mode 100644 index 00000000000..ec5ef731050 --- /dev/null +++ b/queue-4.14/arm-net-bpf-clarify-tail_call-index.patch @@ -0,0 +1,32 @@ +From 091f02483df7b56615b524491f404e574c5e0668 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 12:11:26 +0000 +Subject: ARM: net: bpf: clarify tail_call index + +From: Russell 
King + +commit 091f02483df7b56615b524491f404e574c5e0668 upstream. + +As per 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT"), the index used +for array lookup is defined to be 32-bit wide. Update a misleading +comment that suggests it is 64-bit wide. + +Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -1016,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit + emit_a32_mov_i(tmp[1], off, false, ctx); + emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); + emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); +- /* index (64 bit) */ ++ /* index is 32-bit for arrays */ + emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); + /* index >= array->map.max_entries */ + emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); diff --git a/queue-4.14/arm-net-bpf-correct-stack-layout-documentation.patch b/queue-4.14/arm-net-bpf-correct-stack-layout-documentation.patch new file mode 100644 index 00000000000..5b8a9a10b85 --- /dev/null +++ b/queue-4.14/arm-net-bpf-correct-stack-layout-documentation.patch @@ -0,0 +1,74 @@ +From 0005e55a79cfda88199e41a406a829c88d708c67 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 22:51:27 +0000 +Subject: ARM: net: bpf: correct stack layout documentation + +From: Russell King + +commit 0005e55a79cfda88199e41a406a829c88d708c67 upstream. + +The stack layout documentation incorrectly suggests that the BPF JIT +scratch space starts immediately below BPF_FP. This is not correct, +so let's fix the documentation to reflect reality. 
+ +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 35 +++++++++++++++++++++++++++-------- + 1 file changed, 27 insertions(+), 8 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -28,24 +28,43 @@ + int bpf_jit_enable __read_mostly; + + /* +- * eBPF prog stack layout ++ * eBPF prog stack layout: + * + * high +- * original ARM_SP => +-----+ eBPF prologue +- * |FP/LR| +- * current ARM_FP => +-----+ +- * | ... | callee saved registers +- * eBPF fp register => +-----+ <= (BPF_FP) ++ * original ARM_SP => +-----+ ++ * | | callee saved registers ++ * +-----+ <= (BPF_FP + SCRATCH_SIZE) + * | ... | eBPF JIT scratch space +- * | | eBPF prog stack ++ * eBPF fp register => +-----+ ++ * (BPF_FP) | ... | eBPF prog stack + * +-----+ + * |RSVD | JIT scratchpad +- * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE) ++ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | | + * | ... | Function call stack + * | | + * +-----+ + * low ++ * ++ * The callee saved registers depends on whether frame pointers are enabled. 
++ * With frame pointers (to be compliant with the ABI): ++ * ++ * high ++ * original ARM_SP => +------------------+ \ ++ * | pc | | ++ * current ARM_FP => +------------------+ } callee saved registers ++ * |r4-r8,r10,fp,ip,lr| | ++ * +------------------+ / ++ * low ++ * ++ * Without frame pointers: ++ * ++ * high ++ * original ARM_SP => +------------------+ ++ * | lr | (optional) ++ * | r4-r8,r10 | callee saved registers ++ * +------------------+ ++ * low + */ + + #define STACK_OFFSET(k) (k) diff --git a/queue-4.14/arm-net-bpf-fix-ldx-instructions.patch b/queue-4.14/arm-net-bpf-fix-ldx-instructions.patch new file mode 100644 index 00000000000..f4e8af33dca --- /dev/null +++ b/queue-4.14/arm-net-bpf-fix-ldx-instructions.patch @@ -0,0 +1,115 @@ +From ec19e02b343db991d2d1610c409efefebf4e2ca9 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 21:06:16 +0000 +Subject: ARM: net: bpf: fix LDX instructions + +From: Russell King + +commit ec19e02b343db991d2d1610c409efefebf4e2ca9 upstream. + +When the source and destination register are identical, our JIT does not +generate correct code, which leads to kernel oopses. + +Fix this by (a) generating more efficient code, and (b) making use of +the temporary earlier if we will overwrite the address register. 
+ +Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 61 ++++++++++++++++++++++++---------------------- + 1 file changed, 33 insertions(+), 28 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -913,33 +913,53 @@ static inline void emit_str_r(const u8 d + } + + /* dst = *(size*)(src + off) */ +-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk, +- const s32 off, struct jit_ctx *ctx, const u8 sz){ ++static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, ++ s32 off, struct jit_ctx *ctx, const u8 sz){ + const u8 *tmp = bpf2a32[TMP_REG_1]; +- u8 rd = dstk ? tmp[1] : dst; ++ const u8 *rd = dstk ? tmp : dst; + u8 rm = src; ++ s32 off_max; + +- if (off) { ++ if (sz == BPF_H) ++ off_max = 0xff; ++ else ++ off_max = 0xfff; ++ ++ if (off < 0 || off > off_max) { + emit_a32_mov_i(tmp[0], off, false, ctx); + emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); + rm = tmp[0]; ++ off = 0; ++ } else if (rd[1] == rm) { ++ emit(ARM_MOV_R(tmp[0], rm), ctx); ++ rm = tmp[0]; + } + switch (sz) { +- case BPF_W: +- /* Load a Word */ +- emit(ARM_LDR_I(rd, rm, 0), ctx); ++ case BPF_B: ++ /* Load a Byte */ ++ emit(ARM_LDRB_I(rd[1], rm, off), ctx); ++ emit_a32_mov_i(dst[0], 0, dstk, ctx); + break; + case BPF_H: + /* Load a HalfWord */ +- emit(ARM_LDRH_I(rd, rm, 0), ctx); ++ emit(ARM_LDRH_I(rd[1], rm, off), ctx); ++ emit_a32_mov_i(dst[0], 0, dstk, ctx); + break; +- case BPF_B: +- /* Load a Byte */ +- emit(ARM_LDRB_I(rd, rm, 0), ctx); ++ case BPF_W: ++ /* Load a Word */ ++ emit(ARM_LDR_I(rd[1], rm, off), ctx); ++ emit_a32_mov_i(dst[0], 0, dstk, ctx); ++ break; ++ case BPF_DW: ++ /* Load a Double Word */ ++ emit(ARM_LDR_I(rd[1], rm, off), ctx); ++ emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); + break; + } + if (dstk) +- emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); ++ emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); ++ if (dstk && sz == 
BPF_DW) ++ emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); + } + + /* Arithmatic Operation */ +@@ -1440,22 +1460,7 @@ exit: + rn = sstk ? tmp2[1] : src_lo; + if (sstk) + emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); +- switch (BPF_SIZE(code)) { +- case BPF_W: +- /* Load a Word */ +- case BPF_H: +- /* Load a Half-Word */ +- case BPF_B: +- /* Load a Byte */ +- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code)); +- emit_a32_mov_i(dst_hi, 0, dstk, ctx); +- break; +- case BPF_DW: +- /* Load a double word */ +- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W); +- emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W); +- break; +- } ++ emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); + break; + /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ + case BPF_LD | BPF_ABS | BPF_W: diff --git a/queue-4.14/arm-net-bpf-fix-register-saving.patch b/queue-4.14/arm-net-bpf-fix-register-saving.patch new file mode 100644 index 00000000000..27bcf09993a --- /dev/null +++ b/queue-4.14/arm-net-bpf-fix-register-saving.patch @@ -0,0 +1,222 @@ +From 02088d9b392f605c892894b46aa8c83e3abd0115 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 22:38:18 +0000 +Subject: ARM: net: bpf: fix register saving + +From: Russell King + +commit 02088d9b392f605c892894b46aa8c83e3abd0115 upstream. + +When an eBPF program tail-calls another eBPF program, it enters it after +the prologue to avoid having complex stack manipulations. This can lead +to kernel oopses, and similar. + +Resolve this by always using a fixed stack layout, a CPU register frame +pointer, and using this when reloading registers before returning. 
+ +Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 80 ++++++++++++---------------------------------- + 1 file changed, 22 insertions(+), 58 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -61,20 +61,24 @@ int bpf_jit_enable __read_mostly; + * + * high + * original ARM_SP => +------------------+ +- * | lr | (optional) +- * | r4-r8,r10 | callee saved registers +- * +------------------+ ++ * | r4-r8,r10,fp,lr | callee saved registers ++ * current ARM_FP => +------------------+ + * low ++ * ++ * When popping registers off the stack at the end of a BPF function, we ++ * reference them via the current ARM_FP register. + */ ++#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ ++ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ ++ 1 << ARM_FP) ++#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) ++#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) + + #define STACK_OFFSET(k) (k) + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ + #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ + #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ + +-/* Flags used for JIT optimization */ +-#define SEEN_CALL (1 << 0) +- + #define FLAG_IMM_OVERFLOW (1 << 0) + + /* +@@ -135,7 +139,6 @@ static const u8 bpf2a32[][2] = { + * idx : index of current last JITed instruction. + * prologue_bytes : bytes used in prologue. + * epilogue_offset : offset of epilogue starting. +- * seen : bit mask used for JIT optimization. + * offsets : array of eBPF instruction offsets in + * JITed code. + * target : final JITed code. 
+@@ -150,7 +153,6 @@ struct jit_ctx { + unsigned int idx; + unsigned int prologue_bytes; + unsigned int epilogue_offset; +- u32 seen; + u32 flags; + u32 *offsets; + u32 *target; +@@ -340,7 +342,6 @@ static void emit_bx_r(u8 tgt_reg, struct + + static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) + { +- ctx->seen |= SEEN_CALL; + #if __LINUX_ARM_ARCH__ < 5 + emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); + emit_bx_r(tgt_reg, ctx); +@@ -403,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u + } + + /* Call appropriate function */ +- ctx->seen |= SEEN_CALL; + emit_mov_i(ARM_IP, op == BPF_DIV ? + (u32)jit_udiv32 : (u32)jit_mod32, ctx); + emit_blx_r(ARM_IP, ctx); +@@ -669,8 +669,6 @@ static inline void emit_a32_lsh_r64(cons + /* Do LSH operation */ + emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); + emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); +- /* As we are using ARM_LR */ +- ctx->seen |= SEEN_CALL; + emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); +@@ -705,8 +703,6 @@ static inline void emit_a32_arsh_r64(con + /* Do the ARSH operation */ + emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); + emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); +- /* As we are using ARM_LR */ +- ctx->seen |= SEEN_CALL; + emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + _emit(ARM_COND_MI, ARM_B(0), ctx); +@@ -741,8 +737,6 @@ static inline void emit_a32_lsr_r64(cons + /* Do LSH operation */ + emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); + emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); +- /* As we are using ARM_LR */ +- ctx->seen |= SEEN_CALL; + emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); +@@ -877,8 +871,6 @@ static inline void emit_a32_mul_r64(cons + /* Do Multiplication */ + emit(ARM_MUL(ARM_IP, rd, rn), 
ctx); + emit(ARM_MUL(ARM_LR, rm, rt), ctx); +- /* As we are using ARM_LR */ +- ctx->seen |= SEEN_CALL; + emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); + + emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); +@@ -955,7 +947,6 @@ static inline void emit_ar_r(const u8 rd + const u8 rn, struct jit_ctx *ctx, u8 op) { + switch (op) { + case BPF_JSET: +- ctx->seen |= SEEN_CALL; + emit(ARM_AND_R(ARM_IP, rt, rn), ctx); + emit(ARM_AND_R(ARM_LR, rd, rm), ctx); + emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); +@@ -1119,33 +1110,22 @@ static void build_prologue(struct jit_ct + const u8 r2 = bpf2a32[BPF_REG_1][1]; + const u8 r3 = bpf2a32[BPF_REG_1][0]; + const u8 r4 = bpf2a32[BPF_REG_6][1]; +- const u8 r5 = bpf2a32[BPF_REG_6][0]; +- const u8 r6 = bpf2a32[TMP_REG_1][1]; +- const u8 r7 = bpf2a32[TMP_REG_1][0]; +- const u8 r8 = bpf2a32[TMP_REG_2][1]; +- const u8 r10 = bpf2a32[TMP_REG_2][0]; + const u8 fplo = bpf2a32[BPF_REG_FP][1]; + const u8 fphi = bpf2a32[BPF_REG_FP][0]; +- const u8 sp = ARM_SP; + const u8 *tcc = bpf2a32[TCALL_CNT]; + +- u16 reg_set = 0; +- + /* Save callee saved registers. */ +- reg_set |= (1<seen & SEEN_CALL) +- reg_set |= (1<stack_size = imm8m(STACK_SIZE); + +@@ -1168,33 +1148,19 @@ static void build_prologue(struct jit_ct + /* end of prologue */ + } + ++/* restore callee saved registers. */ + static void build_epilogue(struct jit_ctx *ctx) + { +- const u8 r4 = bpf2a32[BPF_REG_6][1]; +- const u8 r5 = bpf2a32[BPF_REG_6][0]; +- const u8 r6 = bpf2a32[TMP_REG_1][1]; +- const u8 r7 = bpf2a32[TMP_REG_1][0]; +- const u8 r8 = bpf2a32[TMP_REG_2][1]; +- const u8 r10 = bpf2a32[TMP_REG_2][0]; +- u16 reg_set = 0; +- +- /* unwind function call stack */ +- emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); +- +- /* restore callee saved registers. 
*/ +- reg_set |= (1<seen & SEEN_CALL) +- reg_set |= (1<seen & SEEN_CALL)) +- emit_bx_r(ARM_LR, ctx); ++ emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); ++ emit(ARM_POP(CALLEE_POP_MASK), ctx); + #endif + } + +@@ -1422,8 +1388,6 @@ static int build_insn(const struct bpf_i + emit_rev32(rt, rt, ctx); + goto emit_bswap_uxt; + case 64: +- /* Because of the usage of ARM_LR */ +- ctx->seen |= SEEN_CALL; + emit_rev32(ARM_LR, rt, ctx); + emit_rev32(rt, rd, ctx); + emit(ARM_MOV_R(rd, ARM_LR), ctx); diff --git a/queue-4.14/arm-net-bpf-fix-stack-alignment.patch b/queue-4.14/arm-net-bpf-fix-stack-alignment.patch new file mode 100644 index 00000000000..206528ea2ce --- /dev/null +++ b/queue-4.14/arm-net-bpf-fix-stack-alignment.patch @@ -0,0 +1,49 @@ +From d1220efd23484c72c82d5471f05daeb35b5d1916 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 16:10:07 +0000 +Subject: ARM: net: bpf: fix stack alignment + +From: Russell King + +commit d1220efd23484c72c82d5471f05daeb35b5d1916 upstream. + +As per 2dede2d8e925 ("ARM EABI: stack pointer must be 64-bit aligned +after a CPU exception") the stack should be aligned to a 64-bit boundary +on EABI systems. Ensure that the eBPF JIT appropriately aligns the +stack. 
+ +Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -179,8 +179,13 @@ static void jit_fill_hole(void *area, un + *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); + } + +-/* Stack must be multiples of 16 Bytes */ +-#define STACK_ALIGN(sz) (((sz) + 3) & ~3) ++#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) ++/* EABI requires the stack to be aligned to 64-bit boundaries */ ++#define STACK_ALIGNMENT 8 ++#else ++/* Stack must be aligned to 32-bit boundaries */ ++#define STACK_ALIGNMENT 4 ++#endif + + /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, + * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, +@@ -194,7 +199,7 @@ static void jit_fill_hole(void *area, un + + SCRATCH_SIZE + \ + + 4 /* extra for skb_copy_bits buffer */) + +-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) ++#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) + + /* Get the offset of eBPF REGISTERs stored on scratch space. */ + #define STACK_VAR(off) (STACK_SIZE-off-4) diff --git a/queue-4.14/arm-net-bpf-fix-tail-call-jumps.patch b/queue-4.14/arm-net-bpf-fix-tail-call-jumps.patch new file mode 100644 index 00000000000..3f90e85c7d7 --- /dev/null +++ b/queue-4.14/arm-net-bpf-fix-tail-call-jumps.patch @@ -0,0 +1,75 @@ +From f4483f2cc1fdc03488c8a1452e545545ae5bda93 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 11:39:54 +0000 +Subject: ARM: net: bpf: fix tail call jumps + +From: Russell King + +commit f4483f2cc1fdc03488c8a1452e545545ae5bda93 upstream. + +When a tail call fails, it is documented that the tail call should +continue execution at the following instruction. 
An example tail call +sequence is: + + 12: (85) call bpf_tail_call#12 + 13: (b7) r0 = 0 + 14: (95) exit + +The ARM assembler for the tail call in this case ends up branching to +instruction 14 instead of instruction 13, resulting in the BPF filter +returning a non-zero value: + + 178: ldr r8, [sp, #588] ; insn 12 + 17c: ldr r6, [r8, r6] + 180: ldr r8, [sp, #580] + 184: cmp r8, r6 + 188: bcs 0x1e8 + 18c: ldr r6, [sp, #524] + 190: ldr r7, [sp, #528] + 194: cmp r7, #0 + 198: cmpeq r6, #32 + 19c: bhi 0x1e8 + 1a0: adds r6, r6, #1 + 1a4: adc r7, r7, #0 + 1a8: str r6, [sp, #524] + 1ac: str r7, [sp, #528] + 1b0: mov r6, #104 + 1b4: ldr r8, [sp, #588] + 1b8: add r6, r8, r6 + 1bc: ldr r8, [sp, #580] + 1c0: lsl r7, r8, #2 + 1c4: ldr r6, [r6, r7] + 1c8: cmp r6, #0 + 1cc: beq 0x1e8 + 1d0: mov r8, #32 + 1d4: ldr r6, [r6, r8] + 1d8: add r6, r6, #44 + 1dc: bx r6 + 1e0: mov r0, #0 ; insn 13 + 1e4: mov r1, #0 + 1e8: add sp, sp, #596 ; insn 14 + 1ec: pop {r4, r5, r6, r7, r8, sl, pc} + +For other sequences, the tail call could end up branching midway through +the following BPF instructions, or maybe off the end of the function, +leading to unknown behaviours. 
+ +Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -949,7 +949,7 @@ static int emit_bpf_tail_call(struct jit + const u8 *tcc = bpf2a32[TCALL_CNT]; + const int idx0 = ctx->idx; + #define cur_offset (ctx->idx - idx0) +-#define jmp_offset (out_offset - (cur_offset)) ++#define jmp_offset (out_offset - (cur_offset) - 2) + u32 off, lo, hi; + + /* if (index >= array->map.max_entries) diff --git a/queue-4.14/arm-net-bpf-move-stack-documentation.patch b/queue-4.14/arm-net-bpf-move-stack-documentation.patch new file mode 100644 index 00000000000..f372e9cca0d --- /dev/null +++ b/queue-4.14/arm-net-bpf-move-stack-documentation.patch @@ -0,0 +1,77 @@ +From 70ec3a6c2c11e4b0e107a65de943a082f9aff351 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sat, 13 Jan 2018 21:26:14 +0000 +Subject: ARM: net: bpf: move stack documentation + +From: Russell King + +commit 70ec3a6c2c11e4b0e107a65de943a082f9aff351 upstream. + +Move the stack documentation towards the top of the file, where it's +relevant for things like the register layout. + +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/net/bpf_jit_32.c | 42 +++++++++++++++++++++--------------------- + 1 file changed, 21 insertions(+), 21 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -27,6 +27,27 @@ + + int bpf_jit_enable __read_mostly; + ++/* ++ * eBPF prog stack layout ++ * ++ * high ++ * original ARM_SP => +-----+ eBPF prologue ++ * |FP/LR| ++ * current ARM_FP => +-----+ ++ * | ... | callee saved registers ++ * eBPF fp register => +-----+ <= (BPF_FP) ++ * | ... | eBPF JIT scratch space ++ * | | eBPF prog stack ++ * +-----+ ++ * |RSVD | JIT scratchpad ++ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE) ++ * | | ++ * | ... 
| Function call stack ++ * | | ++ * +-----+ ++ * low ++ */ ++ + #define STACK_OFFSET(k) (k) + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ + #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ +@@ -1091,27 +1112,6 @@ static void build_prologue(struct jit_ct + + u16 reg_set = 0; + +- /* +- * eBPF prog stack layout +- * +- * high +- * original ARM_SP => +-----+ eBPF prologue +- * |FP/LR| +- * current ARM_FP => +-----+ +- * | ... | callee saved registers +- * eBPF fp register => +-----+ <= (BPF_FP) +- * | ... | eBPF JIT scratch space +- * | | eBPF prog stack +- * +-----+ +- * |RSVD | JIT scratchpad +- * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) +- * | | +- * | ... | Function call stack +- * | | +- * +-----+ +- * low +- */ +- + /* Save callee saved registers. */ + reg_set |= (1<