git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 26 Jan 2018 10:36:29 +0000 (11:36 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 26 Jan 2018 10:36:29 +0000 (11:36 +0100)
added patches:
arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch
arm-net-bpf-clarify-tail_call-index.patch
arm-net-bpf-correct-stack-layout-documentation.patch
arm-net-bpf-fix-ldx-instructions.patch
arm-net-bpf-fix-register-saving.patch
arm-net-bpf-fix-stack-alignment.patch
arm-net-bpf-fix-tail-call-jumps.patch
arm-net-bpf-move-stack-documentation.patch

queue-4.14/arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-clarify-tail_call-index.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-correct-stack-layout-documentation.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-fix-ldx-instructions.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-fix-register-saving.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-fix-stack-alignment.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-fix-tail-call-jumps.patch [new file with mode: 0644]
queue-4.14/arm-net-bpf-move-stack-documentation.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch b/queue-4.14/arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch
new file mode 100644 (file)
index 0000000..e34fbba
--- /dev/null
@@ -0,0 +1,75 @@
+From e9062481824384f00299971f923fecf6b3668001 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 11:35:15 +0000
+Subject: ARM: net: bpf: avoid 'bx' instruction on non-Thumb capable CPUs
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit e9062481824384f00299971f923fecf6b3668001 upstream.
+
+Avoid the 'bx' instruction on CPUs that have no support for Thumb and
+thus do not implement this instruction by moving the generation of this
+opcode to a separate function that selects between:
+
+       bx      reg
+
+and
+
+       mov     pc, reg
+
+according to the capabilities of the CPU.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |   18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -285,16 +285,20 @@ static inline void emit_mov_i(const u8 r
+               emit_mov_i_no8m(rd, val, ctx);
+ }
+-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
++static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
+ {
+-      ctx->seen |= SEEN_CALL;
+-#if __LINUX_ARM_ARCH__ < 5
+-      emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+-
+       if (elf_hwcap & HWCAP_THUMB)
+               emit(ARM_BX(tgt_reg), ctx);
+       else
+               emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
++}
++
++static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
++{
++      ctx->seen |= SEEN_CALL;
++#if __LINUX_ARM_ARCH__ < 5
++      emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
++      emit_bx_r(tgt_reg, ctx);
+ #else
+       emit(ARM_BLX_R(tgt_reg), ctx);
+ #endif
+@@ -997,7 +1001,7 @@ static int emit_bpf_tail_call(struct jit
+       emit_a32_mov_i(tmp2[1], off, false, ctx);
+       emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
+       emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
+-      emit(ARM_BX(tmp[1]), ctx);
++      emit_bx_r(tmp[1], ctx);
+       /* out: */
+       if (out_offset == -1)
+@@ -1166,7 +1170,7 @@ static void build_epilogue(struct jit_ct
+       emit(ARM_POP(reg_set), ctx);
+       /* Return back to the callee function */
+       if (!(ctx->seen & SEEN_CALL))
+-              emit(ARM_BX(ARM_LR), ctx);
++              emit_bx_r(ARM_LR, ctx);
+ #endif
+ }
diff --git a/queue-4.14/arm-net-bpf-clarify-tail_call-index.patch b/queue-4.14/arm-net-bpf-clarify-tail_call-index.patch
new file mode 100644 (file)
index 0000000..ec5ef73
--- /dev/null
@@ -0,0 +1,32 @@
+From 091f02483df7b56615b524491f404e574c5e0668 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 12:11:26 +0000
+Subject: ARM: net: bpf: clarify tail_call index
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 091f02483df7b56615b524491f404e574c5e0668 upstream.
+
+As per 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT"), the index used
+for array lookup is defined to be 32-bit wide. Update a misleading
+comment that suggests it is 64-bit wide.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -1016,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit
+       emit_a32_mov_i(tmp[1], off, false, ctx);
+       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+       emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
+-      /* index (64 bit) */
++      /* index is 32-bit for arrays */
+       emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+       /* index >= array->map.max_entries */
+       emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
diff --git a/queue-4.14/arm-net-bpf-correct-stack-layout-documentation.patch b/queue-4.14/arm-net-bpf-correct-stack-layout-documentation.patch
new file mode 100644 (file)
index 0000000..5b8a9a1
--- /dev/null
@@ -0,0 +1,74 @@
+From 0005e55a79cfda88199e41a406a829c88d708c67 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 22:51:27 +0000
+Subject: ARM: net: bpf: correct stack layout documentation
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 0005e55a79cfda88199e41a406a829c88d708c67 upstream.
+
+The stack layout documentation incorrectly suggests that the BPF JIT
+scratch space starts immediately below BPF_FP. This is not correct,
+so let's fix the documentation to reflect reality.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |   35 +++++++++++++++++++++++++++--------
+ 1 file changed, 27 insertions(+), 8 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -28,24 +28,43 @@
+ int bpf_jit_enable __read_mostly;
+ /*
+- * eBPF prog stack layout
++ * eBPF prog stack layout:
+  *
+  *                         high
+- * original ARM_SP =>     +-----+ eBPF prologue
+- *                        |FP/LR|
+- * current ARM_FP =>      +-----+
+- *                        | ... | callee saved registers
+- * eBPF fp register =>    +-----+ <= (BPF_FP)
++ * original ARM_SP =>     +-----+
++ *                        |     | callee saved registers
++ *                        +-----+ <= (BPF_FP + SCRATCH_SIZE)
+  *                        | ... | eBPF JIT scratch space
+- *                        |     | eBPF prog stack
++ * eBPF fp register =>    +-----+
++ *   (BPF_FP)             | ... | eBPF prog stack
+  *                        +-----+
+  *                        |RSVD | JIT scratchpad
+- * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
++ * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+  *                        |     |
+  *                        | ... | Function call stack
+  *                        |     |
+  *                        +-----+
+  *                          low
++ *
++ * The callee saved registers depends on whether frame pointers are enabled.
++ * With frame pointers (to be compliant with the ABI):
++ *
++ *                                high
++ * original ARM_SP =>     +------------------+ \
++ *                        |        pc        | |
++ * current ARM_FP =>      +------------------+ } callee saved registers
++ *                        |r4-r8,r10,fp,ip,lr| |
++ *                        +------------------+ /
++ *                                low
++ *
++ * Without frame pointers:
++ *
++ *                                high
++ * original ARM_SP =>     +------------------+
++ *                        |        lr        | (optional)
++ *                        |     r4-r8,r10    | callee saved registers
++ *                        +------------------+
++ *                                low
+  */
+ #define STACK_OFFSET(k)       (k)
diff --git a/queue-4.14/arm-net-bpf-fix-ldx-instructions.patch b/queue-4.14/arm-net-bpf-fix-ldx-instructions.patch
new file mode 100644 (file)
index 0000000..f4e8af3
--- /dev/null
@@ -0,0 +1,115 @@
+From ec19e02b343db991d2d1610c409efefebf4e2ca9 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 21:06:16 +0000
+Subject: ARM: net: bpf: fix LDX instructions
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit ec19e02b343db991d2d1610c409efefebf4e2ca9 upstream.
+
+When the source and destination register are identical, our JIT does not
+generate correct code, which leads to kernel oopses.
+
+Fix this by (a) generating more efficient code, and (b) making use of
+the temporary earlier if we will overwrite the address register.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |   61 ++++++++++++++++++++++++----------------------
+ 1 file changed, 33 insertions(+), 28 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -913,33 +913,53 @@ static inline void emit_str_r(const u8 d
+ }
+ /* dst = *(size*)(src + off) */
+-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
+-                            const s32 off, struct jit_ctx *ctx, const u8 sz){
++static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
++                            s32 off, struct jit_ctx *ctx, const u8 sz){
+       const u8 *tmp = bpf2a32[TMP_REG_1];
+-      u8 rd = dstk ? tmp[1] : dst;
++      const u8 *rd = dstk ? tmp : dst;
+       u8 rm = src;
++      s32 off_max;
+-      if (off) {
++      if (sz == BPF_H)
++              off_max = 0xff;
++      else
++              off_max = 0xfff;
++
++      if (off < 0 || off > off_max) {
+               emit_a32_mov_i(tmp[0], off, false, ctx);
+               emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+               rm = tmp[0];
++              off = 0;
++      } else if (rd[1] == rm) {
++              emit(ARM_MOV_R(tmp[0], rm), ctx);
++              rm = tmp[0];
+       }
+       switch (sz) {
+-      case BPF_W:
+-              /* Load a Word */
+-              emit(ARM_LDR_I(rd, rm, 0), ctx);
++      case BPF_B:
++              /* Load a Byte */
++              emit(ARM_LDRB_I(rd[1], rm, off), ctx);
++              emit_a32_mov_i(dst[0], 0, dstk, ctx);
+               break;
+       case BPF_H:
+               /* Load a HalfWord */
+-              emit(ARM_LDRH_I(rd, rm, 0), ctx);
++              emit(ARM_LDRH_I(rd[1], rm, off), ctx);
++              emit_a32_mov_i(dst[0], 0, dstk, ctx);
+               break;
+-      case BPF_B:
+-              /* Load a Byte */
+-              emit(ARM_LDRB_I(rd, rm, 0), ctx);
++      case BPF_W:
++              /* Load a Word */
++              emit(ARM_LDR_I(rd[1], rm, off), ctx);
++              emit_a32_mov_i(dst[0], 0, dstk, ctx);
++              break;
++      case BPF_DW:
++              /* Load a Double Word */
++              emit(ARM_LDR_I(rd[1], rm, off), ctx);
++              emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
+               break;
+       }
+       if (dstk)
+-              emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
++              emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
++      if (dstk && sz == BPF_DW)
++              emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
+ }
+ /* Arithmatic Operation */
+@@ -1440,22 +1460,7 @@ exit:
+               rn = sstk ? tmp2[1] : src_lo;
+               if (sstk)
+                       emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+-              switch (BPF_SIZE(code)) {
+-              case BPF_W:
+-                      /* Load a Word */
+-              case BPF_H:
+-                      /* Load a Half-Word */
+-              case BPF_B:
+-                      /* Load a Byte */
+-                      emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
+-                      emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+-                      break;
+-              case BPF_DW:
+-                      /* Load a double word */
+-                      emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+-                      emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
+-                      break;
+-              }
++              emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
+               break;
+       /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+       case BPF_LD | BPF_ABS | BPF_W:
diff --git a/queue-4.14/arm-net-bpf-fix-register-saving.patch b/queue-4.14/arm-net-bpf-fix-register-saving.patch
new file mode 100644 (file)
index 0000000..27bcf09
--- /dev/null
@@ -0,0 +1,222 @@
+From 02088d9b392f605c892894b46aa8c83e3abd0115 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 22:38:18 +0000
+Subject: ARM: net: bpf: fix register saving
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 02088d9b392f605c892894b46aa8c83e3abd0115 upstream.
+
+When an eBPF program tail-calls another eBPF program, it enters it after
+the prologue to avoid having complex stack manipulations.  This can lead
+to kernel oopses, and similar.
+
+Resolve this by always using a fixed stack layout, a CPU register frame
+pointer, and using this when reloading registers before returning.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |   80 ++++++++++++----------------------------------
+ 1 file changed, 22 insertions(+), 58 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -61,20 +61,24 @@ int bpf_jit_enable __read_mostly;
+  *
+  *                                high
+  * original ARM_SP =>     +------------------+
+- *                        |        lr        | (optional)
+- *                        |     r4-r8,r10    | callee saved registers
+- *                        +------------------+
++ *                        | r4-r8,r10,fp,lr  | callee saved registers
++ * current ARM_FP =>      +------------------+
+  *                                low
++ *
++ * When popping registers off the stack at the end of a BPF function, we
++ * reference them via the current ARM_FP register.
+  */
++#define CALLEE_MASK   (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
++                       1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
++                       1 << ARM_FP)
++#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
++#define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)
+ #define STACK_OFFSET(k)       (k)
+ #define TMP_REG_1     (MAX_BPF_JIT_REG + 0)   /* TEMP Register 1 */
+ #define TMP_REG_2     (MAX_BPF_JIT_REG + 1)   /* TEMP Register 2 */
+ #define TCALL_CNT     (MAX_BPF_JIT_REG + 2)   /* Tail Call Count */
+-/* Flags used for JIT optimization */
+-#define SEEN_CALL     (1 << 0)
+-
+ #define FLAG_IMM_OVERFLOW     (1 << 0)
+ /*
+@@ -135,7 +139,6 @@ static const u8 bpf2a32[][2] = {
+  * idx                        :       index of current last JITed instruction.
+  * prologue_bytes     :       bytes used in prologue.
+  * epilogue_offset    :       offset of epilogue starting.
+- * seen                       :       bit mask used for JIT optimization.
+  * offsets            :       array of eBPF instruction offsets in
+  *                            JITed code.
+  * target             :       final JITed code.
+@@ -150,7 +153,6 @@ struct jit_ctx {
+       unsigned int idx;
+       unsigned int prologue_bytes;
+       unsigned int epilogue_offset;
+-      u32 seen;
+       u32 flags;
+       u32 *offsets;
+       u32 *target;
+@@ -340,7 +342,6 @@ static void emit_bx_r(u8 tgt_reg, struct
+ static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+ {
+-      ctx->seen |= SEEN_CALL;
+ #if __LINUX_ARM_ARCH__ < 5
+       emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+       emit_bx_r(tgt_reg, ctx);
+@@ -403,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u
+       }
+       /* Call appropriate function */
+-      ctx->seen |= SEEN_CALL;
+       emit_mov_i(ARM_IP, op == BPF_DIV ?
+                  (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+       emit_blx_r(ARM_IP, ctx);
+@@ -669,8 +669,6 @@ static inline void emit_a32_lsh_r64(cons
+       /* Do LSH operation */
+       emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
+       emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
+-      /* As we are using ARM_LR */
+-      ctx->seen |= SEEN_CALL;
+       emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
+       emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
+@@ -705,8 +703,6 @@ static inline void emit_a32_arsh_r64(con
+       /* Do the ARSH operation */
+       emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+       emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+-      /* As we are using ARM_LR */
+-      ctx->seen |= SEEN_CALL;
+       emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+       _emit(ARM_COND_MI, ARM_B(0), ctx);
+@@ -741,8 +737,6 @@ static inline void emit_a32_lsr_r64(cons
+       /* Do LSH operation */
+       emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+       emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+-      /* As we are using ARM_LR */
+-      ctx->seen |= SEEN_CALL;
+       emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+       emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
+@@ -877,8 +871,6 @@ static inline void emit_a32_mul_r64(cons
+       /* Do Multiplication */
+       emit(ARM_MUL(ARM_IP, rd, rn), ctx);
+       emit(ARM_MUL(ARM_LR, rm, rt), ctx);
+-      /* As we are using ARM_LR */
+-      ctx->seen |= SEEN_CALL;
+       emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
+       emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
+@@ -955,7 +947,6 @@ static inline void emit_ar_r(const u8 rd
+                            const u8 rn, struct jit_ctx *ctx, u8 op) {
+       switch (op) {
+       case BPF_JSET:
+-              ctx->seen |= SEEN_CALL;
+               emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
+               emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
+               emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+@@ -1119,33 +1110,22 @@ static void build_prologue(struct jit_ct
+       const u8 r2 = bpf2a32[BPF_REG_1][1];
+       const u8 r3 = bpf2a32[BPF_REG_1][0];
+       const u8 r4 = bpf2a32[BPF_REG_6][1];
+-      const u8 r5 = bpf2a32[BPF_REG_6][0];
+-      const u8 r6 = bpf2a32[TMP_REG_1][1];
+-      const u8 r7 = bpf2a32[TMP_REG_1][0];
+-      const u8 r8 = bpf2a32[TMP_REG_2][1];
+-      const u8 r10 = bpf2a32[TMP_REG_2][0];
+       const u8 fplo = bpf2a32[BPF_REG_FP][1];
+       const u8 fphi = bpf2a32[BPF_REG_FP][0];
+-      const u8 sp = ARM_SP;
+       const u8 *tcc = bpf2a32[TCALL_CNT];
+-      u16 reg_set = 0;
+-
+       /* Save callee saved registers. */
+-      reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+ #ifdef CONFIG_FRAME_POINTER
+-      reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
+-      emit(ARM_MOV_R(ARM_IP, sp), ctx);
++      u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
++      emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
+       emit(ARM_PUSH(reg_set), ctx);
+       emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
+ #else
+-      /* Check if call instruction exists in BPF body */
+-      if (ctx->seen & SEEN_CALL)
+-              reg_set |= (1<<ARM_LR);
+-      emit(ARM_PUSH(reg_set), ctx);
++      emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
++      emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
+ #endif
+       /* Save frame pointer for later */
+-      emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
++      emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+       ctx->stack_size = imm8m(STACK_SIZE);
+@@ -1168,33 +1148,19 @@ static void build_prologue(struct jit_ct
+       /* end of prologue */
+ }
++/* restore callee saved registers. */
+ static void build_epilogue(struct jit_ctx *ctx)
+ {
+-      const u8 r4 = bpf2a32[BPF_REG_6][1];
+-      const u8 r5 = bpf2a32[BPF_REG_6][0];
+-      const u8 r6 = bpf2a32[TMP_REG_1][1];
+-      const u8 r7 = bpf2a32[TMP_REG_1][0];
+-      const u8 r8 = bpf2a32[TMP_REG_2][1];
+-      const u8 r10 = bpf2a32[TMP_REG_2][0];
+-      u16 reg_set = 0;
+-
+-      /* unwind function call stack */
+-      emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
+-
+-      /* restore callee saved registers. */
+-      reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+ #ifdef CONFIG_FRAME_POINTER
+-      /* the first instruction of the prologue was: mov ip, sp */
+-      reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
++      /* When using frame pointers, some additional registers need to
++       * be loaded. */
++      u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
++      emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
+       emit(ARM_LDM(ARM_SP, reg_set), ctx);
+ #else
+-      if (ctx->seen & SEEN_CALL)
+-              reg_set |= (1<<ARM_PC);
+       /* Restore callee saved registers. */
+-      emit(ARM_POP(reg_set), ctx);
+-      /* Return back to the callee function */
+-      if (!(ctx->seen & SEEN_CALL))
+-              emit_bx_r(ARM_LR, ctx);
++      emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
++      emit(ARM_POP(CALLEE_POP_MASK), ctx);
+ #endif
+ }
+@@ -1422,8 +1388,6 @@ static int build_insn(const struct bpf_i
+                       emit_rev32(rt, rt, ctx);
+                       goto emit_bswap_uxt;
+               case 64:
+-                      /* Because of the usage of ARM_LR */
+-                      ctx->seen |= SEEN_CALL;
+                       emit_rev32(ARM_LR, rt, ctx);
+                       emit_rev32(rt, rd, ctx);
+                       emit(ARM_MOV_R(rd, ARM_LR), ctx);
diff --git a/queue-4.14/arm-net-bpf-fix-stack-alignment.patch b/queue-4.14/arm-net-bpf-fix-stack-alignment.patch
new file mode 100644 (file)
index 0000000..206528e
--- /dev/null
@@ -0,0 +1,49 @@
+From d1220efd23484c72c82d5471f05daeb35b5d1916 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 16:10:07 +0000
+Subject: ARM: net: bpf: fix stack alignment
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit d1220efd23484c72c82d5471f05daeb35b5d1916 upstream.
+
+As per 2dede2d8e925 ("ARM EABI: stack pointer must be 64-bit aligned
+after a CPU exception") the stack should be aligned to a 64-bit boundary
+on EABI systems.  Ensure that the eBPF JIT appropriately aligns the
+stack.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -179,8 +179,13 @@ static void jit_fill_hole(void *area, un
+               *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
+ }
+-/* Stack must be multiples of 16 Bytes */
+-#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
++#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
++/* EABI requires the stack to be aligned to 64-bit boundaries */
++#define STACK_ALIGNMENT       8
++#else
++/* Stack must be aligned to 32-bit boundaries */
++#define STACK_ALIGNMENT       4
++#endif
+ /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+  * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+@@ -194,7 +199,7 @@ static void jit_fill_hole(void *area, un
+        + SCRATCH_SIZE + \
+        + 4 /* extra for skb_copy_bits buffer */)
+-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
++#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+ /* Get the offset of eBPF REGISTERs stored on scratch space. */
+ #define STACK_VAR(off) (STACK_SIZE-off-4)
diff --git a/queue-4.14/arm-net-bpf-fix-tail-call-jumps.patch b/queue-4.14/arm-net-bpf-fix-tail-call-jumps.patch
new file mode 100644 (file)
index 0000000..3f90e85
--- /dev/null
@@ -0,0 +1,75 @@
+From f4483f2cc1fdc03488c8a1452e545545ae5bda93 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 11:39:54 +0000
+Subject: ARM: net: bpf: fix tail call jumps
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit f4483f2cc1fdc03488c8a1452e545545ae5bda93 upstream.
+
+When a tail call fails, it is documented that the tail call should
+continue execution at the following instruction.  An example tail call
+sequence is:
+
+  12: (85) call bpf_tail_call#12
+  13: (b7) r0 = 0
+  14: (95) exit
+
+The ARM assembler for the tail call in this case ends up branching to
+instruction 14 instead of instruction 13, resulting in the BPF filter
+returning a non-zero value:
+
+  178: ldr     r8, [sp, #588]  ; insn 12
+  17c: ldr     r6, [r8, r6]
+  180: ldr     r8, [sp, #580]
+  184: cmp     r8, r6
+  188: bcs     0x1e8
+  18c: ldr     r6, [sp, #524]
+  190: ldr     r7, [sp, #528]
+  194: cmp     r7, #0
+  198: cmpeq   r6, #32
+  19c: bhi     0x1e8
+  1a0: adds    r6, r6, #1
+  1a4: adc     r7, r7, #0
+  1a8: str     r6, [sp, #524]
+  1ac: str     r7, [sp, #528]
+  1b0: mov     r6, #104
+  1b4: ldr     r8, [sp, #588]
+  1b8: add     r6, r8, r6
+  1bc: ldr     r8, [sp, #580]
+  1c0: lsl     r7, r8, #2
+  1c4: ldr     r6, [r6, r7]
+  1c8: cmp     r6, #0
+  1cc: beq     0x1e8
+  1d0: mov     r8, #32
+  1d4: ldr     r6, [r6, r8]
+  1d8: add     r6, r6, #44
+  1dc: bx      r6
+  1e0: mov     r0, #0          ; insn 13
+  1e4: mov     r1, #0
+  1e8: add     sp, sp, #596    ; insn 14
+  1ec: pop     {r4, r5, r6, r7, r8, sl, pc}
+
+For other sequences, the tail call could end up branching midway through
+the following BPF instructions, or maybe off the end of the function,
+leading to unknown behaviours.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -949,7 +949,7 @@ static int emit_bpf_tail_call(struct jit
+       const u8 *tcc = bpf2a32[TCALL_CNT];
+       const int idx0 = ctx->idx;
+ #define cur_offset (ctx->idx - idx0)
+-#define jmp_offset (out_offset - (cur_offset))
++#define jmp_offset (out_offset - (cur_offset) - 2)
+       u32 off, lo, hi;
+       /* if (index >= array->map.max_entries)
diff --git a/queue-4.14/arm-net-bpf-move-stack-documentation.patch b/queue-4.14/arm-net-bpf-move-stack-documentation.patch
new file mode 100644 (file)
index 0000000..f372e9c
--- /dev/null
@@ -0,0 +1,77 @@
+From 70ec3a6c2c11e4b0e107a65de943a082f9aff351 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 21:26:14 +0000
+Subject: ARM: net: bpf: move stack documentation
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 70ec3a6c2c11e4b0e107a65de943a082f9aff351 upstream.
+
+Move the stack documentation towards the top of the file, where it's
+relevant for things like the register layout.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c |   42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -27,6 +27,27 @@
+ int bpf_jit_enable __read_mostly;
++/*
++ * eBPF prog stack layout
++ *
++ *                         high
++ * original ARM_SP =>     +-----+ eBPF prologue
++ *                        |FP/LR|
++ * current ARM_FP =>      +-----+
++ *                        | ... | callee saved registers
++ * eBPF fp register =>    +-----+ <= (BPF_FP)
++ *                        | ... | eBPF JIT scratch space
++ *                        |     | eBPF prog stack
++ *                        +-----+
++ *                        |RSVD | JIT scratchpad
++ * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
++ *                        |     |
++ *                        | ... | Function call stack
++ *                        |     |
++ *                        +-----+
++ *                          low
++ */
++
+ #define STACK_OFFSET(k)       (k)
+ #define TMP_REG_1     (MAX_BPF_JIT_REG + 0)   /* TEMP Register 1 */
+ #define TMP_REG_2     (MAX_BPF_JIT_REG + 1)   /* TEMP Register 2 */
+@@ -1091,27 +1112,6 @@ static void build_prologue(struct jit_ct
+       u16 reg_set = 0;
+-      /*
+-       * eBPF prog stack layout
+-       *
+-       *                         high
+-       * original ARM_SP =>     +-----+ eBPF prologue
+-       *                        |FP/LR|
+-       * current ARM_FP =>      +-----+
+-       *                        | ... | callee saved registers
+-       * eBPF fp register =>    +-----+ <= (BPF_FP)
+-       *                        | ... | eBPF JIT scratch space
+-       *                        |     | eBPF prog stack
+-       *                        +-----+
+-       *                        |RSVD | JIT scratchpad
+-       * current A64_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
+-       *                        |     |
+-       *                        | ... | Function call stack
+-       *                        |     |
+-       *                        +-----+
+-       *                          low
+-       */
+-
+       /* Save callee saved registers. */
+       reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+ #ifdef CONFIG_FRAME_POINTER
index befeface692b80143a780a15439f1f9c5cf205d6..97850044744cd8ed85684434210b89a23609788f 100644 (file)
@@ -11,3 +11,11 @@ input-trackpoint-only-expose-supported-controls-for-elan-alps-and-nxp.patch
 btrfs-fix-stale-entries-in-readdir.patch
 kvm-s390-add-proper-locking-for-cmma-migration-bitmap.patch
 orangefs-fix-deadlock-do-not-write-i_size-in-read_iter.patch
+arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch
+arm-net-bpf-fix-tail-call-jumps.patch
+arm-net-bpf-fix-stack-alignment.patch
+arm-net-bpf-move-stack-documentation.patch
+arm-net-bpf-correct-stack-layout-documentation.patch
+arm-net-bpf-fix-register-saving.patch
+arm-net-bpf-fix-ldx-instructions.patch
+arm-net-bpf-clarify-tail_call-index.patch