--- /dev/null
+From e9062481824384f00299971f923fecf6b3668001 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 11:35:15 +0000
+Subject: ARM: net: bpf: avoid 'bx' instruction on non-Thumb capable CPUs
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit e9062481824384f00299971f923fecf6b3668001 upstream.
+
+Avoid the 'bx' instruction on CPUs that have no support for Thumb and
+thus do not implement this instruction by moving the generation of this
+opcode to a separate function that selects between:
+
+ bx reg
+
+and
+
+ mov pc, reg
+
+according to the capabilities of the CPU.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -285,16 +285,20 @@ static inline void emit_mov_i(const u8 r
+ emit_mov_i_no8m(rd, val, ctx);
+ }
+
+-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
++static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
+ {
+- ctx->seen |= SEEN_CALL;
+-#if __LINUX_ARM_ARCH__ < 5
+- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+-
+ if (elf_hwcap & HWCAP_THUMB)
+ emit(ARM_BX(tgt_reg), ctx);
+ else
+ emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
++}
++
++static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
++{
++ ctx->seen |= SEEN_CALL;
++#if __LINUX_ARM_ARCH__ < 5
++ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
++ emit_bx_r(tgt_reg, ctx);
+ #else
+ emit(ARM_BLX_R(tgt_reg), ctx);
+ #endif
+@@ -997,7 +1001,7 @@ static int emit_bpf_tail_call(struct jit
+ emit_a32_mov_i(tmp2[1], off, false, ctx);
+ emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
+ emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
+- emit(ARM_BX(tmp[1]), ctx);
++ emit_bx_r(tmp[1], ctx);
+
+ /* out: */
+ if (out_offset == -1)
+@@ -1166,7 +1170,7 @@ static void build_epilogue(struct jit_ct
+ emit(ARM_POP(reg_set), ctx);
+ /* Return back to the callee function */
+ if (!(ctx->seen & SEEN_CALL))
+- emit(ARM_BX(ARM_LR), ctx);
++ emit_bx_r(ARM_LR, ctx);
+ #endif
+ }
+
--- /dev/null
+From 091f02483df7b56615b524491f404e574c5e0668 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 12:11:26 +0000
+Subject: ARM: net: bpf: clarify tail_call index
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 091f02483df7b56615b524491f404e574c5e0668 upstream.
+
+As per 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT"), the index used
+for array lookup is defined to be 32-bit wide. Update a misleading
+comment that suggests it is 64-bit wide.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -1016,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit
+ emit_a32_mov_i(tmp[1], off, false, ctx);
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
+ emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
+- /* index (64 bit) */
++ /* index is 32-bit for arrays */
+ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
+ /* index >= array->map.max_entries */
+ emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
--- /dev/null
+From 0005e55a79cfda88199e41a406a829c88d708c67 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 22:51:27 +0000
+Subject: ARM: net: bpf: correct stack layout documentation
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 0005e55a79cfda88199e41a406a829c88d708c67 upstream.
+
+The stack layout documentation incorrectly suggests that the BPF JIT
+scratch space starts immediately below BPF_FP. This is not correct,
+so let's fix the documentation to reflect reality.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 35 +++++++++++++++++++++++++++--------
+ 1 file changed, 27 insertions(+), 8 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -28,24 +28,43 @@
+ int bpf_jit_enable __read_mostly;
+
+ /*
+- * eBPF prog stack layout
++ * eBPF prog stack layout:
+ *
+ * high
+- * original ARM_SP => +-----+ eBPF prologue
+- * |FP/LR|
+- * current ARM_FP => +-----+
+- * | ... | callee saved registers
+- * eBPF fp register => +-----+ <= (BPF_FP)
++ * original ARM_SP => +-----+
++ * | | callee saved registers
++ * +-----+ <= (BPF_FP + SCRATCH_SIZE)
+ * | ... | eBPF JIT scratch space
+- * | | eBPF prog stack
++ * eBPF fp register => +-----+
++ * (BPF_FP) | ... | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+- * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE)
++ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
++ *
++ * The callee saved registers depends on whether frame pointers are enabled.
++ * With frame pointers (to be compliant with the ABI):
++ *
++ * high
++ * original ARM_SP => +------------------+ \
++ * | pc | |
++ * current ARM_FP => +------------------+ } callee saved registers
++ * |r4-r8,r10,fp,ip,lr| |
++ * +------------------+ /
++ * low
++ *
++ * Without frame pointers:
++ *
++ * high
++ * original ARM_SP => +------------------+
++ * | lr | (optional)
++ * | r4-r8,r10 | callee saved registers
++ * +------------------+
++ * low
+ */
+
+ #define STACK_OFFSET(k) (k)
--- /dev/null
+From ec19e02b343db991d2d1610c409efefebf4e2ca9 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 21:06:16 +0000
+Subject: ARM: net: bpf: fix LDX instructions
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit ec19e02b343db991d2d1610c409efefebf4e2ca9 upstream.
+
+When the source and destination register are identical, our JIT does not
+generate correct code, which leads to kernel oopses.
+
+Fix this by (a) generating more efficient code, and (b) making use of
+the temporary earlier if we will overwrite the address register.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 61 ++++++++++++++++++++++++----------------------
+ 1 file changed, 33 insertions(+), 28 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -913,33 +913,53 @@ static inline void emit_str_r(const u8 d
+ }
+
+ /* dst = *(size*)(src + off) */
+-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
+- const s32 off, struct jit_ctx *ctx, const u8 sz){
++static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
++ s32 off, struct jit_ctx *ctx, const u8 sz){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+- u8 rd = dstk ? tmp[1] : dst;
++ const u8 *rd = dstk ? tmp : dst;
+ u8 rm = src;
++ s32 off_max;
+
+- if (off) {
++ if (sz == BPF_H)
++ off_max = 0xff;
++ else
++ off_max = 0xfff;
++
++ if (off < 0 || off > off_max) {
+ emit_a32_mov_i(tmp[0], off, false, ctx);
+ emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
+ rm = tmp[0];
++ off = 0;
++ } else if (rd[1] == rm) {
++ emit(ARM_MOV_R(tmp[0], rm), ctx);
++ rm = tmp[0];
+ }
+ switch (sz) {
+- case BPF_W:
+- /* Load a Word */
+- emit(ARM_LDR_I(rd, rm, 0), ctx);
++ case BPF_B:
++ /* Load a Byte */
++ emit(ARM_LDRB_I(rd[1], rm, off), ctx);
++ emit_a32_mov_i(dst[0], 0, dstk, ctx);
+ break;
+ case BPF_H:
+ /* Load a HalfWord */
+- emit(ARM_LDRH_I(rd, rm, 0), ctx);
++ emit(ARM_LDRH_I(rd[1], rm, off), ctx);
++ emit_a32_mov_i(dst[0], 0, dstk, ctx);
+ break;
+- case BPF_B:
+- /* Load a Byte */
+- emit(ARM_LDRB_I(rd, rm, 0), ctx);
++ case BPF_W:
++ /* Load a Word */
++ emit(ARM_LDR_I(rd[1], rm, off), ctx);
++ emit_a32_mov_i(dst[0], 0, dstk, ctx);
++ break;
++ case BPF_DW:
++ /* Load a Double Word */
++ emit(ARM_LDR_I(rd[1], rm, off), ctx);
++ emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
+ break;
+ }
+ if (dstk)
+- emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
++ emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
++ if (dstk && sz == BPF_DW)
++ emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
+ }
+
+ /* Arithmatic Operation */
+@@ -1440,22 +1460,7 @@ exit:
+ rn = sstk ? tmp2[1] : src_lo;
+ if (sstk)
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
+- switch (BPF_SIZE(code)) {
+- case BPF_W:
+- /* Load a Word */
+- case BPF_H:
+- /* Load a Half-Word */
+- case BPF_B:
+- /* Load a Byte */
+- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
+- emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+- break;
+- case BPF_DW:
+- /* Load a double word */
+- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
+- emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
+- break;
+- }
++ emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
+ break;
+ /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
+ case BPF_LD | BPF_ABS | BPF_W:
--- /dev/null
+From 02088d9b392f605c892894b46aa8c83e3abd0115 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 22:38:18 +0000
+Subject: ARM: net: bpf: fix register saving
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 02088d9b392f605c892894b46aa8c83e3abd0115 upstream.
+
+When an eBPF program tail-calls another eBPF program, it enters it after
+the prologue to avoid having complex stack manipulations. This can lead
+to kernel oopses, and similar.
+
+Resolve this by always using a fixed stack layout, a CPU register frame
+pointer, and using this when reloading registers before returning.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 80 ++++++++++++----------------------------------
+ 1 file changed, 22 insertions(+), 58 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -61,20 +61,24 @@ int bpf_jit_enable __read_mostly;
+ *
+ * high
+ * original ARM_SP => +------------------+
+- * | lr | (optional)
+- * | r4-r8,r10 | callee saved registers
+- * +------------------+
++ * | r4-r8,r10,fp,lr | callee saved registers
++ * current ARM_FP => +------------------+
+ * low
++ *
++ * When popping registers off the stack at the end of a BPF function, we
++ * reference them via the current ARM_FP register.
+ */
++#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
++ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
++ 1 << ARM_FP)
++#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
++#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
+
+ #define STACK_OFFSET(k) (k)
+ #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
+ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
+ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
+
+-/* Flags used for JIT optimization */
+-#define SEEN_CALL (1 << 0)
+-
+ #define FLAG_IMM_OVERFLOW (1 << 0)
+
+ /*
+@@ -135,7 +139,6 @@ static const u8 bpf2a32[][2] = {
+ * idx : index of current last JITed instruction.
+ * prologue_bytes : bytes used in prologue.
+ * epilogue_offset : offset of epilogue starting.
+- * seen : bit mask used for JIT optimization.
+ * offsets : array of eBPF instruction offsets in
+ * JITed code.
+ * target : final JITed code.
+@@ -150,7 +153,6 @@ struct jit_ctx {
+ unsigned int idx;
+ unsigned int prologue_bytes;
+ unsigned int epilogue_offset;
+- u32 seen;
+ u32 flags;
+ u32 *offsets;
+ u32 *target;
+@@ -340,7 +342,6 @@ static void emit_bx_r(u8 tgt_reg, struct
+
+ static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+ {
+- ctx->seen |= SEEN_CALL;
+ #if __LINUX_ARM_ARCH__ < 5
+ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+ emit_bx_r(tgt_reg, ctx);
+@@ -403,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u
+ }
+
+ /* Call appropriate function */
+- ctx->seen |= SEEN_CALL;
+ emit_mov_i(ARM_IP, op == BPF_DIV ?
+ (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ emit_blx_r(ARM_IP, ctx);
+@@ -669,8 +669,6 @@ static inline void emit_a32_lsh_r64(cons
+ /* Do LSH operation */
+ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
+- /* As we are using ARM_LR */
+- ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
+@@ -705,8 +703,6 @@ static inline void emit_a32_arsh_r64(con
+ /* Do the ARSH operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+- /* As we are using ARM_LR */
+- ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ _emit(ARM_COND_MI, ARM_B(0), ctx);
+@@ -741,8 +737,6 @@ static inline void emit_a32_lsr_r64(cons
+ /* Do LSH operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+- /* As we are using ARM_LR */
+- ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
+@@ -877,8 +871,6 @@ static inline void emit_a32_mul_r64(cons
+ /* Do Multiplication */
+ emit(ARM_MUL(ARM_IP, rd, rn), ctx);
+ emit(ARM_MUL(ARM_LR, rm, rt), ctx);
+- /* As we are using ARM_LR */
+- ctx->seen |= SEEN_CALL;
+ emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
+
+ emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
+@@ -955,7 +947,6 @@ static inline void emit_ar_r(const u8 rd
+ const u8 rn, struct jit_ctx *ctx, u8 op) {
+ switch (op) {
+ case BPF_JSET:
+- ctx->seen |= SEEN_CALL;
+ emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
+ emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
+ emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
+@@ -1119,33 +1110,22 @@ static void build_prologue(struct jit_ct
+ const u8 r2 = bpf2a32[BPF_REG_1][1];
+ const u8 r3 = bpf2a32[BPF_REG_1][0];
+ const u8 r4 = bpf2a32[BPF_REG_6][1];
+- const u8 r5 = bpf2a32[BPF_REG_6][0];
+- const u8 r6 = bpf2a32[TMP_REG_1][1];
+- const u8 r7 = bpf2a32[TMP_REG_1][0];
+- const u8 r8 = bpf2a32[TMP_REG_2][1];
+- const u8 r10 = bpf2a32[TMP_REG_2][0];
+ const u8 fplo = bpf2a32[BPF_REG_FP][1];
+ const u8 fphi = bpf2a32[BPF_REG_FP][0];
+- const u8 sp = ARM_SP;
+ const u8 *tcc = bpf2a32[TCALL_CNT];
+
+- u16 reg_set = 0;
+-
+ /* Save callee saved registers. */
+- reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+ #ifdef CONFIG_FRAME_POINTER
+- reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
+- emit(ARM_MOV_R(ARM_IP, sp), ctx);
++ u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
++ emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
+ emit(ARM_PUSH(reg_set), ctx);
+ emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
+ #else
+- /* Check if call instruction exists in BPF body */
+- if (ctx->seen & SEEN_CALL)
+- reg_set |= (1<<ARM_LR);
+- emit(ARM_PUSH(reg_set), ctx);
++ emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
++ emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
+ #endif
+ /* Save frame pointer for later */
+- emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
++ emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+
+ ctx->stack_size = imm8m(STACK_SIZE);
+
+@@ -1168,33 +1148,19 @@ static void build_prologue(struct jit_ct
+ /* end of prologue */
+ }
+
++/* restore callee saved registers. */
+ static void build_epilogue(struct jit_ctx *ctx)
+ {
+- const u8 r4 = bpf2a32[BPF_REG_6][1];
+- const u8 r5 = bpf2a32[BPF_REG_6][0];
+- const u8 r6 = bpf2a32[TMP_REG_1][1];
+- const u8 r7 = bpf2a32[TMP_REG_1][0];
+- const u8 r8 = bpf2a32[TMP_REG_2][1];
+- const u8 r10 = bpf2a32[TMP_REG_2][0];
+- u16 reg_set = 0;
+-
+- /* unwind function call stack */
+- emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
+-
+- /* restore callee saved registers. */
+- reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+ #ifdef CONFIG_FRAME_POINTER
+- /* the first instruction of the prologue was: mov ip, sp */
+- reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
++ /* When using frame pointers, some additional registers need to
++ * be loaded. */
++ u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
++ emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
+ emit(ARM_LDM(ARM_SP, reg_set), ctx);
+ #else
+- if (ctx->seen & SEEN_CALL)
+- reg_set |= (1<<ARM_PC);
+ /* Restore callee saved registers. */
+- emit(ARM_POP(reg_set), ctx);
+- /* Return back to the callee function */
+- if (!(ctx->seen & SEEN_CALL))
+- emit_bx_r(ARM_LR, ctx);
++ emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
++ emit(ARM_POP(CALLEE_POP_MASK), ctx);
+ #endif
+ }
+
+@@ -1422,8 +1388,6 @@ static int build_insn(const struct bpf_i
+ emit_rev32(rt, rt, ctx);
+ goto emit_bswap_uxt;
+ case 64:
+- /* Because of the usage of ARM_LR */
+- ctx->seen |= SEEN_CALL;
+ emit_rev32(ARM_LR, rt, ctx);
+ emit_rev32(rt, rd, ctx);
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
--- /dev/null
+From d1220efd23484c72c82d5471f05daeb35b5d1916 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 16:10:07 +0000
+Subject: ARM: net: bpf: fix stack alignment
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit d1220efd23484c72c82d5471f05daeb35b5d1916 upstream.
+
+As per 2dede2d8e925 ("ARM EABI: stack pointer must be 64-bit aligned
+after a CPU exception") the stack should be aligned to a 64-bit boundary
+on EABI systems. Ensure that the eBPF JIT appropriately aligns the
+stack.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -179,8 +179,13 @@ static void jit_fill_hole(void *area, un
+ *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
+ }
+
+-/* Stack must be multiples of 16 Bytes */
+-#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
++#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
++/* EABI requires the stack to be aligned to 64-bit boundaries */
++#define STACK_ALIGNMENT 8
++#else
++/* Stack must be aligned to 32-bit boundaries */
++#define STACK_ALIGNMENT 4
++#endif
+
+ /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+@@ -194,7 +199,7 @@ static void jit_fill_hole(void *area, un
+ + SCRATCH_SIZE + \
+ + 4 /* extra for skb_copy_bits buffer */)
+
+-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
++#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+
+ /* Get the offset of eBPF REGISTERs stored on scratch space. */
+ #define STACK_VAR(off) (STACK_SIZE-off-4)
--- /dev/null
+From f4483f2cc1fdc03488c8a1452e545545ae5bda93 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 11:39:54 +0000
+Subject: ARM: net: bpf: fix tail call jumps
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit f4483f2cc1fdc03488c8a1452e545545ae5bda93 upstream.
+
+When a tail call fails, it is documented that the tail call should
+continue execution at the following instruction. An example tail call
+sequence is:
+
+ 12: (85) call bpf_tail_call#12
+ 13: (b7) r0 = 0
+ 14: (95) exit
+
+The ARM assembler for the tail call in this case ends up branching to
+instruction 14 instead of instruction 13, resulting in the BPF filter
+returning a non-zero value:
+
+ 178: ldr r8, [sp, #588] ; insn 12
+ 17c: ldr r6, [r8, r6]
+ 180: ldr r8, [sp, #580]
+ 184: cmp r8, r6
+ 188: bcs 0x1e8
+ 18c: ldr r6, [sp, #524]
+ 190: ldr r7, [sp, #528]
+ 194: cmp r7, #0
+ 198: cmpeq r6, #32
+ 19c: bhi 0x1e8
+ 1a0: adds r6, r6, #1
+ 1a4: adc r7, r7, #0
+ 1a8: str r6, [sp, #524]
+ 1ac: str r7, [sp, #528]
+ 1b0: mov r6, #104
+ 1b4: ldr r8, [sp, #588]
+ 1b8: add r6, r8, r6
+ 1bc: ldr r8, [sp, #580]
+ 1c0: lsl r7, r8, #2
+ 1c4: ldr r6, [r6, r7]
+ 1c8: cmp r6, #0
+ 1cc: beq 0x1e8
+ 1d0: mov r8, #32
+ 1d4: ldr r6, [r6, r8]
+ 1d8: add r6, r6, #44
+ 1dc: bx r6
+ 1e0: mov r0, #0 ; insn 13
+ 1e4: mov r1, #0
+ 1e8: add sp, sp, #596 ; insn 14
+ 1ec: pop {r4, r5, r6, r7, r8, sl, pc}
+
+For other sequences, the tail call could end up branching midway through
+the following BPF instructions, or maybe off the end of the function,
+leading to unknown behaviours.
+
+Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -949,7 +949,7 @@ static int emit_bpf_tail_call(struct jit
+ const u8 *tcc = bpf2a32[TCALL_CNT];
+ const int idx0 = ctx->idx;
+ #define cur_offset (ctx->idx - idx0)
+-#define jmp_offset (out_offset - (cur_offset))
++#define jmp_offset (out_offset - (cur_offset) - 2)
+ u32 off, lo, hi;
+
+ /* if (index >= array->map.max_entries)
--- /dev/null
+From 70ec3a6c2c11e4b0e107a65de943a082f9aff351 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sat, 13 Jan 2018 21:26:14 +0000
+Subject: ARM: net: bpf: move stack documentation
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 70ec3a6c2c11e4b0e107a65de943a082f9aff351 upstream.
+
+Move the stack documentation towards the top of the file, where it's
+relevant for things like the register layout.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/net/bpf_jit_32.c | 42 +++++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 21 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -27,6 +27,27 @@
+
+ int bpf_jit_enable __read_mostly;
+
++/*
++ * eBPF prog stack layout
++ *
++ * high
++ * original ARM_SP => +-----+ eBPF prologue
++ * |FP/LR|
++ * current ARM_FP => +-----+
++ * | ... | callee saved registers
++ * eBPF fp register => +-----+ <= (BPF_FP)
++ * | ... | eBPF JIT scratch space
++ * | | eBPF prog stack
++ * +-----+
++ * |RSVD | JIT scratchpad
++ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE)
++ * | |
++ * | ... | Function call stack
++ * | |
++ * +-----+
++ * low
++ */
++
+ #define STACK_OFFSET(k) (k)
+ #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
+ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
+@@ -1091,27 +1112,6 @@ static void build_prologue(struct jit_ct
+
+ u16 reg_set = 0;
+
+- /*
+- * eBPF prog stack layout
+- *
+- * high
+- * original ARM_SP => +-----+ eBPF prologue
+- * |FP/LR|
+- * current ARM_FP => +-----+
+- * | ... | callee saved registers
+- * eBPF fp register => +-----+ <= (BPF_FP)
+- * | ... | eBPF JIT scratch space
+- * | | eBPF prog stack
+- * +-----+
+- * |RSVD | JIT scratchpad
+- * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
+- * | |
+- * | ... | Function call stack
+- * | |
+- * +-----+
+- * low
+- */
+-
+ /* Save callee saved registers. */
+ reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
+ #ifdef CONFIG_FRAME_POINTER
btrfs-fix-stale-entries-in-readdir.patch
kvm-s390-add-proper-locking-for-cmma-migration-bitmap.patch
orangefs-fix-deadlock-do-not-write-i_size-in-read_iter.patch
+arm-net-bpf-avoid-bx-instruction-on-non-thumb-capable-cpus.patch
+arm-net-bpf-fix-tail-call-jumps.patch
+arm-net-bpf-fix-stack-alignment.patch
+arm-net-bpf-move-stack-documentation.patch
+arm-net-bpf-correct-stack-layout-documentation.patch
+arm-net-bpf-fix-register-saving.patch
+arm-net-bpf-fix-ldx-instructions.patch
+arm-net-bpf-clarify-tail_call-index.patch