git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jan 2018 12:20:02 +0000 (13:20 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Jan 2018 12:20:02 +0000 (13:20 +0100)
added patches:
bpf-arm64-fix-stack_depth-tracking-in-combination-with-tail-calls.patch
bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
bpf-fix-32-bit-divide-by-zero.patch
bpf-fix-divides-by-zero.patch
bpf-introduce-bpf_jit_always_on-config.patch
bpf-reject-stores-into-ctx-via-st-and-xadd.patch

queue-4.14/bpf-arm64-fix-stack_depth-tracking-in-combination-with-tail-calls.patch [new file with mode: 0644]
queue-4.14/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch [new file with mode: 0644]
queue-4.14/bpf-fix-32-bit-divide-by-zero.patch [new file with mode: 0644]
queue-4.14/bpf-fix-divides-by-zero.patch [new file with mode: 0644]
queue-4.14/bpf-introduce-bpf_jit_always_on-config.patch [new file with mode: 0644]
queue-4.14/bpf-reject-stores-into-ctx-via-st-and-xadd.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/bpf-arm64-fix-stack_depth-tracking-in-combination-with-tail-calls.patch b/queue-4.14/bpf-arm64-fix-stack_depth-tracking-in-combination-with-tail-calls.patch
new file mode 100644 (file)
index 0000000..2014a5b
--- /dev/null
@@ -0,0 +1,83 @@
+From foo@baz Mon Jan 29 13:14:09 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 00:36:47 +0100
+Subject: bpf, arm64: fix stack_depth tracking in combination with tail calls
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, stable@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20180128233647.21154-7-daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ upstream commit a2284d912bfc865cdca4c00488e08a3550f9a405 ]
+
+Using dynamic stack_depth tracking in the arm64 JIT is currently broken
+in combination with tail calls. In the prologue, we cache ctx->stack_size
+and adjust the SP register to set up the function call stack, and tear
+it down again in the epilogue. The problem is that when doing a tail
+call, the target program's ctx->stack_size might not be the same as the
+caller's cached value.
+
+One way to fix the problem with minimal overhead is to re-adjust SP in
+emit_bpf_tail_call(), undoing the current program's ctx->stack_size
+before the jump so that the target sets up its own stack. Tested on
+Cavium ThunderX ARMv8.
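+
+A rough sketch of the control flow after the fix (illustrative sizes;
+see the hunks below), with the SP setup moved past PROLOGUE_OFFSET and
+the tail call undoing the current program's stack first:
+
+  /* prog A prologue:   SP -= A->stack_size (e.g. 64)            */
+  /* prog A tail call:  SP += A->stack_size;                     */
+  /*                    goto B->bpf_func + 4 * PROLOGUE_OFFSET;  */
+  /* prog B, entered right at its own SP setup, then does        */
+  /* SP -= B->stack_size (e.g. 16), matching B's epilogue.       */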
+
+Fixes: f1c9eed7f437 ("bpf, arm64: take advantage of stack_depth tracking")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/net/bpf_jit_comp.c |   20 +++++++++++---------
+ 1 file changed, 11 insertions(+), 9 deletions(-)
+
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -148,7 +148,8 @@ static inline int epilogue_offset(const
+ /* Stack must be multiples of 16B */
+ #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
+-#define PROLOGUE_OFFSET 8
++/* Tail call offset to jump into */
++#define PROLOGUE_OFFSET 7
+ static int build_prologue(struct jit_ctx *ctx)
+ {
+@@ -200,19 +201,19 @@ static int build_prologue(struct jit_ctx
+       /* Initialize tail_call_cnt */
+       emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+-      /* 4 byte extra for skb_copy_bits buffer */
+-      ctx->stack_size = prog->aux->stack_depth + 4;
+-      ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+-
+-      /* Set up function call stack */
+-      emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+-
+       cur_offset = ctx->idx - idx0;
+       if (cur_offset != PROLOGUE_OFFSET) {
+               pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
+                           cur_offset, PROLOGUE_OFFSET);
+               return -1;
+       }
++
++      /* 4 byte extra for skb_copy_bits buffer */
++      ctx->stack_size = prog->aux->stack_depth + 4;
++      ctx->stack_size = STACK_ALIGN(ctx->stack_size);
++
++      /* Set up function call stack */
++      emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+       return 0;
+ }
+@@ -260,11 +261,12 @@ static int emit_bpf_tail_call(struct jit
+       emit(A64_LDR64(prg, tmp, prg), ctx);
+       emit(A64_CBZ(1, prg, jmp_offset), ctx);
+-      /* goto *(prog->bpf_func + prologue_size); */
++      /* goto *(prog->bpf_func + prologue_offset); */
+       off = offsetof(struct bpf_prog, bpf_func);
+       emit_a64_mov_i64(tmp, off, ctx);
+       emit(A64_LDR64(tmp, prg, tmp), ctx);
+       emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
++      emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+       emit(A64_BR(tmp), ctx);
+       /* out: */
diff --git a/queue-4.14/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch b/queue-4.14/bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
new file mode 100644 (file)
index 0000000..7842cb3
--- /dev/null
@@ -0,0 +1,132 @@
+From foo@baz Mon Jan 29 13:14:09 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 00:36:43 +0100
+Subject: bpf: avoid false sharing of map refcount with max_entries
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, stable@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20180128233647.21154-3-daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ]
+
+In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds
+speculation"), also change the layout of struct bpf_map such that
+false sharing of fast-path members like max_entries is avoided
+when the map's reference counter is altered. Therefore, force
+them onto separate cachelines.
+
+pahole dump after change:
+
+  struct bpf_map {
+        const struct bpf_map_ops  * ops;                 /*     0     8 */
+        struct bpf_map *           inner_map_meta;       /*     8     8 */
+        void *                     security;             /*    16     8 */
+        enum bpf_map_type          map_type;             /*    24     4 */
+        u32                        key_size;             /*    28     4 */
+        u32                        value_size;           /*    32     4 */
+        u32                        max_entries;          /*    36     4 */
+        u32                        map_flags;            /*    40     4 */
+        u32                        pages;                /*    44     4 */
+        u32                        id;                   /*    48     4 */
+        int                        numa_node;            /*    52     4 */
+        bool                       unpriv_array;         /*    56     1 */
+
+        /* XXX 7 bytes hole, try to pack */
+
+        /* --- cacheline 1 boundary (64 bytes) --- */
+        struct user_struct *       user;                 /*    64     8 */
+        atomic_t                   refcnt;               /*    72     4 */
+        atomic_t                   usercnt;              /*    76     4 */
+        struct work_struct         work;                 /*    80    32 */
+        char                       name[16];             /*   112    16 */
+        /* --- cacheline 2 boundary (128 bytes) --- */
+
+        /* size: 128, cachelines: 2, members: 17 */
+        /* sum members: 121, holes: 1, sum holes: 7 */
+  };
+
+Now all entries in the first cacheline are read-only throughout
+the lifetime of the map and are set up once during map creation.
+The overall struct size and number of cachelines don't change with
+the reordering. struct bpf_map is usually the first member, embedded
+in the map structs of the specific map implementations, so also
+avoid letting those members sit at the end, where they could
+potentially share a cacheline with the first map values, e.g. in
+the array: remote CPUs could trigger map updates just as well for
+those (intentionally dirtying members like max_entries) while
+having subsequent values in cache.
+
+Quoting from Google's Project Zero blog [1]:
+
+  Additionally, at least on the Intel machine on which this was
+  tested, bouncing modified cache lines between cores is slow,
+  apparently because the MESI protocol is used for cache coherence
+  [8]. Changing the reference counter of an eBPF array on one
+  physical CPU core causes the cache line containing the reference
+  counter to be bounced over to that CPU core, making reads of the
+  reference counter on all other CPU cores slow until the changed
+  reference counter has been written back to memory. Because the
+  length and the reference counter of an eBPF array are stored in
+  the same cache line, this also means that changing the reference
+  counter on one physical CPU core causes reads of the eBPF array's
+  length to be slow on other physical CPU cores (intentional false
+  sharing).
+
+While this doesn't 'control' the out-of-bounds speculation through
+masking the index as in commit b2157399cc98, triggering a manipulation
+of the map's reference counter is really trivial, so let's not allow
+max_entries to be easily affected through it.
+
+Splitting into separate cachelines also generally makes sense from
+a performance perspective anyway, in that the fast path won't take a
+cache miss if the map gets pinned, reused in other progs, etc. from
+the control path; it thus also avoids unintentional false sharing.
+
+  [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html
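+
+A minimal userspace analogue of the layout trick (not the kernel code
+itself, just the idea; assumes 64-byte cachelines):
+
+  /* C11 sketch */
+  struct map_like {
+          _Alignas(64) const void *ops;  /* read-mostly fast path  */
+          unsigned int max_entries;      /* shares ops' cacheline  */
+          _Alignas(64) int refcnt;       /* written often, alone   */
+  };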
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h |   21 ++++++++++++++++-----
+ 1 file changed, 16 insertions(+), 5 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -42,7 +42,14 @@ struct bpf_map_ops {
+ };
+ struct bpf_map {
+-      atomic_t refcnt;
++      /* 1st cacheline with read-mostly members of which some
++       * are also accessed in fast-path (e.g. ops, max_entries).
++       */
++      const struct bpf_map_ops *ops ____cacheline_aligned;
++      struct bpf_map *inner_map_meta;
++#ifdef CONFIG_SECURITY
++      void *security;
++#endif
+       enum bpf_map_type map_type;
+       u32 key_size;
+       u32 value_size;
+@@ -52,11 +59,15 @@ struct bpf_map {
+       u32 id;
+       int numa_node;
+       bool unpriv_array;
+-      struct user_struct *user;
+-      const struct bpf_map_ops *ops;
+-      struct work_struct work;
++      /* 7 bytes hole */
++
++      /* 2nd cacheline with misc members to avoid false sharing
++       * particularly with refcounting.
++       */
++      struct user_struct *user ____cacheline_aligned;
++      atomic_t refcnt;
+       atomic_t usercnt;
+-      struct bpf_map *inner_map_meta;
++      struct work_struct work;
+ };
+ /* function argument constraints */
diff --git a/queue-4.14/bpf-fix-32-bit-divide-by-zero.patch b/queue-4.14/bpf-fix-32-bit-divide-by-zero.patch
new file mode 100644 (file)
index 0000000..ea6cadd
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Mon Jan 29 13:14:09 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 00:36:45 +0100
+Subject: bpf: fix 32-bit divide by zero
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, stable@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20180128233647.21154-5-daniel@iogearbox.net>
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+[ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ]
+
+Due to some JITs doing the if (src_reg == 0) check in 64-bit mode
+for div/mod operations, mask the upper 32 bits of the src register
+before doing the check.
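+
+A plain C sketch of the mismatch (illustrative, not verifier code):
+
+  unsigned long long src = 0x100000000ULL;
+  src == 0;                  /* false: 64-bit JIT check passes  */
+  (unsigned int)src == 0;    /* true: 32-bit div/mod sees zero  */
+
+The patched-in BPF_MOV32_REG(src, src) zero-extends the register
+first, so the JIT's 64-bit check agrees with the 32-bit semantics.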
+
+Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT")
+Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.")
+Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |   18 ++++++++++++++++++
+ net/core/filter.c     |    4 ++++
+ 2 files changed, 22 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4304,6 +4304,24 @@ static int fixup_bpf_calls(struct bpf_ve
+       int i, cnt, delta = 0;
+       for (i = 0; i < insn_cnt; i++, insn++) {
++              if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
++                  insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
++                      /* due to JIT bugs clear upper 32-bits of src register
++                       * before div/mod operation
++                       */
++                      insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
++                      insn_buf[1] = *insn;
++                      cnt = 2;
++                      new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
++                      if (!new_prog)
++                              return -ENOMEM;
++
++                      delta    += cnt - 1;
++                      env->prog = prog = new_prog;
++                      insn      = new_prog->insnsi + i + delta;
++                      continue;
++              }
++
+               if (insn->code != (BPF_JMP | BPF_CALL))
+                       continue;
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -457,6 +457,10 @@ do_pass:
+                           convert_bpf_extensions(fp, &insn))
+                               break;
++                      if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
++                          fp->code == (BPF_ALU | BPF_MOD | BPF_X))
++                              *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
++
+                       *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
+                       break;
diff --git a/queue-4.14/bpf-fix-divides-by-zero.patch b/queue-4.14/bpf-fix-divides-by-zero.patch
new file mode 100644 (file)
index 0000000..ae2ca24
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Mon Jan 29 13:14:09 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 00:36:44 +0100
+Subject: bpf: fix divides by zero
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, stable@vger.kernel.org, Eric Dumazet <edumazet@google.com>
+Message-ID: <20180128233647.21154-4-daniel@iogearbox.net>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ]
+
+Divides by zero are not nice, let's avoid them if possible.
+
+Also, do_div() seems unneeded when dealing with 32-bit operands,
+but this is a minor detail.
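+
+A sketch of the trap the (u32) cast in the check avoids (illustrative
+values; SRC/DST/do_div as in the interpreter hunks below):
+
+  SRC = 1ULL << 32;               /* old check: SRC != 0, proceed */
+  tmp = (u32) DST;
+  DST = do_div(tmp, (u32) SRC);   /* divisor 0 -> division trap   */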
+
+Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/core.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -949,7 +949,7 @@ select_insn:
+               DST = tmp;
+               CONT;
+       ALU_MOD_X:
+-              if (unlikely(SRC == 0))
++              if (unlikely((u32)SRC == 0))
+                       return 0;
+               tmp = (u32) DST;
+               DST = do_div(tmp, (u32) SRC);
+@@ -968,7 +968,7 @@ select_insn:
+               DST = div64_u64(DST, SRC);
+               CONT;
+       ALU_DIV_X:
+-              if (unlikely(SRC == 0))
++              if (unlikely((u32)SRC == 0))
+                       return 0;
+               tmp = (u32) DST;
+               do_div(tmp, (u32) SRC);
diff --git a/queue-4.14/bpf-introduce-bpf_jit_always_on-config.patch b/queue-4.14/bpf-introduce-bpf_jit_always_on-config.patch
new file mode 100644 (file)
index 0000000..b02b468
--- /dev/null
@@ -0,0 +1,215 @@
+From foo@baz Mon Jan 29 13:14:09 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 00:36:42 +0100
+Subject: bpf: introduce BPF_JIT_ALWAYS_ON config
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, stable@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20180128233647.21154-2-daniel@iogearbox.net>
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ]
+
+The BPF interpreter has been used as part of the Spectre variant 2
+attack (CVE-2017-5715).
+
+A quote from the Google Project Zero blog:
+"At this point, it would normally be necessary to locate gadgets in
+the host kernel code that can be used to actually leak data by reading
+from an attacker-controlled location, shifting and masking the result
+appropriately and then using the result of that as offset to an
+attacker-controlled address for a load. But piecing gadgets together
+and figuring out which ones work in a speculation context seems annoying.
+So instead, we decided to use the eBPF interpreter, which is built into
+the host kernel - while there is no legitimate way to invoke it from inside
+a VM, the presence of the code in the host kernel's text section is sufficient
+to make it usable for the attack, just like with ordinary ROP gadgets."
+
+To make the attacker's job harder, introduce a BPF_JIT_ALWAYS_ON config
+option that removes the interpreter from the kernel in favor of a
+JIT-only mode. So far the eBPF JIT is supported by:
+x64, arm64, arm32, sparc64, s390, powerpc64, mips64
+
+The start of the JITed program is randomized and the code page is
+marked as read-only. In addition, "constant blinding" can be turned on
+with net.core.bpf_jit_harden.
+
+v2->v3:
+- move __bpf_prog_ret0 under ifdef (Daniel)
+
+v1->v2:
+- fix init order, test_bpf and cBPF (Daniel's feedback)
+- fix offloaded bpf (Jakub's feedback)
+- add 'return 0' dummy in case something can invoke prog->bpf_func
+- retarget bpf tree. For bpf-next the patch would need one extra hunk.
+  It will be sent when the trees are merged back to net-next
+
+Considered doing:
+  int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT;
+but it seems better to land the patch as-is and, in bpf-next, remove
+the bpf_jit_enable global variable from all JITs, consolidate it in
+one place, and remove this jit_init() function.
+
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ init/Kconfig               |    7 +++++++
+ kernel/bpf/core.c          |   19 +++++++++++++++++++
+ lib/test_bpf.c             |   11 +++++++----
+ net/core/filter.c          |    6 ++----
+ net/core/sysctl_net_core.c |    6 ++++++
+ net/socket.c               |    9 +++++++++
+ 6 files changed, 50 insertions(+), 8 deletions(-)
+
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1342,6 +1342,13 @@ config BPF_SYSCALL
+         Enable the bpf() system call that allows to manipulate eBPF
+         programs and maps via file descriptors.
++config BPF_JIT_ALWAYS_ON
++      bool "Permanently enable BPF JIT and remove BPF interpreter"
++      depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
++      help
++        Enables BPF JIT and removes BPF interpreter to avoid
++        speculative execution of BPF instructions by the interpreter
++
+ config SHMEM
+       bool "Use full shmem filesystem" if EXPERT
+       default y
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -760,6 +760,7 @@ noinline u64 __bpf_call_base(u64 r1, u64
+ }
+ EXPORT_SYMBOL_GPL(__bpf_call_base);
++#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ /**
+  *    __bpf_prog_run - run eBPF program on a given context
+  *    @ctx: is the data we are operating on
+@@ -1310,6 +1311,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320
+ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
+ };
++#else
++static unsigned int __bpf_prog_ret0(const void *ctx,
++                                  const struct bpf_insn *insn)
++{
++      return 0;
++}
++#endif
++
+ bool bpf_prog_array_compatible(struct bpf_array *array,
+                              const struct bpf_prog *fp)
+ {
+@@ -1357,9 +1366,13 @@ static int bpf_check_tail_call(const str
+  */
+ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
+ {
++#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+       u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
++#else
++      fp->bpf_func = __bpf_prog_ret0;
++#endif
+       /* eBPF JITs can rewrite the program in case constant
+        * blinding is active. However, in case of error during
+@@ -1368,6 +1381,12 @@ struct bpf_prog *bpf_prog_select_runtime
+        * be JITed, but falls back to the interpreter.
+        */
+       fp = bpf_int_jit_compile(fp);
++#ifdef CONFIG_BPF_JIT_ALWAYS_ON
++      if (!fp->jited) {
++              *err = -ENOTSUPP;
++              return fp;
++      }
++#endif
+       bpf_prog_lock_ro(fp);
+       /* The tail call compatibility check can only be done at
+--- a/lib/test_bpf.c
++++ b/lib/test_bpf.c
+@@ -6207,9 +6207,8 @@ static struct bpf_prog *generate_filter(
+                               return NULL;
+                       }
+               }
+-              /* We don't expect to fail. */
+               if (*err) {
+-                      pr_cont("FAIL to attach err=%d len=%d\n",
++                      pr_cont("FAIL to prog_create err=%d len=%d\n",
+                               *err, fprog.len);
+                       return NULL;
+               }
+@@ -6233,6 +6232,10 @@ static struct bpf_prog *generate_filter(
+                * checks.
+                */
+               fp = bpf_prog_select_runtime(fp, err);
++              if (*err) {
++                      pr_cont("FAIL to select_runtime err=%d\n", *err);
++                      return NULL;
++              }
+               break;
+       }
+@@ -6418,8 +6421,8 @@ static __init int test_bpf(void)
+                               pass_cnt++;
+                               continue;
+                       }
+-
+-                      return err;
++                      err_cnt++;
++                      continue;
+               }
+               pr_cont("jited:%u ", fp->jited);
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1053,11 +1053,9 @@ static struct bpf_prog *bpf_migrate_filt
+                */
+               goto out_err_free;
+-      /* We are guaranteed to never error here with cBPF to eBPF
+-       * transitions, since there's no issue with type compatibility
+-       * checks on program arrays.
+-       */
+       fp = bpf_prog_select_runtime(fp, &err);
++      if (err)
++              goto out_err_free;
+       kfree(old_prog);
+       return fp;
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[]
+               .data           = &bpf_jit_enable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
++#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+               .proc_handler   = proc_dointvec
++#else
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &one,
++              .extra2         = &one,
++#endif
+       },
+ # ifdef CONFIG_HAVE_EBPF_JIT
+       {
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2642,6 +2642,15 @@ out_fs:
+ core_initcall(sock_init);     /* early initcall */
++static int __init jit_init(void)
++{
++#ifdef CONFIG_BPF_JIT_ALWAYS_ON
++      bpf_jit_enable = 1;
++#endif
++      return 0;
++}
++pure_initcall(jit_init);
++
+ #ifdef CONFIG_PROC_FS
+ void socket_seq_show(struct seq_file *seq)
+ {
diff --git a/queue-4.14/bpf-reject-stores-into-ctx-via-st-and-xadd.patch b/queue-4.14/bpf-reject-stores-into-ctx-via-st-and-xadd.patch
new file mode 100644 (file)
index 0000000..354dbb4
--- /dev/null
@@ -0,0 +1,125 @@
+From foo@baz Mon Jan 29 13:14:09 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 29 Jan 2018 00:36:46 +0100
+Subject: bpf: reject stores into ctx via st and xadd
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, stable@vger.kernel.org, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20180128233647.21154-6-daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ]
+
+Alexei found that the verifier does not reject stores into the context
+via BPF_ST instead of BPF_STX. And while looking at it, we should also
+not allow the XADD variant of BPF_STX.
+
+The context rewriter is only assuming either BPF_LDX_MEM- or
+BPF_STX_MEM-type operations, thus reject anything other than that so
+that the assumptions in the rewriter properly hold. Add test cases
+for the BPF selftests as well.
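+
+For instance, both of the following are now rejected by the verifier
+(condensed from the selftests added below):
+
+  /* BPF_ST: store immediate into ctx */
+  BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
+  /* BPF_STX | BPF_XADD: atomic add into ctx */
+  BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
+               BPF_REG_0, offsetof(struct __sk_buff, mark), 0),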
+
+Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields")
+Reported-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c                       |   19 ++++++++++++++++++
+ tools/testing/selftests/bpf/test_verifier.c |   29 ++++++++++++++++++++++++++--
+ 2 files changed, 46 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -986,6 +986,13 @@ static bool is_pointer_value(struct bpf_
+       return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+ }
++static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
++{
++      const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
++
++      return reg->type == PTR_TO_CTX;
++}
++
+ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+                                  int off, int size, bool strict)
+ {
+@@ -1258,6 +1265,12 @@ static int check_xadd(struct bpf_verifie
+               return -EACCES;
+       }
++      if (is_ctx_reg(env, insn->dst_reg)) {
++              verbose("BPF_XADD stores into R%d context is not allowed\n",
++                      insn->dst_reg);
++              return -EACCES;
++      }
++
+       /* check whether atomic_add can read the memory */
+       err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+                              BPF_SIZE(insn->code), BPF_READ, -1);
+@@ -3859,6 +3872,12 @@ static int do_check(struct bpf_verifier_
+                       if (err)
+                               return err;
++                      if (is_ctx_reg(env, insn->dst_reg)) {
++                              verbose("BPF_ST stores into R%d context is not allowed\n",
++                                      insn->dst_reg);
++                              return -EACCES;
++                      }
++
+                       /* check that memory (dst_reg + off) is writeable */
+                       err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+                                              BPF_SIZE(insn->code), BPF_WRITE,
+--- a/tools/testing/selftests/bpf/test_verifier.c
++++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -2596,6 +2596,29 @@ static struct bpf_test tests[] = {
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
++              "context stores via ST",
++              .insns = {
++                      BPF_MOV64_IMM(BPF_REG_0, 0),
++                      BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
++                      BPF_EXIT_INSN(),
++              },
++              .errstr = "BPF_ST stores into R1 context is not allowed",
++              .result = REJECT,
++              .prog_type = BPF_PROG_TYPE_SCHED_CLS,
++      },
++      {
++              "context stores via XADD",
++              .insns = {
++                      BPF_MOV64_IMM(BPF_REG_0, 0),
++                      BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
++                                   BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
++                      BPF_EXIT_INSN(),
++              },
++              .errstr = "BPF_XADD stores into R1 context is not allowed",
++              .result = REJECT,
++              .prog_type = BPF_PROG_TYPE_SCHED_CLS,
++      },
++      {
+               "direct packet access: test1",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+@@ -4317,7 +4340,8 @@ static struct bpf_test tests[] = {
+               .fixup_map1 = { 2 },
+               .errstr_unpriv = "R2 leaks addr into mem",
+               .result_unpriv = REJECT,
+-              .result = ACCEPT,
++              .result = REJECT,
++              .errstr = "BPF_XADD stores into R1 context is not allowed",
+       },
+       {
+               "leak pointer into ctx 2",
+@@ -4331,7 +4355,8 @@ static struct bpf_test tests[] = {
+               },
+               .errstr_unpriv = "R10 leaks addr into mem",
+               .result_unpriv = REJECT,
+-              .result = ACCEPT,
++              .result = REJECT,
++              .errstr = "BPF_XADD stores into R1 context is not allowed",
+       },
+       {
+               "leak pointer into ctx 3",
diff --git a/queue-4.14/series b/queue-4.14/series
index fce9bfcd0f524e6ba9e7d611f46b318a8afd545d..7eda6773d41030b86c894994ba65a0a2b6de414e 100644 (file)
--- a/queue-4.14/series
@@ -62,3 +62,9 @@ x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch
 x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch
 x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch
 hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch
+bpf-introduce-bpf_jit_always_on-config.patch
+bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
+bpf-fix-divides-by-zero.patch
+bpf-fix-32-bit-divide-by-zero.patch
+bpf-reject-stores-into-ctx-via-st-and-xadd.patch
+bpf-arm64-fix-stack_depth-tracking-in-combination-with-tail-calls.patch