--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:42 +0100
+Subject: bpf: arsh is not supported in 32 bit alu thus reject it
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
+Message-ID: <60932351924d42bf28628b0a01a693602cc0d9b9.1517279268.git.daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ upstream commit 7891a87efc7116590eaba57acc3c422487802c6f ]
+
+The following snippet was throwing an 'unknown opcode cc' warning
+in the BPF interpreter:
+
+ 0: (18) r0 = 0x0
+ 2: (7b) *(u64 *)(r10 -16) = r0
+ 3: (cc) (u32) r0 s>>= (u32) r0
+ 4: (95) exit
+
+Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X}
+generation, not all of them do, and neither does the interpreter. We
+can leave the existing ones and implement it later in bpf-next for the
+remaining ones, but reject this properly in the verifier for the time
+being.
+
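+For illustration, a roughly equivalent construction of that snippet with
+the kernel's BPF insn macros might look like this (a sketch only, not
+the syzkaller reproducer):
+
+  struct bpf_insn prog[] = {
+    BPF_LD_IMM64(BPF_REG_0, 0),                      /* 0: (18) r0 = 0x0           */
+    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), /* 2: (7b) *(u64 *)(fp-16)=r0 */
+    BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_0),   /* 3: (cc) now rejected       */
+    BPF_EXIT_INSN(),                                 /* 4: (95) exit               */
+  };
+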
+Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
+Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1165,6 +1165,11 @@ static int check_alu_op(struct verifier_
+ return -EINVAL;
+ }
+
++ if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
++ verbose("BPF_ARSH not supported for 32 bit ALU\n");
++ return -EINVAL;
++ }
++
+ if ((opcode == BPF_LSH || opcode == BPF_RSH ||
+ opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
+ int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:43 +0100
+Subject: bpf: avoid false sharing of map refcount with max_entries
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
+Message-ID: <6c5f91e38c952be4831f6764a92cedb7a48be095.1517279268.git.daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ upstream commit be95a845cc4402272994ce290e3ad928aff06cb9 ]
+
+In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds
+speculation"), also change the layout of struct bpf_map such that
+false sharing of fast-path members like max_entries is avoided
+when the map's reference counter is altered. Therefore enforce
+that they are placed into separate cachelines.
+
+pahole dump after change:
+
+ struct bpf_map {
+ const struct bpf_map_ops * ops; /* 0 8 */
+ struct bpf_map * inner_map_meta; /* 8 8 */
+ void * security; /* 16 8 */
+ enum bpf_map_type map_type; /* 24 4 */
+ u32 key_size; /* 28 4 */
+ u32 value_size; /* 32 4 */
+ u32 max_entries; /* 36 4 */
+ u32 map_flags; /* 40 4 */
+ u32 pages; /* 44 4 */
+ u32 id; /* 48 4 */
+ int numa_node; /* 52 4 */
+ bool unpriv_array; /* 56 1 */
+
+ /* XXX 7 bytes hole, try to pack */
+
+ /* --- cacheline 1 boundary (64 bytes) --- */
+ struct user_struct * user; /* 64 8 */
+ atomic_t refcnt; /* 72 4 */
+ atomic_t usercnt; /* 76 4 */
+ struct work_struct work; /* 80 32 */
+ char name[16]; /* 112 16 */
+ /* --- cacheline 2 boundary (128 bytes) --- */
+
+ /* size: 128, cachelines: 2, members: 17 */
+ /* sum members: 121, holes: 1, sum holes: 7 */
+ };
+
+Now all entries in the first cacheline are read-only throughout
+the lifetime of the map, set up once during map creation. Overall
+struct size and number of cachelines don't change from the
+reordering. struct bpf_map is usually the first member, embedded
+in the map structs of the specific map implementations, so also
+avoid letting those members sit at the end where they could
+potentially share a cacheline with the first map values, e.g. in
+the array, since remote CPUs could trigger map updates just as
+well for those (intentionally dirtying members like max_entries
+as well) while having subsequent values in cache.
+
+Quoting from Google's Project Zero blog [1]:
+
+ Additionally, at least on the Intel machine on which this was
+ tested, bouncing modified cache lines between cores is slow,
+ apparently because the MESI protocol is used for cache coherence
+ [8]. Changing the reference counter of an eBPF array on one
+ physical CPU core causes the cache line containing the reference
+ counter to be bounced over to that CPU core, making reads of the
+ reference counter on all other CPU cores slow until the changed
+ reference counter has been written back to memory. Because the
+ length and the reference counter of an eBPF array are stored in
+ the same cache line, this also means that changing the reference
+ counter on one physical CPU core causes reads of the eBPF array's
+ length to be slow on other physical CPU cores (intentional false
+ sharing).
+
+While this doesn't 'control' the out-of-bounds speculation through
+masking the index as in commit b2157399cc98, triggering a manipulation
+of the map's reference counter is really trivial, so let's not allow
+max_entries to be affected so easily from it.
+
+Splitting into separate cachelines also generally makes sense from
+a performance perspective anyway, in that the fast path won't take a
+cache miss if the map gets pinned, reused in other progs, etc. out
+of the control path, thus also avoiding unintentional false sharing.
+
+ [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html
+
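+As a general illustration of the technique (a sketch only, with made-up
+field names rather than the actual bpf_map layout), ____cacheline_aligned
+pushes the annotated member to the start of the next cacheline, so that
+writers of one group do not keep invalidating the line that holds the
+other group:
+
+  struct example {
+    /* read-mostly fast-path members */
+    u32 limit ____cacheline_aligned;
+    /* frequently written control-path members */
+    atomic_t users ____cacheline_aligned;
+  };
+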
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/bpf.h | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -31,17 +31,25 @@ struct bpf_map_ops {
+ };
+
+ struct bpf_map {
+- atomic_t refcnt;
++ /* 1st cacheline with read-mostly members of which some
++ * are also accessed in fast-path (e.g. ops, max_entries).
++ */
++ const struct bpf_map_ops *ops ____cacheline_aligned;
+ enum bpf_map_type map_type;
+ u32 key_size;
+ u32 value_size;
+ u32 max_entries;
+ u32 pages;
+ bool unpriv_array;
+- struct user_struct *user;
+- const struct bpf_map_ops *ops;
+- struct work_struct work;
++ /* 7 bytes hole */
++
++ /* 2nd cacheline with misc members to avoid false sharing
++ * particularly with refcounting.
++ */
++ struct user_struct *user ____cacheline_aligned;
++ atomic_t refcnt;
+ atomic_t usercnt;
++ struct work_struct work;
+ };
+
+ struct bpf_map_type_list {
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:45 +0100
+Subject: bpf: fix 32-bit divide by zero
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
+Message-ID: <7e8a78250e8cf1f486b4cdb005e3ff313b992816.1517279268.git.daniel@iogearbox.net>
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+[ upstream commit 68fda450a7df51cff9e5a4d4a4d9d0d5f2589153 ]
+
+Due to some JITs doing the if (src_reg == 0) check in 64-bit mode
+for div/mod operations, mask the upper 32 bits of the src register
+before doing the check.
+
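+A sketch (illustrative only, not the reproducer) of the kind of program
+that trips such JITs: the divisor is non-zero as a 64-bit value, but its
+low 32 bits, which are what the 32-bit BPF_DIV actually divides by, are
+zero:
+
+  BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000ULL),
+  BPF_MOV32_IMM(BPF_REG_0, 1),
+  BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), /* 32-bit divide by zero */
+  BPF_EXIT_INSN(),
+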
+Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT")
+Fixes: 7a12b5031c6b ("sparc64: Add eBPF JIT.")
+Reported-by: syzbot+48340bb518e88849e2e3@syzkaller.appspotmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 18 ++++++++++++++++++
+ net/core/filter.c | 4 ++++
+ 2 files changed, 22 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2248,6 +2248,24 @@ static int fixup_bpf_calls(struct verifi
+ int i, cnt, delta = 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
++ if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
++ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
++ /* due to JIT bugs clear upper 32-bits of src register
++ * before div/mod operation
++ */
++ insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
++ insn_buf[1] = *insn;
++ cnt = 2;
++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
++ if (!new_prog)
++ return -ENOMEM;
++
++ delta += cnt - 1;
++ env->prog = prog = new_prog;
++ insn = new_prog->insnsi + i + delta;
++ continue;
++ }
++
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ continue;
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -430,6 +430,10 @@ do_pass:
+ convert_bpf_extensions(fp, &insn))
+ break;
+
++ if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
++ fp->code == (BPF_ALU | BPF_MOD | BPF_X))
++ *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
++
+ *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
+ break;
+
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:40 +0100
+Subject: bpf: fix bpf_tail_call() x64 JIT
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org, Alexei Starovoitov <ast@fb.com>, "David S . Miller" <davem@davemloft.net>
+Message-ID: <1d696e8c8bf884fb67aca8fe4ab8ba132b8a2ed1.1517279268.git.daniel@iogearbox.net>
+
+From: Alexei Starovoitov <ast@fb.com>
+
+[ upstream commit 90caccdd8cc0215705f18b92771b449b01e2474a ]
+
+- bpf prog_array, just like all other types of bpf array, accepts a 32-bit index.
+  Clarify that in the comment.
+- fix the x64 JIT of bpf_tail_call, which was incorrectly loading 8 instead of 4 bytes
+- tighten the corresponding check in the interpreter to stay consistent
+
+The JIT bug can be triggered after the introduction of the BPF_F_NUMA_NODE
+flag in commit 96eabe7a40aa in 4.14. Before that, map_flags would stay zero,
+and though the JIT code is wrong, it would still check bounds correctly.
+Hence the two Fixes tags. None of the other JITs have this problem.
+
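+The effect of the wrong load width, sketched in C for a little-endian
+machine (for illustration only): the 64-bit read picks up max_entries
+plus whatever 32-bit field happens to follow it in struct bpf_map
+(map_flags in the 4.14 layout mentioned above), inflating the bound,
+while the fix compares the 32-bit index against the 32-bit max_entries
+only:
+
+  u64 bad_bound  = *(u64 *)&array->map.max_entries; /* max_entries + next field */
+  u32 good_bound = array->map.max_entries;          /* what must be compared    */
+  u32 index      = BPF_R3;                          /* interpreter: index is u32 */
+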
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Fixes: 96eabe7a40aa ("bpf: Allow selecting numa node during map creation")
+Fixes: b52f00e6a715 ("x86: bpf_jit: implement bpf_tail_call() helper")
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 4 ++--
+ kernel/bpf/core.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -266,9 +266,9 @@ static void emit_bpf_tail_call(u8 **ppro
+ /* if (index >= array->map.max_entries)
+ * goto out;
+ */
+- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
++ EMIT2(0x89, 0xD2); /* mov edx, edx */
++ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
+ offsetof(struct bpf_array, map.max_entries));
+- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
+ #define OFFSET1 43 /* number of bytes to jump */
+ EMIT2(X86_JBE, OFFSET1); /* jbe out */
+ label1 = cnt;
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -517,7 +517,7 @@ select_insn:
+ struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_prog *prog;
+- u64 index = BPF_R3;
++ u32 index = BPF_R3;
+
+ if (unlikely(index >= array->map.max_entries))
+ goto out;
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:38 +0100
+Subject: bpf: fix branch pruning logic
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
+Message-ID: <c4bc7f798c0e99b4551d3423c294350fc8223714.1517279268.git.daniel@iogearbox.net>
+
+From: Alexei Starovoitov <ast@fb.com>
+
+[ Upstream commit c131187db2d3fa2f8bf32fdf4e9a4ef805168467 ]
+
+When the verifier detects that a register contains a runtime constant
+and it's compared with another constant, it will prune exploration
+of the branch that is guaranteed not to be taken at runtime.
+This is all correct, but a malicious program may be constructed
+in such a way that it always has a constant comparison and
+the other branch is never taken under any conditions.
+In that case such a path through the program will not be explored
+by the verifier. It won't be taken at run-time either, but since
+all instructions are JITed the malicious program may cause JITs
+to complain about using reserved fields, etc.
+To fix the issue we have to track the instructions explored by
+the verifier and sanitize instructions that are dead at run time
+with NOPs. We cannot reject such dead code, since llvm generates
+it for valid C code because it doesn't do as much data flow
+analysis as the verifier does.
+
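+A minimal sketch of the pattern (illustrative only, not the actual
+reproducer): the comparison is always true, so the verifier never walks
+the dead instruction, yet the JIT still has to translate it:
+
+  BPF_MOV64_IMM(BPF_REG_0, 0),
+  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  /* always taken, skips next insn */
+  BPF_RAW_INSN(0, 0, 0, 0, 0),            /* dead: unknown opcode to a JIT */
+  BPF_EXIT_INSN(),
+
+With this patch the dead instruction is overwritten with the
+"mov64 r0, r0" nop before JITing.
+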
+Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -191,6 +191,7 @@ struct bpf_insn_aux_data {
+ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
+ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
+ };
++ bool seen; /* this insn was processed by the verifier */
+ };
+
+ #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+@@ -1793,6 +1794,7 @@ static int do_check(struct verifier_env
+ print_bpf_insn(env, insn);
+ }
+
++ env->insn_aux_data[insn_idx].seen = true;
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ err = check_alu_op(env, insn);
+ if (err)
+@@ -1988,6 +1990,7 @@ process_bpf_exit:
+ return err;
+
+ insn_idx++;
++ env->insn_aux_data[insn_idx].seen = true;
+ } else {
+ verbose("invalid BPF_LD mode\n");
+ return -EINVAL;
+@@ -2125,6 +2128,7 @@ static int adjust_insn_aux_data(struct v
+ u32 off, u32 cnt)
+ {
+ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
++ int i;
+
+ if (cnt == 1)
+ return 0;
+@@ -2134,6 +2138,8 @@ static int adjust_insn_aux_data(struct v
+ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+ memcpy(new_data + off + cnt - 1, old_data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
++ for (i = off; i < off + cnt - 1; i++)
++ new_data[i].seen = true;
+ env->insn_aux_data = new_data;
+ vfree(old_data);
+ return 0;
+@@ -2152,6 +2158,25 @@ static struct bpf_prog *bpf_patch_insn_d
+ return new_prog;
+ }
+
++/* The verifier does more data flow analysis than llvm and will not explore
++ * branches that are dead at run time. Malicious programs can have dead code
++ * too. Therefore replace all dead at-run-time code with nops.
++ */
++static void sanitize_dead_code(struct verifier_env *env)
++{
++ struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
++ struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
++ struct bpf_insn *insn = env->prog->insnsi;
++ const int insn_cnt = env->prog->len;
++ int i;
++
++ for (i = 0; i < insn_cnt; i++) {
++ if (aux_data[i].seen)
++ continue;
++ memcpy(insn + i, &nop, sizeof(nop));
++ }
++}
++
+ /* convert load instructions that access fields of 'struct __sk_buff'
+ * into sequence of instructions that access fields of 'struct sk_buff'
+ */
+@@ -2371,6 +2396,9 @@ skip_full_check:
+ free_states(env);
+
+ if (ret == 0)
++ sanitize_dead_code(env);
++
++ if (ret == 0)
+ /* program is valid, convert *(u32*)(ctx + off) accesses */
+ ret = convert_ctx_accesses(env);
+
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:44 +0100
+Subject: bpf: fix divides by zero
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org, Eric Dumazet <edumazet@google.com>
+Message-ID: <cdf90cab74cbd2ef542fc5f982b55f423a52a5b5.1517279268.git.daniel@iogearbox.net>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ upstream commit c366287ebd698ef5e3de300d90cd62ee9ee7373e ]
+
+Divides by zero are not nice, let's avoid them if possible.
+
+Also, do_div() does not seem needed when dealing with 32-bit operands,
+but that is a minor detail.
+
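+In C terms the interpreter's old fast path roughly did the following
+(a simplified sketch of the ALU_DIV_X hunk below): SRC can be non-zero
+as a 64-bit value while its low 32 bits, the actual divisor, are zero:
+
+  if (SRC == 0)            /* old check: not taken  */
+    return 0;
+  tmp = (u32) DST;
+  do_div(tmp, (u32) SRC);  /* host divide by zero   */
+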
+Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/core.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -444,7 +444,7 @@ select_insn:
+ DST = tmp;
+ CONT;
+ ALU_MOD_X:
+- if (unlikely(SRC == 0))
++ if (unlikely((u32)SRC == 0))
+ return 0;
+ tmp = (u32) DST;
+ DST = do_div(tmp, (u32) SRC);
+@@ -463,7 +463,7 @@ select_insn:
+ DST = div64_u64(DST, SRC);
+ CONT;
+ ALU_DIV_X:
+- if (unlikely(SRC == 0))
++ if (unlikely((u32)SRC == 0))
+ return 0;
+ tmp = (u32) DST;
+ do_div(tmp, (u32) SRC);
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:41 +0100
+Subject: bpf: introduce BPF_JIT_ALWAYS_ON config
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
+Message-ID: <8fa0284c9e3811cc7ae467dd3490da45ff76b46b.1517279268.git.daniel@iogearbox.net>
+
+From: Alexei Starovoitov <ast@kernel.org>
+
+[ upstream commit 290af86629b25ffd1ed6232c4e9107da031705cb ]
+
+The BPF interpreter has been used as part of the Spectre v2 attack
+(CVE-2017-5715).
+
+A quote from the Google Project Zero blog:
+"At this point, it would normally be necessary to locate gadgets in
+the host kernel code that can be used to actually leak data by reading
+from an attacker-controlled location, shifting and masking the result
+appropriately and then using the result of that as offset to an
+attacker-controlled address for a load. But piecing gadgets together
+and figuring out which ones work in a speculation context seems annoying.
+So instead, we decided to use the eBPF interpreter, which is built into
+the host kernel - while there is no legitimate way to invoke it from inside
+a VM, the presence of the code in the host kernel's text section is sufficient
+to make it usable for the attack, just like with ordinary ROP gadgets."
+
+To make the attacker's job harder, introduce a BPF_JIT_ALWAYS_ON config
+option that removes the interpreter from the kernel in favor of a
+JIT-only mode. So far the eBPF JIT is supported by:
+x64, arm64, arm32, sparc64, s390, powerpc64, mips64
+
+The start of the JITed program is randomized and the code page is marked
+as read-only. In addition, "constant blinding" can be turned on with
+net.core.bpf_jit_harden.
+
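+With the option enabled, program load effectively behaves as in this
+simplified sketch of the bpf_prog_select_runtime() hunk below: if the
+arch JIT did not translate the program, loading fails with -ENOTSUPP
+instead of falling back to the interpreter:
+
+  fp->bpf_func = (void *) __bpf_prog_ret0; /* dummy, never the interpreter */
+  bpf_int_jit_compile(fp);
+  if (!fp->jited)
+    return -ENOTSUPP;
+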
+v2->v3:
+- move __bpf_prog_ret0 under ifdef (Daniel)
+
+v1->v2:
+- fix init order, test_bpf and cBPF (Daniel's feedback)
+- fix offloaded bpf (Jakub's feedback)
+- add 'return 0' dummy in case something can invoke prog->bpf_func
+- retarget bpf tree. For bpf-next the patch would need one extra hunk.
+ It will be sent when the trees are merged back to net-next
+
+Considered doing:
+ int bpf_jit_enable __read_mostly = BPF_EBPF_JIT_DEFAULT;
+but it seems better to land the patch as-is and in bpf-next remove
+bpf_jit_enable global variable from all JITs, consolidate in one place
+and remove this jit_init() function.
+
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/Kconfig | 1 +
+ arch/s390/Kconfig | 1 +
+ arch/x86/Kconfig | 1 +
+ init/Kconfig | 7 +++++++
+ kernel/bpf/core.c | 24 +++++++++++++++++++++++-
+ lib/test_bpf.c | 13 ++++++++-----
+ net/Kconfig | 3 +++
+ net/core/filter.c | 4 +++-
+ net/core/sysctl_net_core.c | 6 ++++++
+ net/socket.c | 9 +++++++++
+ 10 files changed, 62 insertions(+), 7 deletions(-)
+
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -54,6 +54,7 @@ config ARM64
+ select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_BPF_JIT
++ select HAVE_EBPF_JIT
+ select HAVE_C_RECORDMCOUNT
+ select HAVE_CC_STACKPROTECTOR
+ select HAVE_CMPXCHG_DOUBLE
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -123,6 +123,7 @@ config S390
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
++ select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_CMPXCHG_LOCAL
+ select HAVE_DEBUG_KMEMLEAK
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -88,6 +88,7 @@ config X86
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_BPF_JIT if X86_64
++ select HAVE_EBPF_JIT if X86_64
+ select HAVE_CC_STACKPROTECTOR
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_CMPXCHG_LOCAL
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -1556,6 +1556,13 @@ config BPF_SYSCALL
+ Enable the bpf() system call that allows to manipulate eBPF
+ programs and maps via file descriptors.
+
++config BPF_JIT_ALWAYS_ON
++ bool "Permanently enable BPF JIT and remove BPF interpreter"
++ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
++ help
++ Enables BPF JIT and removes BPF interpreter to avoid
++ speculative execution of BPF instructions by the interpreter
++
+ config SHMEM
+ bool "Use full shmem filesystem" if EXPERT
+ default y
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -256,6 +256,7 @@ noinline u64 __bpf_call_base(u64 r1, u64
+ }
+ EXPORT_SYMBOL_GPL(__bpf_call_base);
+
++#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ /**
+ * __bpf_prog_run - run eBPF program on a given context
+ * @ctx: is the data we are operating on
+@@ -725,6 +726,13 @@ load_byte:
+ return 0;
+ }
+
++#else
++static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn)
++{
++ return 0;
++}
++#endif
++
+ bool bpf_prog_array_compatible(struct bpf_array *array,
+ const struct bpf_prog *fp)
+ {
+@@ -771,9 +779,23 @@ static int bpf_check_tail_call(const str
+ */
+ int bpf_prog_select_runtime(struct bpf_prog *fp)
+ {
++#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ fp->bpf_func = (void *) __bpf_prog_run;
+-
++#else
++ fp->bpf_func = (void *) __bpf_prog_ret0;
++#endif
++
++ /* eBPF JITs can rewrite the program in case constant
++ * blinding is active. However, in case of error during
++ * blinding, bpf_int_jit_compile() must always return a
++ * valid program, which in this case would simply not
++ * be JITed, but falls back to the interpreter.
++ */
+ bpf_int_jit_compile(fp);
++#ifdef CONFIG_BPF_JIT_ALWAYS_ON
++ if (!fp->jited)
++ return -ENOTSUPP;
++#endif
+ bpf_prog_lock_ro(fp);
+
+ /* The tail call compatibility check can only be done at
+--- a/lib/test_bpf.c
++++ b/lib/test_bpf.c
+@@ -5304,9 +5304,8 @@ static struct bpf_prog *generate_filter(
+ return NULL;
+ }
+ }
+- /* We don't expect to fail. */
+ if (*err) {
+- pr_cont("FAIL to attach err=%d len=%d\n",
++ pr_cont("FAIL to prog_create err=%d len=%d\n",
+ *err, fprog.len);
+ return NULL;
+ }
+@@ -5325,7 +5324,11 @@ static struct bpf_prog *generate_filter(
+ fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
+ memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn));
+
+- bpf_prog_select_runtime(fp);
++ *err = bpf_prog_select_runtime(fp);
++ if (*err) {
++ pr_cont("FAIL to select_runtime err=%d\n", *err);
++ return NULL;
++ }
+ break;
+ }
+
+@@ -5511,8 +5514,8 @@ static __init int test_bpf(void)
+ pass_cnt++;
+ continue;
+ }
+-
+- return err;
++ err_cnt++;
++ continue;
+ }
+
+ pr_cont("jited:%u ", fp->jited);
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -388,3 +388,6 @@ endif # if NET
+ # Used by archs to tell that they support BPF_JIT
+ config HAVE_BPF_JIT
+ bool
++
++config HAVE_EBPF_JIT
++ bool
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -984,7 +984,9 @@ static struct bpf_prog *bpf_migrate_filt
+ */
+ goto out_err_free;
+
+- bpf_prog_select_runtime(fp);
++ err = bpf_prog_select_runtime(fp);
++ if (err)
++ goto out_err_free;
+
+ kfree(old_prog);
+ return fp;
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -292,7 +292,13 @@ static struct ctl_table net_core_table[]
+ .data = &bpf_jit_enable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
++#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ .proc_handler = proc_dointvec
++#else
++ .proc_handler = proc_dointvec_minmax,
++ .extra1 = &one,
++ .extra2 = &one,
++#endif
+ },
+ #endif
+ {
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2534,6 +2534,15 @@ out_fs:
+
+ core_initcall(sock_init); /* early initcall */
+
++static int __init jit_init(void)
++{
++#ifdef CONFIG_BPF_JIT_ALWAYS_ON
++ bpf_jit_enable = 1;
++#endif
++ return 0;
++}
++pure_initcall(jit_init);
++
+ #ifdef CONFIG_PROC_FS
+ void socket_seq_show(struct seq_file *seq)
+ {
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:46 +0100
+Subject: bpf: reject stores into ctx via st and xadd
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org
+Message-ID: <7d49693fcf1d0f23f0f14e8da18acfe03da9fc18.1517279268.git.daniel@iogearbox.net>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ upstream commit f37a8cb84cce18762e8f86a70bd6a49a66ab964c ]
+
+Alexei found that the verifier does not reject stores into the context
+via BPF_ST as opposed to BPF_STX. And while looking at it, we
+also should not allow the XADD variant of BPF_STX.
+
+The context rewriter only assumes BPF_LDX_MEM- or
+BPF_STX_MEM-type operations, thus reject anything other than
+that so that the assumptions in the rewriter properly hold. Also
+add test cases for this to the BPF selftests.
+
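+For illustration, both of the following (a sketch using the BPF insn
+macros, with __sk_buff's mark field as an example target) are now
+rejected by the verifier when R1 is the context:
+
+  /* BPF_ST: immediate store into ctx */
+  BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42),
+  /* BPF_STX | BPF_XADD on ctx */
+  BPF_STX_XADD(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)),
+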
+Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields")
+Reported-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -683,6 +683,13 @@ static bool is_pointer_value(struct veri
+ }
+ }
+
++static bool is_ctx_reg(struct verifier_env *env, int regno)
++{
++ const struct reg_state *reg = &env->cur_state.regs[regno];
++
++ return reg->type == PTR_TO_CTX;
++}
++
+ /* check whether memory at (regno + off) is accessible for t = (read | write)
+ * if t==write, value_regno is a register which value is stored into memory
+ * if t==read, value_regno is a register which will receive the value from memory
+@@ -779,6 +786,12 @@ static int check_xadd(struct verifier_en
+ return -EACCES;
+ }
+
++ if (is_ctx_reg(env, insn->dst_reg)) {
++ verbose("BPF_XADD stores into R%d context is not allowed\n",
++ insn->dst_reg);
++ return -EACCES;
++ }
++
+ /* check whether atomic_add can read the memory */
+ err = check_mem_access(env, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_READ, -1);
+@@ -1909,6 +1922,12 @@ static int do_check(struct verifier_env
+ if (err)
+ return err;
+
++ if (is_ctx_reg(env, insn->dst_reg)) {
++ verbose("BPF_ST stores into R%d context is not allowed\n",
++ insn->dst_reg);
++ return -EACCES;
++ }
++
+ /* check that memory (dst_reg + off) is writeable */
+ err = check_mem_access(env, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE,
loop-fix-concurrent-lo_open-lo_release.patch
+bpf-fix-branch-pruning-logic.patch
+x86-bpf_jit-small-optimization-in-emit_bpf_tail_call.patch
+bpf-fix-bpf_tail_call-x64-jit.patch
+bpf-introduce-bpf_jit_always_on-config.patch
+bpf-arsh-is-not-supported-in-32-bit-alu-thus-reject-it.patch
+bpf-avoid-false-sharing-of-map-refcount-with-max_entries.patch
+bpf-fix-divides-by-zero.patch
+bpf-fix-32-bit-divide-by-zero.patch
+bpf-reject-stores-into-ctx-via-st-and-xadd.patch
--- /dev/null
+From foo@baz Thu Feb 1 09:05:44 CET 2018
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Tue, 30 Jan 2018 03:37:39 +0100
+Subject: x86: bpf_jit: small optimization in emit_bpf_tail_call()
+To: gregkh@linuxfoundation.org
+Cc: ast@kernel.org, daniel@iogearbox.net, stable@vger.kernel.org, Eric Dumazet <edumazet@google.com>, "David S . Miller" <davem@davemloft.net>
+Message-ID: <e0ee628ad21177055e3bad4cea8995f7d66b11c0.1517279268.git.daniel@iogearbox.net>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ upstream commit 84ccac6e7854ebbfb56d2fc6d5bef9be49bb304c ]
+
+Saves 4 bytes by replacing the following instructions:
+
+lea rax, [rsi + rdx * 8 + offsetof(...)]
+mov rax, qword ptr [rax]
+cmp rax, 0
+
+by:
+
+mov rax, [rsi + rdx * 8 + offsetof(...)]
+test rax, rax
+
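+The 4 bytes come purely from instruction encoding (a sketch of the byte
+counts, matching the EMIT* changes in the hunk below):
+
+  lea (8) + mov (3) + cmp rax,0 (4) = 15 bytes
+  mov (8) + test rax,rax (3)        = 11 bytes
+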
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Alexei Starovoitov <ast@kernel.org>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/net/bpf_jit_comp.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -269,7 +269,7 @@ static void emit_bpf_tail_call(u8 **ppro
+ EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
+ offsetof(struct bpf_array, map.max_entries));
+ EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
+-#define OFFSET1 47 /* number of bytes to jump */
++#define OFFSET1 43 /* number of bytes to jump */
+ EMIT2(X86_JBE, OFFSET1); /* jbe out */
+ label1 = cnt;
+
+@@ -278,21 +278,20 @@ static void emit_bpf_tail_call(u8 **ppro
+ */
+ EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+-#define OFFSET2 36
++#define OFFSET2 32
+ EMIT2(X86_JA, OFFSET2); /* ja out */
+ label2 = cnt;
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
+
+ /* prog = array->ptrs[index]; */
+- EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
++ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
+ offsetof(struct bpf_array, ptrs));
+- EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
+
+ /* if (prog == NULL)
+ * goto out;
+ */
+- EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
++ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
+ #define OFFSET3 10
+ EMIT2(X86_JE, OFFSET3); /* je out */
+ label3 = cnt;