--- /dev/null
+From 693c3a15893e2cc211828e9009eae0fef82b926f Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov <ast@kernel.org>
+Date: Mon, 28 Jan 2019 21:28:20 +0100
+Subject: bpf: add per-insn complexity limit
+
+[ commit ceefbc96fa5c5b975d87bf8e89ba8416f6b764d9 upstream ]
+
+A malicious BPF program may try to force the verifier to remember
+a large number of distinct verifier states.
+Put a limit on the number of per-insn 'struct bpf_verifier_state'.
+Note that hitting the limit doesn't reject the program.
+It potentially makes the verifier do more steps to analyze the program.
+It means that malicious programs will hit BPF_COMPLEXITY_LIMIT_INSNS sooner
+instead of spending CPU time walking a long linked list.
+
+The limit of BPF_COMPLEXITY_LIMIT_STATES==64 affects cilium progs
+with a slight increase in the number of "steps" it takes to successfully
+verify the programs:
+                      before    after
+bpf_lb-DLB_L3.o         1940     1940
+bpf_lb-DLB_L4.o         3089     3089
+bpf_lb-DUNKNOWN.o       1065     1065
+bpf_lxc-DDROP_ALL.o    28052  |  28162
+bpf_lxc-DUNKNOWN.o     35487  |  35541
+bpf_netdev.o           10864    10864
+bpf_overlay.o           6643     6643
+bpf_lcx_jit.o          38437    38437
+
+But it also makes such a malicious program get rejected in 0.4 seconds vs 6.5
+Hence apply this limit to unprivileged programs only.
+
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 3d093003c723..2bbb98535b70 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -156,6 +156,7 @@ struct bpf_verifier_stack_elem {
+
+ #define BPF_COMPLEXITY_LIMIT_INSNS 131072
+ #define BPF_COMPLEXITY_LIMIT_STACK 1024
++#define BPF_COMPLEXITY_LIMIT_STATES 64
+
+ #define BPF_MAP_PTR_UNPRIV 1UL
+ #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
+@@ -4735,7 +4736,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl;
+ struct bpf_verifier_state *cur = env->cur_state;
+- int i, j, err;
++ int i, j, err, states_cnt = 0;
+
+ sl = env->explored_states[insn_idx];
+ if (!sl)
+@@ -4762,8 +4763,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
+ return 1;
+ }
+ sl = sl->next;
++ states_cnt++;
+ }
+
++ if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
++ return 0;
++
+ /* there were no equivalent states, remember current one.
+ * technically the current state is not proven to be safe yet,
+ * but it will either reach outer most bpf_exit (which means it's safe)
+--
+2.19.1
+
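A rough stand-alone C sketch of the new cap (names mirror the patch, the
list type is simplified; this is not the kernel function itself): once an
unprivileged program has accumulated more than BPF_COMPLEXITY_LIMIT_STATES
states at one instruction index, is_state_visited() stops caching further
states there and just keeps verifying.

    /* Illustrative model of the capped per-insn state list walk. */
    #define BPF_COMPLEXITY_LIMIT_STATES 64

    struct state_list {
        struct state_list *next;
        /* pruning state elided */
    };

    /* Returns 1 if a new state should still be remembered at this insn,
     * 0 if the per-insn cap says to skip caching (unprivileged only). */
    static int may_cache_new_state(const struct state_list *head,
                                   int allow_ptr_leaks)
    {
        const struct state_list *sl;
        int states_cnt = 0;

        for (sl = head; sl; sl = sl->next)
            states_cnt++;   /* same walk that also tries to prune */

        if (!allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
            return 0;
        return 1;
    }

Hitting the cap never rejects a program; it only stops the pruning list from
growing, so a pathological program runs into BPF_COMPLEXITY_LIMIT_INSNS
instead of making every later visit walk an ever longer list.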
--- /dev/null
+From 761ea22a5fe3fc3991b56c94f24f1e3edbc6fade Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:23 +0100
+Subject: bpf: enable access to ax register also from verifier rewrite
+
+[ commit 9b73bfdd08e73231d6a90ae6db4b46b3fbf56c30 upstream ]
+
+Right now we are using the BPF ax register in the JIT for constant
+blinding as well as in the interpreter as a temporary variable. The
+verifier will not be able to use it simply because its use would get
+overridden by the former in bpf_jit_blind_insn(). However, it can be
+made to work in that blinding will be skipped if there is a prior use
+of ax in either the source or destination register of the instruction.
+Taking the constraints of ax into account, the verifier is then open
+to use it in rewrites under some constraints. Note, the ax register
+already has mappings in every eBPF JIT.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/filter.h | 7 +------
+ kernel/bpf/core.c | 20 ++++++++++++++++++++
+ 2 files changed, 21 insertions(+), 6 deletions(-)
+
+diff --git a/include/linux/filter.h b/include/linux/filter.h
+index 81420a0efdbe..1a39d57eb88f 100644
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -53,12 +53,7 @@ struct sock_reuseport;
+ #define BPF_REG_D BPF_REG_8 /* data, callee-saved */
+ #define BPF_REG_H BPF_REG_9 /* hlen, callee-saved */
+
+-/* Kernel hidden auxiliary/helper register for hardening step.
+- * Only used by eBPF JITs. It's nothing more than a temporary
+- * register that JITs use internally, only that here it's part
+- * of eBPF instructions that have been rewritten for blinding
+- * constants. See JIT pre-step in bpf_jit_blind_constants().
+- */
++/* Kernel hidden auxiliary/helper register. */
+ #define BPF_REG_AX MAX_BPF_REG
+ #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1)
+ #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index aefc62ae4a1e..474525e3a9db 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -643,6 +643,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
+ BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
+ BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
+
++ /* Constraints on AX register:
++ *
++ * AX register is inaccessible from user space. It is mapped in
++ * all JITs, and used here for constant blinding rewrites. It is
++ * typically "stateless" meaning its contents are only valid within
++ * the executed instruction, but not across several instructions.
++ * There are a few exceptions however which are further detailed
++ * below.
++ *
++ * Constant blinding is only used by JITs, not in the interpreter.
++ * The interpreter uses AX in some occasions as a local temporary
++ * register e.g. in DIV or MOD instructions.
++ *
++ * In restricted circumstances, the verifier can also use the AX
++ * register for rewrites as long as they do not interfere with
++ * the above cases!
++ */
++ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
++ goto out;
++
+ if (from->imm == 0 &&
+ (from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
+ from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
+--
+2.19.1
+
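A minimal sketch of the new rule, as stand-alone C rather than the kernel
code (the struct only mirrors the two bpf_insn fields that matter, and the
register number 11 is simply MAX_BPF_REG in this kernel): any instruction
that already names the hidden AX register in its source or destination must
be skipped by constant blinding, which is what makes AX usable for verifier
rewrites.

    struct insn_like {
        unsigned char dst_reg;
        unsigned char src_reg;
    };

    #define REG_AX_SKETCH 11   /* BPF_REG_AX == MAX_BPF_REG, hidden from user space */

    static int insn_uses_ax(const struct insn_like *insn)
    {
        /* bpf_jit_blind_insn() must leave such instructions untouched,
         * otherwise blinding would clobber the verifier's use of AX. */
        return insn->dst_reg == REG_AX_SKETCH || insn->src_reg == REG_AX_SKETCH;
    }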
--- /dev/null
+From e6cba8acf0f7f47cc1b8c5884dac40b451de9c58 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:27 +0100
+Subject: bpf: fix check_map_access smin_value test when pointer contains
+ offset
+
+[ commit b7137c4eab85c1cf3d46acdde90ce1163b28c873 upstream ]
+
+In check_map_access() we probe actual bounds through __check_map_access()
+with offset of reg->smin_value + off for lower bound and offset of
+reg->umax_value + off for the upper bound. However, even though the
+reg->smin_value could have a negative value, the final result of the
+sum with off could be positive when pointer arithmetic with known and
+unknown scalars is combined. In this case we reject the program with
+an error such as "R<x> min value is negative, either use unsigned index
+or do a if (index >=0) check." even though the access itself would be
+fine. Therefore extend the check to probe whether the actual resulting
+reg->smin_value + off is less than zero.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index fbaa3b9e1d71..f9d5aea4891d 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1294,13 +1294,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
+ */
+ if (env->log.level)
+ print_verifier_state(env, state);
++
+ /* The minimum value is only important with signed
+ * comparisons where we can't assume the floor of a
+ * value is 0. If we are using signed variables for our
+ * index'es we need to make sure that whatever we use
+ * will have a set floor within our range.
+ */
+- if (reg->smin_value < 0) {
++ if (reg->smin_value < 0 &&
++ (reg->smin_value == S64_MIN ||
++ (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
++ reg->smin_value + off < 0)) {
+ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+--
+2.19.1
+
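A worked illustration of why the old check was too strict (stand-alone C
mirroring the new condition, numbers made up): a register bounded to
[-2, 6] combined with a constant offset of 4 can only produce map offsets
in [2, 10], so a negative smin_value alone is harmless; only a sum that can
still go negative, or that is not representable, should be rejected.

    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the patched condition in check_map_access(). */
    static bool min_offset_may_be_negative(int64_t smin_value, int32_t off)
    {
        if (smin_value >= 0)
            return false;
        if (smin_value == INT64_MIN)
            return true;
        if (off + smin_value != (int64_t)(int32_t)(off + smin_value))
            return true;
        return smin_value + off < 0;
    }

    /* min_offset_may_be_negative(-2, 4) == false: lowest access is offset 2.
     * min_offset_may_be_negative(-8, 4) == true:  offset -4 would be possible. */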
--- /dev/null
+From 3bfc7522973691f132ca4c855a82aee985b94751 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:30 +0100
+Subject: bpf: fix inner map masking to prevent oob under speculation
+
+[ commit 9d5564ddcf2a0f5ba3fa1c3a1f8a1b59ad309553 upstream ]
+
+During review I noticed that inner meta map setup for map in
+map is buggy in that it does not propagate all needed data
+from the reference map which the verifier is later accessing.
+
+In particular one such case is index masking to prevent out of
+bounds access under speculative execution due to missing the
+map's unpriv_array/index_mask field propagation. Fix this such
+that the verifier is generating the correct code for inlined
+lookups in case of unprivileged use.
+
+Before patch (test_verifier's 'map in map access' dump):
+
+ # bpftool prog dump xla id 3
+ 0: (62) *(u32 *)(r10 -4) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -4
+ 3: (18) r1 = map[id:4]
+ 5: (07) r1 += 272 |
+ 6: (61) r0 = *(u32 *)(r2 +0) |
+ 7: (35) if r0 >= 0x1 goto pc+6 | Inlined map in map lookup
+ 8: (54) (u32) r0 &= (u32) 0 | with index masking for
+ 9: (67) r0 <<= 3 | map->unpriv_array.
+ 10: (0f) r0 += r1 |
+ 11: (79) r0 = *(u64 *)(r0 +0) |
+ 12: (15) if r0 == 0x0 goto pc+1 |
+ 13: (05) goto pc+1 |
+ 14: (b7) r0 = 0 |
+ 15: (15) if r0 == 0x0 goto pc+11
+ 16: (62) *(u32 *)(r10 -4) = 0
+ 17: (bf) r2 = r10
+ 18: (07) r2 += -4
+ 19: (bf) r1 = r0
+ 20: (07) r1 += 272 |
+ 21: (61) r0 = *(u32 *)(r2 +0) | Index masking missing (!)
+ 22: (35) if r0 >= 0x1 goto pc+3 | for inner map despite
+ 23: (67) r0 <<= 3 | map->unpriv_array set.
+ 24: (0f) r0 += r1 |
+ 25: (05) goto pc+1 |
+ 26: (b7) r0 = 0 |
+ 27: (b7) r0 = 0
+ 28: (95) exit
+
+After patch:
+
+ # bpftool prog dump xla id 1
+ 0: (62) *(u32 *)(r10 -4) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -4
+ 3: (18) r1 = map[id:2]
+ 5: (07) r1 += 272 |
+ 6: (61) r0 = *(u32 *)(r2 +0) |
+ 7: (35) if r0 >= 0x1 goto pc+6 | Same inlined map in map lookup
+ 8: (54) (u32) r0 &= (u32) 0 | with index masking due to
+ 9: (67) r0 <<= 3 | map->unpriv_array.
+ 10: (0f) r0 += r1 |
+ 11: (79) r0 = *(u64 *)(r0 +0) |
+ 12: (15) if r0 == 0x0 goto pc+1 |
+ 13: (05) goto pc+1 |
+ 14: (b7) r0 = 0 |
+ 15: (15) if r0 == 0x0 goto pc+12
+ 16: (62) *(u32 *)(r10 -4) = 0
+ 17: (bf) r2 = r10
+ 18: (07) r2 += -4
+ 19: (bf) r1 = r0
+ 20: (07) r1 += 272 |
+ 21: (61) r0 = *(u32 *)(r2 +0) |
+ 22: (35) if r0 >= 0x1 goto pc+4 | Now fixed inlined inner map
+ 23: (54) (u32) r0 &= (u32) 0 | lookup with proper index masking
+ 24: (67) r0 <<= 3 | for map->unpriv_array.
+ 25: (0f) r0 += r1 |
+ 26: (05) goto pc+1 |
+ 27: (b7) r0 = 0 |
+ 28: (b7) r0 = 0
+ 29: (95) exit
+
+Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/map_in_map.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
+index 3bfbf4464416..9670ee5ee74e 100644
+--- a/kernel/bpf/map_in_map.c
++++ b/kernel/bpf/map_in_map.c
+@@ -12,6 +12,7 @@
+ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
+ {
+ struct bpf_map *inner_map, *inner_map_meta;
++ u32 inner_map_meta_size;
+ struct fd f;
+
+ f = fdget(inner_map_ufd);
+@@ -35,7 +36,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
+ return ERR_PTR(-EINVAL);
+ }
+
+- inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
++ inner_map_meta_size = sizeof(*inner_map_meta);
++ /* In some cases verifier needs to access beyond just base map. */
++ if (inner_map->ops == &array_map_ops)
++ inner_map_meta_size = sizeof(struct bpf_array);
++
++ inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
+ if (!inner_map_meta) {
+ fdput(f);
+ return ERR_PTR(-ENOMEM);
+@@ -45,9 +51,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
+ inner_map_meta->key_size = inner_map->key_size;
+ inner_map_meta->value_size = inner_map->value_size;
+ inner_map_meta->map_flags = inner_map->map_flags;
+- inner_map_meta->ops = inner_map->ops;
+ inner_map_meta->max_entries = inner_map->max_entries;
+
++ /* Misc members not needed in bpf_map_meta_equal() check. */
++ inner_map_meta->ops = inner_map->ops;
++ if (inner_map->ops == &array_map_ops) {
++ inner_map_meta->unpriv_array = inner_map->unpriv_array;
++ container_of(inner_map_meta, struct bpf_array, map)->index_mask =
++ container_of(inner_map, struct bpf_array, map)->index_mask;
++ }
++
+ fdput(f);
+ return inner_map_meta;
+ }
+--
+2.19.1
+
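To see why the missing propagation matters, here is a stand-alone C sketch
of the masked array lookup that the verifier inlines for unprivileged users
(field names mirror struct bpf_array, but this is illustrative C, not the
generated BPF): without index_mask copied into the meta map, the inner
lookup is emitted without the AND, and a mispredicted bounds check is no
longer harmless.

    #include <stddef.h>
    #include <stdint.h>

    struct array_like {
        uint32_t max_entries;
        uint32_t index_mask;   /* power-of-two-minus-one mask covering the array */
        uint64_t values[];
    };

    static uint64_t *masked_lookup(struct array_like *a, uint32_t index)
    {
        if (index >= a->max_entries)
            return NULL;
        /* Even if the branch above is speculated past, the AND keeps the
         * access inside the (rounded-up) array, bounding what can leak. */
        return &a->values[index & a->index_mask];
    }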
--- /dev/null
+From e467ff9122eb2f03e7844189882a40a2ac3726f4 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:29 +0100
+Subject: bpf: fix sanitation of alu op with pointer / scalar type from
+ different paths
+
+[ commit d3bd7413e0ca40b60cf60d4003246d067cafdeda upstream ]
+
+While 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer
+arithmetic") took care of rejecting an alu op on a pointer when e.g. the
+pointer came from two different map values with different map properties
+such as value size, Jann reported a case that was not covered yet: a given
+alu op is used in both "ptr_reg += reg" and "numeric_reg += reg" from
+different branches, where we would incorrectly try to sanitize based
+on the pointer's limit. Catch this corner case and reject the program
+instead.
+
+Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h | 1 +
+ kernel/bpf/verifier.c | 61 ++++++++++++++++++++++++++++--------
+ 2 files changed, 49 insertions(+), 13 deletions(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 2716dcbfb2f1..91393724e933 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -147,6 +147,7 @@ struct bpf_verifier_state_list {
+ #define BPF_ALU_SANITIZE_SRC 1U
+ #define BPF_ALU_SANITIZE_DST 2U
+ #define BPF_ALU_NEG_VALUE (1U << 2)
++#define BPF_ALU_NON_POINTER (1U << 3)
+ #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
+ BPF_ALU_SANITIZE_DST)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 9ab0adedb174..4d81be2d0739 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2734,6 +2734,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
+ }
+ }
+
++static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
++ const struct bpf_insn *insn)
++{
++ return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
++}
++
++static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
++ u32 alu_state, u32 alu_limit)
++{
++ /* If we arrived here from different branches with different
++ * state or limits to sanitize, then this won't work.
++ */
++ if (aux->alu_state &&
++ (aux->alu_state != alu_state ||
++ aux->alu_limit != alu_limit))
++ return -EACCES;
++
++ /* Corresponding fixup done in fixup_bpf_calls(). */
++ aux->alu_state = alu_state;
++ aux->alu_limit = alu_limit;
++ return 0;
++}
++
++static int sanitize_val_alu(struct bpf_verifier_env *env,
++ struct bpf_insn *insn)
++{
++ struct bpf_insn_aux_data *aux = cur_aux(env);
++
++ if (can_skip_alu_sanitation(env, insn))
++ return 0;
++
++ return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
++}
++
+ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ const struct bpf_reg_state *ptr_reg,
+@@ -2748,7 +2782,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+ struct bpf_reg_state tmp;
+ bool ret;
+
+- if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
++ if (can_skip_alu_sanitation(env, insn))
+ return 0;
+
+ /* We already marked aux for masking from non-speculative
+@@ -2764,19 +2798,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
+
+ if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
+ return 0;
+-
+- /* If we arrived here from different branches with different
+- * limits to sanitize, then this won't work.
+- */
+- if (aux->alu_state &&
+- (aux->alu_state != alu_state ||
+- aux->alu_limit != alu_limit))
++ if (update_alu_sanitation_state(aux, alu_state, alu_limit))
+ return -EACCES;
+-
+- /* Corresponding fixup done in fixup_bpf_calls(). */
+- aux->alu_state = alu_state;
+- aux->alu_limit = alu_limit;
+-
+ do_sim:
+ /* Simulate and find potential out-of-bounds access under
+ * speculative execution from truncation as a result of
+@@ -3048,6 +3071,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+ s64 smin_val, smax_val;
+ u64 umin_val, umax_val;
+ u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
++ u32 dst = insn->dst_reg;
++ int ret;
+
+ if (insn_bitness == 32) {
+ /* Relevant for 32-bit RSH: Information can propagate towards
+@@ -3082,6 +3107,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+
+ switch (opcode) {
+ case BPF_ADD:
++ ret = sanitize_val_alu(env, insn);
++ if (ret < 0) {
++ verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
++ return ret;
++ }
+ if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
+ signed_add_overflows(dst_reg->smax_value, smax_val)) {
+ dst_reg->smin_value = S64_MIN;
+@@ -3101,6 +3131,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
+ dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
+ break;
+ case BPF_SUB:
++ ret = sanitize_val_alu(env, insn);
++ if (ret < 0) {
++ verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
++ return ret;
++ }
+ if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
+ signed_sub_overflows(dst_reg->smax_value, smin_val)) {
+ /* Overflow possible, we know nothing */
+--
+2.19.1
+
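The shared helper introduced here boils down to a per-instruction "record
once, then require agreement" rule. A stand-alone sketch mirroring
update_alu_sanitation_state(), with the kernel types simplified:

    #include <errno.h>
    #include <stdint.h>

    struct aux_like { uint32_t alu_state, alu_limit; };

    static int record_sanitation(struct aux_like *aux, uint32_t state, uint32_t limit)
    {
        /* One instruction can only be patched one way, so paths that
         * disagree on the sanitation state or limit cannot both be honored. */
        if (aux->alu_state &&
            (aux->alu_state != state || aux->alu_limit != limit))
            return -EACCES;

        aux->alu_state = state;
        aux->alu_limit = limit;
        return 0;
    }

A pure scalar path records BPF_ALU_NON_POINTER with limit 0, so the same
instruction later reached as pointer arithmetic trips the conflict and the
program is rejected.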
--- /dev/null
+From 7125bbd1caa7f8875350606add501e2948c16b95 Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov <ast@kernel.org>
+Date: Mon, 28 Jan 2019 21:28:19 +0100
+Subject: bpf: improve verifier branch analysis
+
+[ commit 4f7b3e82589e0de723780198ec7983e427144c0a upstream ]
+
+Pathological BPF programs may try to force the verifier to explode in
+the number of branch states:
+ 20: (d5) if r1 s<= 0x24000028 goto pc+0
+ 21: (b5) if r0 <= 0xe1fa20 goto pc+2
+ 22: (d5) if r1 s<= 0x7e goto pc+0
+ 23: (b5) if r0 <= 0xe880e000 goto pc+0
+ 24: (c5) if r0 s< 0x2100ecf4 goto pc+0
+ 25: (d5) if r1 s<= 0xe880e000 goto pc+1
+ 26: (c5) if r0 s< 0xf4041810 goto pc+0
+ 27: (d5) if r1 s<= 0x1e007e goto pc+0
+ 28: (b5) if r0 <= 0xe86be000 goto pc+0
+ 29: (07) r0 += 16614
+ 30: (c5) if r0 s< 0x6d0020da goto pc+0
+ 31: (35) if r0 >= 0x2100ecf4 goto pc+0
+
+Teach the verifier to recognize always-taken and never-taken branches.
+This analysis is already done for == and != comparisons.
+Expand it to all other branches.
+
+It also helps real BPF programs to be verified faster:
+                      before    after
+bpf_lb-DLB_L3.o         2003     1940
+bpf_lb-DLB_L4.o         3173     3089
+bpf_lb-DUNKNOWN.o       1080     1065
+bpf_lxc-DDROP_ALL.o    29584    28052
+bpf_lxc-DUNKNOWN.o     36916    35487
+bpf_netdev.o           11188    10864
+bpf_overlay.o           6679     6643
+bpf_lcx_jit.o          39555    38437
+
+Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Edward Cree <ecree@solarflare.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 93 +++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 80 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 341806668f03..3d093003c723 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -3475,6 +3475,79 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
+ }
+ }
+
++/* compute branch direction of the expression "if (reg opcode val) goto target;"
++ * and return:
++ * 1 - branch will be taken and "goto target" will be executed
++ * 0 - branch will not be taken and fall-through to next insn
++ * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
++ */
++static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
++{
++ if (__is_pointer_value(false, reg))
++ return -1;
++
++ switch (opcode) {
++ case BPF_JEQ:
++ if (tnum_is_const(reg->var_off))
++ return !!tnum_equals_const(reg->var_off, val);
++ break;
++ case BPF_JNE:
++ if (tnum_is_const(reg->var_off))
++ return !tnum_equals_const(reg->var_off, val);
++ break;
++ case BPF_JGT:
++ if (reg->umin_value > val)
++ return 1;
++ else if (reg->umax_value <= val)
++ return 0;
++ break;
++ case BPF_JSGT:
++ if (reg->smin_value > (s64)val)
++ return 1;
++ else if (reg->smax_value < (s64)val)
++ return 0;
++ break;
++ case BPF_JLT:
++ if (reg->umax_value < val)
++ return 1;
++ else if (reg->umin_value >= val)
++ return 0;
++ break;
++ case BPF_JSLT:
++ if (reg->smax_value < (s64)val)
++ return 1;
++ else if (reg->smin_value >= (s64)val)
++ return 0;
++ break;
++ case BPF_JGE:
++ if (reg->umin_value >= val)
++ return 1;
++ else if (reg->umax_value < val)
++ return 0;
++ break;
++ case BPF_JSGE:
++ if (reg->smin_value >= (s64)val)
++ return 1;
++ else if (reg->smax_value < (s64)val)
++ return 0;
++ break;
++ case BPF_JLE:
++ if (reg->umax_value <= val)
++ return 1;
++ else if (reg->umin_value > val)
++ return 0;
++ break;
++ case BPF_JSLE:
++ if (reg->smax_value <= (s64)val)
++ return 1;
++ else if (reg->smin_value > (s64)val)
++ return 0;
++ break;
++ }
++
++ return -1;
++}
++
+ /* Adjusts the register min/max values in the case that the dst_reg is the
+ * variable register that we are working on, and src_reg is a constant or we're
+ * simply doing a BPF_K check.
+@@ -3868,21 +3941,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
+
+ dst_reg = &regs[insn->dst_reg];
+
+- /* detect if R == 0 where R was initialized to zero earlier */
+- if (BPF_SRC(insn->code) == BPF_K &&
+- (opcode == BPF_JEQ || opcode == BPF_JNE) &&
+- dst_reg->type == SCALAR_VALUE &&
+- tnum_is_const(dst_reg->var_off)) {
+- if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
+- (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
+- /* if (imm == imm) goto pc+off;
+- * only follow the goto, ignore fall-through
+- */
++ if (BPF_SRC(insn->code) == BPF_K) {
++ int pred = is_branch_taken(dst_reg, insn->imm, opcode);
++
++ if (pred == 1) {
++ /* only follow the goto, ignore fall-through */
+ *insn_idx += insn->off;
+ return 0;
+- } else {
+- /* if (imm != imm) goto pc+off;
+- * only follow fall-through branch, since
++ } else if (pred == 0) {
++ /* only follow fall-through branch, since
+ * that's where the program will go
+ */
+ return 0;
+--
+2.19.1
+
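The reasoning is plain interval logic, one case per opcode. A stand-alone C
sketch for the unsigned "greater than" case (illustrative, not the kernel
function): with the register known to lie in [umin, umax], "if (reg > val)"
is always taken when umin > val and never taken when umax <= val; otherwise
both edges still have to be explored.

    #include <stdint.h>

    /* 1: always taken, 0: never taken, -1: unknown */
    static int branch_taken_jgt(uint64_t umin, uint64_t umax, uint64_t val)
    {
        if (umin > val)
            return 1;
        if (umax <= val)
            return 0;
        return -1;
    }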
--- /dev/null
+From 94ad22febaf47fcf129727187fd62feac7919bdf Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:21 +0100
+Subject: bpf: move {prev_,}insn_idx into verifier env
+
+[ commit c08435ec7f2bc8f4109401f696fd55159b4b40cb upstream ]
+
+Move prev_insn_idx and insn_idx from the do_check() function into
+the verifier environment, so they can be read inside the various
+helper functions for handling the instructions. It's easier to put
+this into the environment rather than changing all call-sites only
+to pass it along. insn_idx is useful in particular since it later
+on allows holding state in env->insn_aux_data[env->insn_idx].
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h | 2 +
+ kernel/bpf/verifier.c | 75 ++++++++++++++++++------------------
+ 2 files changed, 40 insertions(+), 37 deletions(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index 1fd6fa822d2c..b1587ffdea7b 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -186,6 +186,8 @@ struct bpf_subprog_info {
+ * one verifier_env per bpf_check() call
+ */
+ struct bpf_verifier_env {
++ u32 insn_idx;
++ u32 prev_insn_idx;
+ struct bpf_prog *prog; /* eBPF program being verified */
+ const struct bpf_verifier_ops *ops;
+ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 2bbb98535b70..6d11320998c6 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4816,7 +4816,6 @@ static int do_check(struct bpf_verifier_env *env)
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct bpf_reg_state *regs;
+ int insn_cnt = env->prog->len, i;
+- int insn_idx, prev_insn_idx = 0;
+ int insn_processed = 0;
+ bool do_print_state = false;
+
+@@ -4835,19 +4834,19 @@ static int do_check(struct bpf_verifier_env *env)
+ BPF_MAIN_FUNC /* callsite */,
+ 0 /* frameno */,
+ 0 /* subprogno, zero == main subprog */);
+- insn_idx = 0;
++
+ for (;;) {
+ struct bpf_insn *insn;
+ u8 class;
+ int err;
+
+- if (insn_idx >= insn_cnt) {
++ if (env->insn_idx >= insn_cnt) {
+ verbose(env, "invalid insn idx %d insn_cnt %d\n",
+- insn_idx, insn_cnt);
++ env->insn_idx, insn_cnt);
+ return -EFAULT;
+ }
+
+- insn = &insns[insn_idx];
++ insn = &insns[env->insn_idx];
+ class = BPF_CLASS(insn->code);
+
+ if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
+@@ -4857,7 +4856,7 @@ static int do_check(struct bpf_verifier_env *env)
+ return -E2BIG;
+ }
+
+- err = is_state_visited(env, insn_idx);
++ err = is_state_visited(env, env->insn_idx);
+ if (err < 0)
+ return err;
+ if (err == 1) {
+@@ -4865,9 +4864,9 @@ static int do_check(struct bpf_verifier_env *env)
+ if (env->log.level) {
+ if (do_print_state)
+ verbose(env, "\nfrom %d to %d: safe\n",
+- prev_insn_idx, insn_idx);
++ env->prev_insn_idx, env->insn_idx);
+ else
+- verbose(env, "%d: safe\n", insn_idx);
++ verbose(env, "%d: safe\n", env->insn_idx);
+ }
+ goto process_bpf_exit;
+ }
+@@ -4880,10 +4879,10 @@ static int do_check(struct bpf_verifier_env *env)
+
+ if (env->log.level > 1 || (env->log.level && do_print_state)) {
+ if (env->log.level > 1)
+- verbose(env, "%d:", insn_idx);
++ verbose(env, "%d:", env->insn_idx);
+ else
+ verbose(env, "\nfrom %d to %d:",
+- prev_insn_idx, insn_idx);
++ env->prev_insn_idx, env->insn_idx);
+ print_verifier_state(env, state->frame[state->curframe]);
+ do_print_state = false;
+ }
+@@ -4894,19 +4893,20 @@ static int do_check(struct bpf_verifier_env *env)
+ .private_data = env,
+ };
+
+- verbose(env, "%d: ", insn_idx);
++ verbose(env, "%d: ", env->insn_idx);
+ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
+ }
+
+ if (bpf_prog_is_dev_bound(env->prog->aux)) {
+- err = bpf_prog_offload_verify_insn(env, insn_idx,
+- prev_insn_idx);
++ err = bpf_prog_offload_verify_insn(env, env->insn_idx,
++ env->prev_insn_idx);
+ if (err)
+ return err;
+ }
+
+ regs = cur_regs(env);
+- env->insn_aux_data[insn_idx].seen = true;
++ env->insn_aux_data[env->insn_idx].seen = true;
++
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ err = check_alu_op(env, insn);
+ if (err)
+@@ -4931,13 +4931,13 @@ static int do_check(struct bpf_verifier_env *env)
+ /* check that memory (src_reg + off) is readable,
+ * the state of dst_reg will be updated by this func
+ */
+- err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
+- BPF_SIZE(insn->code), BPF_READ,
+- insn->dst_reg, false);
++ err = check_mem_access(env, env->insn_idx, insn->src_reg,
++ insn->off, BPF_SIZE(insn->code),
++ BPF_READ, insn->dst_reg, false);
+ if (err)
+ return err;
+
+- prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
++ prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
+
+ if (*prev_src_type == NOT_INIT) {
+ /* saw a valid insn
+@@ -4964,10 +4964,10 @@ static int do_check(struct bpf_verifier_env *env)
+ enum bpf_reg_type *prev_dst_type, dst_reg_type;
+
+ if (BPF_MODE(insn->code) == BPF_XADD) {
+- err = check_xadd(env, insn_idx, insn);
++ err = check_xadd(env, env->insn_idx, insn);
+ if (err)
+ return err;
+- insn_idx++;
++ env->insn_idx++;
+ continue;
+ }
+
+@@ -4983,13 +4983,13 @@ static int do_check(struct bpf_verifier_env *env)
+ dst_reg_type = regs[insn->dst_reg].type;
+
+ /* check that memory (dst_reg + off) is writeable */
+- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+- BPF_SIZE(insn->code), BPF_WRITE,
+- insn->src_reg, false);
++ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
++ insn->off, BPF_SIZE(insn->code),
++ BPF_WRITE, insn->src_reg, false);
+ if (err)
+ return err;
+
+- prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
++ prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
+
+ if (*prev_dst_type == NOT_INIT) {
+ *prev_dst_type = dst_reg_type;
+@@ -5018,9 +5018,9 @@ static int do_check(struct bpf_verifier_env *env)
+ }
+
+ /* check that memory (dst_reg + off) is writeable */
+- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+- BPF_SIZE(insn->code), BPF_WRITE,
+- -1, false);
++ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
++ insn->off, BPF_SIZE(insn->code),
++ BPF_WRITE, -1, false);
+ if (err)
+ return err;
+
+@@ -5038,9 +5038,9 @@ static int do_check(struct bpf_verifier_env *env)
+ }
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+- err = check_func_call(env, insn, &insn_idx);
++ err = check_func_call(env, insn, &env->insn_idx);
+ else
+- err = check_helper_call(env, insn->imm, insn_idx);
++ err = check_helper_call(env, insn->imm, env->insn_idx);
+ if (err)
+ return err;
+
+@@ -5053,7 +5053,7 @@ static int do_check(struct bpf_verifier_env *env)
+ return -EINVAL;
+ }
+
+- insn_idx += insn->off + 1;
++ env->insn_idx += insn->off + 1;
+ continue;
+
+ } else if (opcode == BPF_EXIT) {
+@@ -5067,8 +5067,8 @@ static int do_check(struct bpf_verifier_env *env)
+
+ if (state->curframe) {
+ /* exit from nested function */
+- prev_insn_idx = insn_idx;
+- err = prepare_func_exit(env, &insn_idx);
++ env->prev_insn_idx = env->insn_idx;
++ err = prepare_func_exit(env, &env->insn_idx);
+ if (err)
+ return err;
+ do_print_state = true;
+@@ -5094,7 +5094,8 @@ static int do_check(struct bpf_verifier_env *env)
+ if (err)
+ return err;
+ process_bpf_exit:
+- err = pop_stack(env, &prev_insn_idx, &insn_idx);
++ err = pop_stack(env, &env->prev_insn_idx,
++ &env->insn_idx);
+ if (err < 0) {
+ if (err != -ENOENT)
+ return err;
+@@ -5104,7 +5105,7 @@ static int do_check(struct bpf_verifier_env *env)
+ continue;
+ }
+ } else {
+- err = check_cond_jmp_op(env, insn, &insn_idx);
++ err = check_cond_jmp_op(env, insn, &env->insn_idx);
+ if (err)
+ return err;
+ }
+@@ -5121,8 +5122,8 @@ static int do_check(struct bpf_verifier_env *env)
+ if (err)
+ return err;
+
+- insn_idx++;
+- env->insn_aux_data[insn_idx].seen = true;
++ env->insn_idx++;
++ env->insn_aux_data[env->insn_idx].seen = true;
+ } else {
+ verbose(env, "invalid BPF_LD mode\n");
+ return -EINVAL;
+@@ -5132,7 +5133,7 @@ static int do_check(struct bpf_verifier_env *env)
+ return -EINVAL;
+ }
+
+- insn_idx++;
++ env->insn_idx++;
+ }
+
+ verbose(env, "processed %d insns (limit %d), stack depth ",
+--
+2.19.1
+
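The payoff of keeping the indices in the env shows up later in this series:
a helper can reach the aux data of the instruction currently being verified
without threading the index through every call. A stand-alone sketch of such
a helper (the "prevent out of bounds speculation on pointer arithmetic"
patch below adds exactly this shape as cur_aux(); the types here are
simplified stand-ins):

    #include <stdint.h>

    struct insn_aux_like { int seen; };

    struct env_like {
        uint32_t insn_idx;
        struct insn_aux_like *insn_aux_data;
    };

    static struct insn_aux_like *cur_aux_like(struct env_like *env)
    {
        return &env->insn_aux_data[env->insn_idx];
    }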
--- /dev/null
+From b9750ff1cc957ae81cde2758812e14497b209a3c Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:22 +0100
+Subject: bpf: move tmp variable into ax register in interpreter
+
+[ commit 144cd91c4c2bced6eb8a7e25e590f6618a11e854 upstream ]
+
+This change moves the on-stack 64-bit tmp variable in ___bpf_prog_run()
+into the hidden ax register. The latter is currently only used in JITs
+for constant blinding as a temporary scratch register, meaning the BPF
+interpreter will never see the use of ax. Therefore it is safe to use
+it for the cases where tmp has been used earlier. This is needed to later
+on allow restricted hidden use of ax in both interpreter and JITs.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/filter.h | 3 ++-
+ kernel/bpf/core.c | 34 +++++++++++++++++-----------------
+ 2 files changed, 19 insertions(+), 18 deletions(-)
+
+diff --git a/include/linux/filter.h b/include/linux/filter.h
+index ec90d5255cf7..81420a0efdbe 100644
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -60,7 +60,8 @@ struct sock_reuseport;
+ * constants. See JIT pre-step in bpf_jit_blind_constants().
+ */
+ #define BPF_REG_AX MAX_BPF_REG
+-#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1)
++#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1)
++#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG
+
+ /* unused opcode to mark special call to bpf_tail_call() helper */
+ #define BPF_TAIL_CALL 0xf0
+diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
+index 3f5bf1af0826..aefc62ae4a1e 100644
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -52,6 +52,7 @@
+ #define DST regs[insn->dst_reg]
+ #define SRC regs[insn->src_reg]
+ #define FP regs[BPF_REG_FP]
++#define AX regs[BPF_REG_AX]
+ #define ARG1 regs[BPF_REG_ARG1]
+ #define CTX regs[BPF_REG_CTX]
+ #define IMM insn->imm
+@@ -971,7 +972,6 @@ bool bpf_opcode_in_insntable(u8 code)
+ */
+ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+ {
+- u64 tmp;
+ #define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
+ #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
+ static const void *jumptable[256] = {
+@@ -1045,36 +1045,36 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+ (*(s64 *) &DST) >>= IMM;
+ CONT;
+ ALU64_MOD_X:
+- div64_u64_rem(DST, SRC, &tmp);
+- DST = tmp;
++ div64_u64_rem(DST, SRC, &AX);
++ DST = AX;
+ CONT;
+ ALU_MOD_X:
+- tmp = (u32) DST;
+- DST = do_div(tmp, (u32) SRC);
++ AX = (u32) DST;
++ DST = do_div(AX, (u32) SRC);
+ CONT;
+ ALU64_MOD_K:
+- div64_u64_rem(DST, IMM, &tmp);
+- DST = tmp;
++ div64_u64_rem(DST, IMM, &AX);
++ DST = AX;
+ CONT;
+ ALU_MOD_K:
+- tmp = (u32) DST;
+- DST = do_div(tmp, (u32) IMM);
++ AX = (u32) DST;
++ DST = do_div(AX, (u32) IMM);
+ CONT;
+ ALU64_DIV_X:
+ DST = div64_u64(DST, SRC);
+ CONT;
+ ALU_DIV_X:
+- tmp = (u32) DST;
+- do_div(tmp, (u32) SRC);
+- DST = (u32) tmp;
++ AX = (u32) DST;
++ do_div(AX, (u32) SRC);
++ DST = (u32) AX;
+ CONT;
+ ALU64_DIV_K:
+ DST = div64_u64(DST, IMM);
+ CONT;
+ ALU_DIV_K:
+- tmp = (u32) DST;
+- do_div(tmp, (u32) IMM);
+- DST = (u32) tmp;
++ AX = (u32) DST;
++ do_div(AX, (u32) IMM);
++ DST = (u32) AX;
+ CONT;
+ ALU_END_TO_BE:
+ switch (IMM) {
+@@ -1330,7 +1330,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
+ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
+ { \
+ u64 stack[stack_size / sizeof(u64)]; \
+- u64 regs[MAX_BPF_REG]; \
++ u64 regs[MAX_BPF_EXT_REG]; \
+ \
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
+ ARG1 = (u64) (unsigned long) ctx; \
+@@ -1343,7 +1343,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
+ const struct bpf_insn *insn) \
+ { \
+ u64 stack[stack_size / sizeof(u64)]; \
+- u64 regs[MAX_BPF_REG]; \
++ u64 regs[MAX_BPF_EXT_REG]; \
+ \
+ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
+ BPF_R1 = r1; \
+--
+2.19.1
+
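For context, a stand-alone C sketch of why the interpreter needs a scratch
slot for 32-bit MOD in the first place (do_div() divides in place and
returns the remainder, so DST cannot be used directly): the patch simply
makes that scratch the hidden AX register instead of an on-stack u64.

    #include <stdint.h>

    /* Models "AX = (u32) DST; DST = do_div(AX, imm);" from ___bpf_prog_run(). */
    static uint64_t alu_mod_k(uint64_t dst, uint32_t imm, uint64_t *ax)
    {
        uint64_t rem;

        *ax = (uint32_t)dst;    /* scratch receives the 32-bit dividend */
        rem = *ax % imm;        /* do_div() hands back the remainder... */
        *ax /= imm;             /* ...and leaves the quotient in the scratch */
        return rem;             /* the interpreter stores this into DST */
    }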
--- /dev/null
+From 054668490b98b1cffa6d01681969bcfc102e10d4 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:28 +0100
+Subject: bpf: prevent out of bounds speculation on pointer arithmetic
+
+[ commit 979d63d50c0c0f7bc537bf821e056cc9fe5abd38 upstream ]
+
+Jann reported that the original commit back in b2157399cc98
+("bpf: prevent out-of-bounds speculation") was not sufficient
+to stop the CPU from speculating out of bounds memory access:
+While b2157399cc98 only focussed on masking array map access
+for unprivileged users for tail calls and data access such
+that the user provided index gets sanitized from BPF program
+and syscall side, there is still a more generic form affected
+from BPF programs that applies to most maps that hold user
+data in relation to dynamic map access when dealing with
+unknown scalars or "slow" known scalars as access offset, for
+example:
+
+ - Load a map value pointer into R6
+ - Load an index into R7
+ - Do a slow computation (e.g. with a memory dependency) that
+ loads a limit into R8 (e.g. load the limit from a map for
+ high latency, then mask it to make the verifier happy)
+ - Exit if R7 >= R8 (mispredicted branch)
+ - Load R0 = R6[R7]
+ - Load R0 = R6[R0]
+
+For unknown scalars there are two options in the BPF verifier
+where we could derive knowledge from in order to guarantee
+safe access to the memory: i) while the </>/<=/>= variants won't
+allow us to derive any lower or upper bounds from the unknown
+scalar where it would be safe to add it to the map value
+pointer, it is possible through an ==/!= test, however. ii) Another
+option is to transform the unknown scalar into a known scalar,
+for example, through ALU ops combination such as R &= <imm>
+followed by R |= <imm> or any similar combination where the
+original information from the unknown scalar would be destroyed
+entirely leaving R with a constant. The initial slow load still
+precedes the latter ALU ops on that register, so the CPU
+executes speculatively from that point. Once we have the known
+scalar, any compare operation would work then. A third option
+only involving registers with known scalars could be crafted
+as described in [0] where a CPU port (e.g. Slow Int unit)
+would be filled with many dependent computations such that
+the subsequent condition depending on its outcome has to wait
+for evaluation on its execution port and thereby executing
+speculatively if the speculated code can be scheduled on a
+different execution port, or any other form of mistraining
+as described in [1], for example. Given this is not limited
+to only unknown scalars, not only map but also stack access
+is affected since both is accessible for unprivileged users
+and could potentially be used for out of bounds access under
+speculation.
+
+In order to prevent any of these cases, the verifier is now
+sanitizing pointer arithmetic on the offset such that any
+out of bounds speculation would be masked in a way where the
+pointer arithmetic result in the destination register will
+stay unchanged, meaning offset masked into zero similar as
+in array_index_nospec() case. With regards to implementation,
+there are three options that were considered: i) new insn
+for sanitation, ii) push/pop insn and sanitation as inlined
+BPF, iii) reuse of ax register and sanitation as inlined BPF.
+
+Option i) has the downside that we end up using from reserved
+bits in the opcode space, but also that we would require
+each JIT to emit masking as native arch opcodes meaning
+mitigation would have slow adoption till everyone implements
+it eventually which is counter-productive. Option ii) and iii)
+have both in common that a temporary register is needed in
+order to implement the sanitation as inlined BPF since we
+are not allowed to modify the source register. While a push /
+pop insn in ii) would be useful to have in any case, it
+requires once again that every JIT needs to implement it
+first. While possible, amount of changes needed would also
+be unsuitable for a -stable patch. Therefore, the path which
+has fewer changes, less BPF instructions for the mitigation
+and does not require anything to be changed in the JITs is
+option iii) which this work is pursuing. The ax register is
+already mapped to a register in all JITs (modulo arm32 where
+it's mapped to stack as various other BPF registers there)
+and used in constant blinding for JITs-only so far. It can
+be reused for verifier rewrites under certain constraints.
+The interpreter's tmp "register" has therefore been remapped
+into extending the register set with hidden ax register and
+reusing that for a number of instructions that needed the
+prior temporary variable internally (e.g. div, mod). This
+allows for zero increase in stack space usage in the interpreter,
+and enables (restricted) generic use in rewrites otherwise as
+long as such a patchlet does not make use of these instructions.
+The sanitation mask is dynamic and relative to the offset the
+map value or stack pointer currently holds.
+
+There are various cases that need to be taken under consideration
+for the masking, e.g. such operation could look as follows:
+ptr += val or val += ptr or ptr -= val. Thus, the value to be
+sanitized could reside either in source or in destination
+register, and the limit is different depending on whether
+the ALU op is addition or subtraction and depending on the
+current known and bounded offset. The limit is derived as
+follows: limit := max_value_size - (smin_value + off). For
+subtraction: limit := umax_value + off. This holds because
+we do not allow any pointer arithmetic that would
+temporarily go out of bounds or would have an unknown
+value with mixed signed bounds where it is unclear at
+verification time whether the actual runtime value would
+be either negative or positive. For example, we have a
+derived map pointer value with constant offset and bounded
+one, so limit based on smin_value works because the verifier
+requires that statically analyzed arithmetic on the pointer
+must be in bounds, and thus it checks if resulting
+smin_value + off and umax_value + off is still within map
+value bounds at time of arithmetic in addition to time of
+access. Similarly, for the case of stack access we derive
+the limit as follows: MAX_BPF_STACK + off for subtraction
+and -off for the case of addition where off := ptr_reg->off +
+ptr_reg->var_off.value. Subtraction is a special case for
+the masking which can be in form of ptr += -val, ptr -= -val,
+or ptr -= val. In the first two cases where we know that
+the value is negative, we need to temporarily negate the
+value in order to do the sanitation on a positive value
+where we later swap the ALU op, and restore original source
+register if the value was in source.
+
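+As a rough worked example with made-up numbers (a 48-byte map value, a
+pointer with constant off 16 and no variable part, and "ptr += reg"):
+the limit becomes value_size - (smin_value + off) = 48 - (0 + 16) = 32,
+and the rewrite emitted later in fixup_bpf_calls() computes, in effect:
+
+  /* Illustrative C model of the ax-based masking sequence. */
+  static unsigned long long masked_addend(unsigned long long reg,
+                                          unsigned int alu_limit)
+  {
+          unsigned long long ax = alu_limit - 1;    /* 31 in this example */
+
+          ax -= reg;
+          ax |= reg;                 /* top bit set unless reg <= alu_limit - 1 */
+          ax = -ax;
+          ax = (long long)ax >> 63;  /* arithmetic shift, as BPF_ARSH does */
+          return ax & reg;           /* reg if in range, otherwise 0 */
+  }
+
+so a reg of 32 or more, even one that only shows up under misprediction,
+adds 0 to the pointer instead of stepping past the map value.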
+The sanitation of pointer arithmetic alone is still not fully
+sufficient as is, since a scenario like the following could
+happen ...
+
+ PTR += 0x1000 (e.g. K-based imm)
+ PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON
+ PTR += 0x1000
+ PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON
+ [...]
+
+... which under speculation could end up as ...
+
+ PTR += 0x1000
+ PTR -= 0 [ truncated by mitigation ]
+ PTR += 0x1000
+ PTR -= 0 [ truncated by mitigation ]
+ [...]
+
+... and therefore still access out of bounds. To prevent such
+case, the verifier is also analyzing safety for potential out
+of bounds access under speculative execution. Meaning, it is
+also simulating pointer access under truncation. We therefore
+"branch off" and push the current verification state after the
+ALU operation with known 0 to the verification stack for later
+analysis. Given the current path analysis succeeded it is
+likely that the one under speculation can be pruned. In any
+case, it is also subject to existing complexity limits and
+therefore anything beyond this point will be rejected. In
+terms of pruning, it needs to be ensured that the verification
+state from speculative execution simulation must never prune
+a non-speculative execution path, therefore, we mark verifier
+state accordingly at the time of push_stack(). If verifier
+detects out of bounds access under speculative execution from
+one of the possible paths that includes a truncation, it will
+reject such program.
+
+Given we mask every reg-based pointer arithmetic for
+unprivileged programs, we've been looking into how it could
+affect real-world programs in terms of size increase. As the
+majority of programs are targeted for privileged-only use
+case, we've unconditionally enabled masking (with its alu
+restrictions on top of it) for privileged programs for the
+sake of testing in order to check i) whether they get rejected
+in its current form, and ii) by how much the number of
+instructions and size will increase. We've tested this by
+using Katran, Cilium and test_l4lb from the kernel selftests.
+For Katran we've evaluated balancer_kern.o, Cilium bpf_lxc.o
+and an older test object bpf_lxc_opt_-DUNKNOWN.o and l4lb
+we've used test_l4lb.o as well as test_l4lb_noinline.o. We
+found that none of the programs got rejected by the verifier
+with this change, and that impact is rather minimal to none.
+balancer_kern.o had 13,904 bytes (1,738 insns) xlated and
+7,797 bytes JITed before and after the change. Most complex
+program in bpf_lxc.o had 30,544 bytes (3,817 insns) xlated
+and 18,538 bytes JITed before and after and none of the other
+tail call programs in bpf_lxc.o had any changes either. For
+the older bpf_lxc_opt_-DUNKNOWN.o object we found a small
+increase from 20,616 bytes (2,576 insns) and 12,536 bytes JITed
+before to 20,664 bytes (2,582 insns) and 12,558 bytes JITed
+after the change. Other programs from that object file had
+similar small increase. Both test_l4lb.o had no change and
+remained at 6,544 bytes (817 insns) xlated and 3,401 bytes
+JITed and for test_l4lb_noinline.o constant at 5,080 bytes
+(634 insns) xlated and 3,313 bytes JITed. This can be explained
+in that LLVM typically optimizes stack based pointer arithmetic
+by using K-based operations and that use of dynamic map access
+is not overly frequent. However, in future we may decide to
+optimize the algorithm further under known guarantees from
+branch and value speculation. Latter seems also unclear in
+terms of prediction heuristics that today's CPUs apply as well
+as whether there could be collisions in e.g. the predictor's
+Value History/Pattern Table for triggering out of bounds access,
+thus masking is performed unconditionally at this point but could
+be subject to relaxation later on. We were generally also
+brainstorming various other approaches for mitigation, but the
+blocker was always lack of available registers at runtime and/or
+overhead for runtime tracking of limits belonging to a specific
+pointer. Thus, we found this to be minimally intrusive under
+given constraints.
+
+With that in place, a simple example with sanitized access on
+unprivileged load at post-verification time looks as follows:
+
+ # bpftool prog dump xlated id 282
+ [...]
+ 28: (79) r1 = *(u64 *)(r7 +0)
+ 29: (79) r2 = *(u64 *)(r7 +8)
+ 30: (57) r1 &= 15
+ 31: (79) r3 = *(u64 *)(r0 +4608)
+ 32: (57) r3 &= 1
+ 33: (47) r3 |= 1
+ 34: (2d) if r2 > r3 goto pc+19
+ 35: (b4) (u32) r11 = (u32) 20479 |
+ 36: (1f) r11 -= r2 | Dynamic sanitation for pointer
+ 37: (4f) r11 |= r2 | arithmetic with registers
+ 38: (87) r11 = -r11 | containing bounded or known
+ 39: (c7) r11 s>>= 63 | scalars in order to prevent
+ 40: (5f) r11 &= r2 | out of bounds speculation.
+ 41: (0f) r4 += r11 |
+ 42: (71) r4 = *(u8 *)(r4 +0)
+ 43: (6f) r4 <<= r1
+ [...]
+
+For the case where the scalar sits in the destination register
+as opposed to the source register, the following code is emitted
+for the above example:
+
+ [...]
+ 16: (b4) (u32) r11 = (u32) 20479
+ 17: (1f) r11 -= r2
+ 18: (4f) r11 |= r2
+ 19: (87) r11 = -r11
+ 20: (c7) r11 s>>= 63
+ 21: (5f) r2 &= r11
+ 22: (0f) r2 += r0
+ 23: (61) r0 = *(u32 *)(r2 +0)
+ [...]
+
+JIT blinding example with non-conflicting use of r10:
+
+ [...]
+ d5: je 0x0000000000000106 _
+ d7: mov 0x0(%rax),%edi |
+ da: mov $0xf153246,%r10d | Index load from map value and
+ e0: xor $0xf153259,%r10 | (const blinded) mask with 0x1f.
+ e7: and %r10,%rdi |_
+ ea: mov $0x2f,%r10d |
+ f0: sub %rdi,%r10 | Sanitized addition. Both use r10
+ f3: or %rdi,%r10 | but do not interfere with each
+ f6: neg %r10 | other. (Neither do these instructions
+ f9: sar $0x3f,%r10 | interfere with the use of ax as temp
+ fd: and %r10,%rdi | in interpreter.)
+ 100: add %rax,%rdi |_
+ 103: mov 0x0(%rdi),%eax
+ [...]
+
+Tested that it fixes Jann's reproducer, and also checked that test_verifier
+and test_progs suite with interpreter, JIT and JIT with hardening enabled
+on x86-64 and arm64 runs successfully.
+
+ [0] Speculose: Analyzing the Security Implications of Speculative
+ Execution in CPUs, Giorgi Maisuradze and Christian Rossow,
+ https://arxiv.org/pdf/1801.04084.pdf
+
+ [1] A Systematic Evaluation of Transient Execution Attacks and
+ Defenses, Claudio Canella, Jo Van Bulck, Michael Schwarz,
+ Moritz Lipp, Benjamin von Berg, Philipp Ortner, Frank Piessens,
+ Dmitry Evtyushkin, Daniel Gruss,
+ https://arxiv.org/pdf/1811.05441.pdf
+
+Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation")
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/bpf_verifier.h | 10 ++
+ kernel/bpf/verifier.c | 186 +++++++++++++++++++++++++++++++++--
+ 2 files changed, 189 insertions(+), 7 deletions(-)
+
+diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
+index b1587ffdea7b..2716dcbfb2f1 100644
+--- a/include/linux/bpf_verifier.h
++++ b/include/linux/bpf_verifier.h
+@@ -134,6 +134,7 @@ struct bpf_verifier_state {
+ struct bpf_func_state *frame[MAX_CALL_FRAMES];
+ struct bpf_verifier_state *parent;
+ u32 curframe;
++ bool speculative;
+ };
+
+ /* linked list of verifier states used to prune search */
+@@ -142,15 +143,24 @@ struct bpf_verifier_state_list {
+ struct bpf_verifier_state_list *next;
+ };
+
++/* Possible states for alu_state member. */
++#define BPF_ALU_SANITIZE_SRC 1U
++#define BPF_ALU_SANITIZE_DST 2U
++#define BPF_ALU_NEG_VALUE (1U << 2)
++#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
++ BPF_ALU_SANITIZE_DST)
++
+ struct bpf_insn_aux_data {
+ union {
+ enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
+ unsigned long map_state; /* pointer/poison value for maps */
+ s32 call_imm; /* saved imm field of call insn */
++ u32 alu_limit; /* limit for add/sub register with pointer */
+ };
+ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
+ int sanitize_stack_off; /* stack slot to be cleared */
+ bool seen; /* this insn was processed by the verifier */
++ u8 alu_state; /* used in combination with alu_limit */
+ };
+
+ #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index f9d5aea4891d..9ab0adedb174 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -466,6 +466,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
+ free_func_state(dst_state->frame[i]);
+ dst_state->frame[i] = NULL;
+ }
++ dst_state->speculative = src->speculative;
+ dst_state->curframe = src->curframe;
+ dst_state->parent = src->parent;
+ for (i = 0; i <= src->curframe; i++) {
+@@ -511,7 +512,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
+ }
+
+ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+- int insn_idx, int prev_insn_idx)
++ int insn_idx, int prev_insn_idx,
++ bool speculative)
+ {
+ struct bpf_verifier_state *cur = env->cur_state;
+ struct bpf_verifier_stack_elem *elem;
+@@ -529,6 +531,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+ err = copy_verifier_state(&elem->st, cur);
+ if (err)
+ goto err;
++ elem->st.speculative |= speculative;
+ if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
+ verbose(env, "BPF program is too complex\n");
+ goto err;
+@@ -2698,6 +2701,102 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+ return true;
+ }
+
++static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
++{
++ return &env->insn_aux_data[env->insn_idx];
++}
++
++static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
++ u32 *ptr_limit, u8 opcode, bool off_is_neg)
++{
++ bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
++ (opcode == BPF_SUB && !off_is_neg);
++ u32 off;
++
++ switch (ptr_reg->type) {
++ case PTR_TO_STACK:
++ off = ptr_reg->off + ptr_reg->var_off.value;
++ if (mask_to_left)
++ *ptr_limit = MAX_BPF_STACK + off;
++ else
++ *ptr_limit = -off;
++ return 0;
++ case PTR_TO_MAP_VALUE:
++ if (mask_to_left) {
++ *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
++ } else {
++ off = ptr_reg->smin_value + ptr_reg->off;
++ *ptr_limit = ptr_reg->map_ptr->value_size - off;
++ }
++ return 0;
++ default:
++ return -EINVAL;
++ }
++}
++
++static int sanitize_ptr_alu(struct bpf_verifier_env *env,
++ struct bpf_insn *insn,
++ const struct bpf_reg_state *ptr_reg,
++ struct bpf_reg_state *dst_reg,
++ bool off_is_neg)
++{
++ struct bpf_verifier_state *vstate = env->cur_state;
++ struct bpf_insn_aux_data *aux = cur_aux(env);
++ bool ptr_is_dst_reg = ptr_reg == dst_reg;
++ u8 opcode = BPF_OP(insn->code);
++ u32 alu_state, alu_limit;
++ struct bpf_reg_state tmp;
++ bool ret;
++
++ if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
++ return 0;
++
++ /* We already marked aux for masking from non-speculative
++ * paths, thus we got here in the first place. We only care
++ * to explore bad access from here.
++ */
++ if (vstate->speculative)
++ goto do_sim;
++
++ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
++ alu_state |= ptr_is_dst_reg ?
++ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
++
++ if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
++ return 0;
++
++ /* If we arrived here from different branches with different
++ * limits to sanitize, then this won't work.
++ */
++ if (aux->alu_state &&
++ (aux->alu_state != alu_state ||
++ aux->alu_limit != alu_limit))
++ return -EACCES;
++
++ /* Corresponding fixup done in fixup_bpf_calls(). */
++ aux->alu_state = alu_state;
++ aux->alu_limit = alu_limit;
++
++do_sim:
++ /* Simulate and find potential out-of-bounds access under
++ * speculative execution from truncation as a result of
++ * masking when off was not within expected range. If off
++ * sits in dst, then we temporarily need to move ptr there
++ * to simulate dst (== 0) +/-= ptr. Needed, for example,
++ * for cases where we use K-based arithmetic in one direction
++ * and truncated reg-based in the other in order to explore
++ * bad access.
++ */
++ if (!ptr_is_dst_reg) {
++ tmp = *dst_reg;
++ *dst_reg = *ptr_reg;
++ }
++ ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
++ if (!ptr_is_dst_reg)
++ *dst_reg = tmp;
++ return !ret ? -EFAULT : 0;
++}
++
+ /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
+ * Caller should also handle BPF_MOV case separately.
+ * If we return -EACCES, caller may want to try again treating pointer as a
+@@ -2718,6 +2817,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
+ u32 dst = insn->dst_reg, src = insn->src_reg;
+ u8 opcode = BPF_OP(insn->code);
++ int ret;
+
+ dst_reg = &regs[dst];
+
+@@ -2772,6 +2872,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+
+ switch (opcode) {
+ case BPF_ADD:
++ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
++ if (ret < 0) {
++ verbose(env, "R%d tried to add from different maps or paths\n", dst);
++ return ret;
++ }
+ /* We can take a fixed offset as long as it doesn't overflow
+ * the s32 'off' field
+ */
+@@ -2822,6 +2927,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ }
+ break;
+ case BPF_SUB:
++ ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
++ if (ret < 0) {
++ verbose(env, "R%d tried to sub from different maps or paths\n", dst);
++ return ret;
++ }
+ if (dst_reg == off_reg) {
+ /* scalar -= pointer. Creates an unknown scalar */
+ verbose(env, "R%d tried to subtract pointer from scalar\n",
+@@ -3997,7 +4107,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
+ }
+ }
+
+- other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
++ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
++ false);
+ if (!other_branch)
+ return -EFAULT;
+ other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
+@@ -4712,6 +4823,12 @@ static bool states_equal(struct bpf_verifier_env *env,
+ if (old->curframe != cur->curframe)
+ return false;
+
++ /* Verification state from speculative execution simulation
++ * must never prune a non-speculative execution one.
++ */
++ if (old->speculative && !cur->speculative)
++ return false;
++
+ /* for states to be equal callsites have to be the same
+ * and all frame states need to be equivalent
+ */
+@@ -4863,7 +4980,7 @@ static int do_check(struct bpf_verifier_env *env)
+ if (!state)
+ return -ENOMEM;
+ state->curframe = 0;
+- state->parent = NULL;
++ state->speculative = false;
+ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+ if (!state->frame[0]) {
+ kfree(state);
+@@ -4903,8 +5020,10 @@ static int do_check(struct bpf_verifier_env *env)
+ /* found equivalent state, can prune the search */
+ if (env->log.level) {
+ if (do_print_state)
+- verbose(env, "\nfrom %d to %d: safe\n",
+- env->prev_insn_idx, env->insn_idx);
++ verbose(env, "\nfrom %d to %d%s: safe\n",
++ env->prev_insn_idx, env->insn_idx,
++ env->cur_state->speculative ?
++ " (speculative execution)" : "");
+ else
+ verbose(env, "%d: safe\n", env->insn_idx);
+ }
+@@ -4921,8 +5040,10 @@ static int do_check(struct bpf_verifier_env *env)
+ if (env->log.level > 1)
+ verbose(env, "%d:", env->insn_idx);
+ else
+- verbose(env, "\nfrom %d to %d:",
+- env->prev_insn_idx, env->insn_idx);
++ verbose(env, "\nfrom %d to %d%s:",
++ env->prev_insn_idx, env->insn_idx,
++ env->cur_state->speculative ?
++ " (speculative execution)" : "");
+ print_verifier_state(env, state->frame[state->curframe]);
+ do_print_state = false;
+ }
+@@ -5869,6 +5990,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
+ continue;
+ }
+
++ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
++ insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
++ const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
++ const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
++ struct bpf_insn insn_buf[16];
++ struct bpf_insn *patch = &insn_buf[0];
++ bool issrc, isneg;
++ u32 off_reg;
++
++ aux = &env->insn_aux_data[i + delta];
++ if (!aux->alu_state)
++ continue;
++
++ isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
++ issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
++ BPF_ALU_SANITIZE_SRC;
++
++ off_reg = issrc ? insn->src_reg : insn->dst_reg;
++ if (isneg)
++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
++ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
++ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
++ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
++ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
++ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
++ if (issrc) {
++ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
++ off_reg);
++ insn->src_reg = BPF_REG_AX;
++ } else {
++ *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
++ BPF_REG_AX);
++ }
++ if (isneg)
++ insn->code = insn->code == code_add ?
++ code_sub : code_add;
++ *patch++ = *insn;
++ if (issrc && isneg)
++ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
++ cnt = patch - insn_buf;
++
++ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
++ if (!new_prog)
++ return -ENOMEM;
++
++ delta += cnt - 1;
++ env->prog = prog = new_prog;
++ insn = new_prog->insnsi + i + delta;
++ continue;
++ }
++
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+--
+2.19.1
+
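The patch above rewrites "pointer +/- unknown scalar" ALU operations into a masked
sequence using BPF_REG_AX. What follows is a minimal user-space sketch of that
masking math, illustrative only and not part of the patch: the helper name
mask_alu_off() is invented for the example, it models only the non-negated case
(without the MUL -1 fixups), and BPF_ARSH is mimicked with a signed right shift,
relying on the arithmetic-shift behaviour gcc and clang provide.

        #include <stdint.h>
        #include <stdio.h>

        /* Mirror of the rewritten insn sequence: an off value inside
         * [0, alu_limit) passes through unchanged, while a negative or
         * too-large off collapses to 0, so a speculatively executed
         * pointer add cannot leave the intended object.
         */
        static uint64_t mask_alu_off(uint64_t off, uint32_t alu_limit)
        {
                uint64_t ax = alu_limit - 1;            /* BPF_MOV32_IMM(AX, alu_limit - 1) */

                ax -= off;                              /* BPF_ALU64_REG(BPF_SUB, AX, off)  */
                ax |= off;                              /* BPF_ALU64_REG(BPF_OR,  AX, off)  */
                ax = -ax;                               /* BPF_ALU64_IMM(BPF_NEG, AX, 0)    */
                ax = (uint64_t)((int64_t)ax >> 63);     /* BPF_ALU64_IMM(BPF_ARSH, AX, 63)  */
                return off & ax;                        /* BPF_ALU64_REG(BPF_AND, off, AX)  */
        }

        int main(void)
        {
                printf("%llu\n", (unsigned long long)mask_alu_off(8, 16));            /* 8: in range  */
                printf("%llu\n", (unsigned long long)mask_alu_off(32, 16));           /* 0: too large */
                printf("%llu\n", (unsigned long long)mask_alu_off((uint64_t)-4, 16)); /* 0: negative  */
                return 0;
        }

The sign bit of AX ends up set only when off lies within the limit, so the final
arithmetic shift produces either an all-ones or an all-zero mask for the AND.
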
--- /dev/null
+From 5a41b1860e320e8d921b785115460e4ff80dc0eb Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:24 +0100
+Subject: bpf: restrict map value pointer arithmetic for unprivileged
+
+[ commit 0d6303db7970e6f56ae700fa07e11eb510cda125 upstream ]
+
+Restrict map value pointer arithmetic for unprivileged users such that
+the arithmetic itself must not go out of bounds, as opposed to only the
+actual access later on. Therefore, after each adjust_ptr_min_max_vals()
+with a map value pointer as a destination, the verifier simulates a
+check_map_access() of 1 byte on the destination, and if that fails the
+program is rejected for unprivileged program loads. We use this later
+on for masking any pointer arithmetic with the remainder of the map
+value space. The likelihood of breaking any existing real-world
+unprivileged eBPF program is very small for this corner case.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 6d11320998c6..110ca915cfb6 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2880,6 +2880,17 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ __update_reg_bounds(dst_reg);
+ __reg_deduce_bounds(dst_reg);
+ __reg_bound_offset(dst_reg);
++
++ /* For unprivileged we require that resulting offset must be in bounds
++ * in order to be able to sanitize access later on.
++ */
++ if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE &&
++ check_map_access(env, dst, dst_reg->off, 1, false)) {
++ verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n",
++ dst);
++ return -EACCES;
++ }
++
+ return 0;
+ }
+
+--
+2.19.1
+
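A hedged restricted-C sketch of the pattern this patch now rejects for
unprivileged loads (illustrative only; the map layout, section names and the
barrier-style inline asm follow common libbpf/selftests conventions and are not
taken from the patch). With an 8-byte value, moving the map value pointer to
offset 16 makes the addition itself fail verification for !root, before any
access is attempted:

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        struct {
                __uint(type, BPF_MAP_TYPE_ARRAY);
                __uint(max_entries, 1);
                __type(key, __u32);
                __type(value, __u64);           /* value_size == 8 */
        } vals SEC(".maps");

        SEC("socket")
        int map_val_oob_arith(struct __sk_buff *skb)
        {
                __u32 key = 0;
                char *p = bpf_map_lookup_elem(&vals, &key);

                if (!p)
                        return 0;
                p += 16;                        /* resulting offset 16 > value_size 8       */
                asm volatile("" : "+r"(p));     /* keep the add as a separate ALU insn      */
                *p = 0;                         /* never reached: the add above is rejected */
                return 0;
        }

        char _license[] SEC("license") = "GPL";
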
--- /dev/null
+From f2685b0fc59fc7f3debc0502020b8134277770a7 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:25 +0100
+Subject: bpf: restrict stack pointer arithmetic for unprivileged
+
+[ commit e4298d25830a866cc0f427d4bccb858e76715859 upstream ]
+
+Restrict stack pointer arithmetic for unprivileged users such that the
+arithmetic itself must not go out of bounds, as opposed to only the
+actual access later on. Therefore, after each adjust_ptr_min_max_vals()
+with a stack pointer as a destination, we simulate a check_stack_access()
+of 1 byte on the destination, and if that fails the program is rejected
+for unprivileged program loads. This is analogous to the map value
+pointer arithmetic case and is needed for masking later on.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 63 ++++++++++++++++++++++++++++---------------
+ 1 file changed, 41 insertions(+), 22 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 110ca915cfb6..aa2944d54e7a 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1238,6 +1238,31 @@ static int check_stack_read(struct bpf_verifier_env *env,
+ }
+ }
+
++static int check_stack_access(struct bpf_verifier_env *env,
++ const struct bpf_reg_state *reg,
++ int off, int size)
++{
++ /* Stack accesses must be at a fixed offset, so that we
++ * can determine what type of data were returned. See
++ * check_stack_read().
++ */
++ if (!tnum_is_const(reg->var_off)) {
++ char tn_buf[48];
++
++ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
++ verbose(env, "variable stack access var_off=%s off=%d size=%d",
++ tn_buf, off, size);
++ return -EACCES;
++ }
++
++ if (off >= 0 || off < -MAX_BPF_STACK) {
++ verbose(env, "invalid stack off=%d size=%d\n", off, size);
++ return -EACCES;
++ }
++
++ return 0;
++}
++
+ /* check read/write into map element returned by bpf_map_lookup_elem() */
+ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
+ int size, bool zero_size_allowed)
+@@ -1736,24 +1761,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
+ }
+
+ } else if (reg->type == PTR_TO_STACK) {
+- /* stack accesses must be at a fixed offset, so that we can
+- * determine what type of data were returned.
+- * See check_stack_read().
+- */
+- if (!tnum_is_const(reg->var_off)) {
+- char tn_buf[48];
+-
+- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+- verbose(env, "variable stack access var_off=%s off=%d size=%d",
+- tn_buf, off, size);
+- return -EACCES;
+- }
+ off += reg->var_off.value;
+- if (off >= 0 || off < -MAX_BPF_STACK) {
+- verbose(env, "invalid stack off=%d size=%d\n", off,
+- size);
+- return -EACCES;
+- }
++ err = check_stack_access(env, reg, off, size);
++ if (err)
++ return err;
+
+ state = func(env, reg);
+ err = update_stack_depth(env, state, off);
+@@ -2884,11 +2895,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ /* For unprivileged we require that resulting offset must be in bounds
+ * in order to be able to sanitize access later on.
+ */
+- if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE &&
+- check_map_access(env, dst, dst_reg->off, 1, false)) {
+- verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n",
+- dst);
+- return -EACCES;
++ if (!env->allow_ptr_leaks) {
++ if (dst_reg->type == PTR_TO_MAP_VALUE &&
++ check_map_access(env, dst, dst_reg->off, 1, false)) {
++ verbose(env, "R%d pointer arithmetic of map value goes out of range, "
++ "prohibited for !root\n", dst);
++ return -EACCES;
++ } else if (dst_reg->type == PTR_TO_STACK &&
++ check_stack_access(env, dst_reg, dst_reg->off +
++ dst_reg->var_off.value, 1)) {
++ verbose(env, "R%d stack pointer arithmetic goes out of range, "
++ "prohibited for !root\n", dst);
++ return -EACCES;
++ }
+ }
+
+ return 0;
+--
+2.19.1
+
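An analogous restricted-C sketch for the stack pointer case (illustrative only;
the buffer placement and the inline asm barrier are assumptions about how clang
lays out the frame, not part of the patch). The addition moves the pointer above
the frame, so check_stack_access() on the resulting offset fails and the
arithmetic itself is rejected for !root:

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        SEC("socket")
        int stack_oob_arith(struct __sk_buff *skb)
        {
                char buf[8] = {};
                char *p = buf;                  /* PTR_TO_STACK, typically fp-8             */

                p += 16;                        /* resulting offset >= 0, above the frame   */
                asm volatile("" : "+r"(p));     /* keep the add as a separate ALU insn      */
                *p = 0;                         /* never reached: the add above is rejected */
                return 0;
        }

        char _license[] SEC("license") = "GPL";
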
--- /dev/null
+From 91b541fcd73c3735479ac2018aecab3c0ed6a9b2 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Mon, 28 Jan 2019 21:28:26 +0100
+Subject: bpf: restrict unknown scalars of mixed signed bounds for unprivileged
+
+[ commit 9d7eceede769f90b66cfa06ad5b357140d5141ed upstream ]
+
+For unknown scalars with mixed signed bounds, meaning their smin_value
+is negative and their smax_value is positive, we need to reject
+arithmetic with a pointer to a map value. For unprivileged programs the
+goal is to mask every map pointer arithmetic operation, and this cannot
+be done reliably when it is unknown at verification time whether the
+scalar value is negative or positive. Given this is a corner case, the
+likelihood of breaking existing programs should be very small.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index aa2944d54e7a..fbaa3b9e1d71 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -2712,8 +2712,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
+ u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
+ umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
++ u32 dst = insn->dst_reg, src = insn->src_reg;
+ u8 opcode = BPF_OP(insn->code);
+- u32 dst = insn->dst_reg;
+
+ dst_reg = &regs[dst];
+
+@@ -2749,6 +2749,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
+ dst);
+ return -EACCES;
+ }
++ if (ptr_reg->type == PTR_TO_MAP_VALUE &&
++ !env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
++ verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
++ off_reg == dst_reg ? dst : src);
++ return -EACCES;
++ }
+
+ /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
+ * The id may be overwritten later if we create a new variable offset.
+--
+2.19.1
+
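A sketch of the mixed-signed-bounds case (illustrative only; it reuses the same
kind of array map and libbpf conventions as the earlier sketch). A value loaded
from the map is a fully unknown scalar, so its smin_value is negative and its
smax_value positive, and adding it to the map value pointer is refused for !root
before the addition is simulated at all:

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        struct {
                __uint(type, BPF_MAP_TYPE_ARRAY);
                __uint(max_entries, 1);
                __type(key, __u32);
                __type(value, __u64);
        } vals SEC(".maps");

        SEC("socket")
        int mixed_bounds_arith(struct __sk_buff *skb)
        {
                __u32 key = 0;
                __u64 *val = bpf_map_lookup_elem(&vals, &key);
                char *p;

                if (!val)
                        return 0;
                p = (char *)val + *val;         /* *val: unknown scalar, smin < 0 < smax */
                asm volatile("" : "+r"(p));     /* keep p (and the add) alive            */
                return 0;
        }

        char _license[] SEC("license") = "GPL";
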
vt-always-call-notifier-with-the-console-lock-held.patch
vt-invoke-notifier-on-screen-size-change.patch
drm-meson-fix-atomic-mode-switching-regression.patch
+bpf-improve-verifier-branch-analysis.patch
+bpf-add-per-insn-complexity-limit.patch
+bpf-move-prev_-insn_idx-into-verifier-env.patch
+bpf-move-tmp-variable-into-ax-register-in-interprete.patch
+bpf-enable-access-to-ax-register-also-from-verifier-.patch
+bpf-restrict-map-value-pointer-arithmetic-for-unpriv.patch
+bpf-restrict-stack-pointer-arithmetic-for-unprivileg.patch
+bpf-restrict-unknown-scalars-of-mixed-signed-bounds-.patch
+bpf-fix-check_map_access-smin_value-test-when-pointe.patch
+bpf-prevent-out-of-bounds-speculation-on-pointer-ari.patch
+bpf-fix-sanitation-of-alu-op-with-pointer-scalar-typ.patch
+bpf-fix-inner-map-masking-to-prevent-oob-under-specu.patch