git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 17 Aug 2019 15:36:01 +0000 (17:36 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 17 Aug 2019 15:36:01 +0000 (17:36 +0200)
added patches:
bpf-add-bpf_jit_limit-knob-to-restrict-unpriv-allocations.patch
bpf-get-rid-of-pure_initcall-dependency-to-enable-jits.patch
bpf-restrict-access-to-core-bpf-sysctls.patch
x86-mm-use-write_once-when-setting-ptes.patch

queue-4.14/bpf-add-bpf_jit_limit-knob-to-restrict-unpriv-allocations.patch [new file with mode: 0644]
queue-4.14/bpf-get-rid-of-pure_initcall-dependency-to-enable-jits.patch [new file with mode: 0644]
queue-4.14/bpf-restrict-access-to-core-bpf-sysctls.patch [new file with mode: 0644]
queue-4.14/series
queue-4.14/x86-mm-use-write_once-when-setting-ptes.patch [new file with mode: 0644]

diff --git a/queue-4.14/bpf-add-bpf_jit_limit-knob-to-restrict-unpriv-allocations.patch b/queue-4.14/bpf-add-bpf_jit_limit-knob-to-restrict-unpriv-allocations.patch
new file mode 100644 (file)
index 0000000..4d94513
--- /dev/null
@@ -0,0 +1,203 @@
+From foo@baz Sat 17 Aug 2019 05:34:49 PM CEST
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 16 Aug 2019 23:05:36 +0100
+Subject: bpf: add bpf_jit_limit knob to restrict unpriv allocations
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Cc: stable <stable@vger.kernel.org>
+Message-ID: <20190816220536.GC9843@xylophone.i.decadent.org.uk>
+Content-Disposition: inline
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit ede95a63b5e84ddeea6b0c473b36ab8bfd8c6ce3 upstream.
+
+Rick reported that the BPF JIT could potentially fill the entire module
+space with BPF programs from unprivileged users, which would prevent later
+attempts to load normal kernel modules or privileged BPF programs, for
+example. If the JIT was enabled but failed to generate the image, then
+before commit 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config")
+we would always fall back to the BPF interpreter. Nowadays, when
+CONFIG_BPF_JIT_ALWAYS_ON is set, the load instead aborts with a failure
+since the BPF interpreter was compiled out.
+
+Add a global limit and enforce it for unprivileged users, so that once the
+limit has been reached we either fail the load (if the BPF interpreter was
+compiled out) or fall back to the interpreter earlier, without consuming
+module memory (if it was compiled in). In a next step, fair sharing among
+unprivileged users can be addressed, in particular for the case where we
+would otherwise fail hard once the limit is reached.
+
+Fixes: 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config")
+Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64")
+Co-Developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: LKML <linux-kernel@vger.kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/sysctl/net.txt |    8 +++++++
+ include/linux/filter.h       |    1 
+ kernel/bpf/core.c            |   49 ++++++++++++++++++++++++++++++++++++++++---
+ net/core/sysctl_net_core.c   |   10 +++++++-
+ 4 files changed, 63 insertions(+), 5 deletions(-)
+
+--- a/Documentation/sysctl/net.txt
++++ b/Documentation/sysctl/net.txt
+@@ -91,6 +91,14 @@ Values :
+       0 - disable JIT kallsyms export (default value)
+       1 - enable JIT kallsyms export for privileged users only
++bpf_jit_limit
++-------------
++
++This enforces a global limit for memory allocations to the BPF JIT
++compiler in order to reject unprivileged JIT requests once it has
++been surpassed. bpf_jit_limit contains the value of the global limit
++in bytes.
++
+ dev_weight
+ --------------
+--- a/include/linux/filter.h
++++ b/include/linux/filter.h
+@@ -729,6 +729,7 @@ struct sock *do_sk_redirect_map(struct s
+ extern int bpf_jit_enable;
+ extern int bpf_jit_harden;
+ extern int bpf_jit_kallsyms;
++extern int bpf_jit_limit;
+ typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -290,10 +290,13 @@ struct bpf_prog *bpf_patch_insn_single(s
+ }
+ #ifdef CONFIG_BPF_JIT
++# define BPF_JIT_LIMIT_DEFAULT        (PAGE_SIZE * 40000)
++
+ /* All BPF JIT sysctl knobs here. */
+ int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+ int bpf_jit_harden   __read_mostly;
+ int bpf_jit_kallsyms __read_mostly;
++int bpf_jit_limit    __read_mostly = BPF_JIT_LIMIT_DEFAULT;
+ static __always_inline void
+ bpf_get_prog_addr_region(const struct bpf_prog *prog,
+@@ -489,27 +492,64 @@ int bpf_get_kallsym(unsigned int symnum,
+       return ret;
+ }
++static atomic_long_t bpf_jit_current;
++
++#if defined(MODULES_VADDR)
++static int __init bpf_jit_charge_init(void)
++{
++      /* Only used as heuristic here to derive limit. */
++      bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
++                                          PAGE_SIZE), INT_MAX);
++      return 0;
++}
++pure_initcall(bpf_jit_charge_init);
++#endif
++
++static int bpf_jit_charge_modmem(u32 pages)
++{
++      if (atomic_long_add_return(pages, &bpf_jit_current) >
++          (bpf_jit_limit >> PAGE_SHIFT)) {
++              if (!capable(CAP_SYS_ADMIN)) {
++                      atomic_long_sub(pages, &bpf_jit_current);
++                      return -EPERM;
++              }
++      }
++
++      return 0;
++}
++
++static void bpf_jit_uncharge_modmem(u32 pages)
++{
++      atomic_long_sub(pages, &bpf_jit_current);
++}
++
+ struct bpf_binary_header *
+ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
+                    unsigned int alignment,
+                    bpf_jit_fill_hole_t bpf_fill_ill_insns)
+ {
+       struct bpf_binary_header *hdr;
+-      unsigned int size, hole, start;
++      u32 size, hole, start, pages;
+       /* Most of BPF filters are really small, but if some of them
+        * fill a page, allow at least 128 extra bytes to insert a
+        * random section of illegal instructions.
+        */
+       size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
++      pages = size / PAGE_SIZE;
++
++      if (bpf_jit_charge_modmem(pages))
++              return NULL;
+       hdr = module_alloc(size);
+-      if (hdr == NULL)
++      if (!hdr) {
++              bpf_jit_uncharge_modmem(pages);
+               return NULL;
++      }
+       /* Fill space with illegal/arch-dep instructions. */
+       bpf_fill_ill_insns(hdr, size);
+-      hdr->pages = size / PAGE_SIZE;
++      hdr->pages = pages;
+       hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
+                    PAGE_SIZE - sizeof(*hdr));
+       start = (get_random_int() % hole) & ~(alignment - 1);
+@@ -522,7 +562,10 @@ bpf_jit_binary_alloc(unsigned int progle
+ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
+ {
++      u32 pages = hdr->pages;
++
+       module_memfree(hdr);
++      bpf_jit_uncharge_modmem(pages);
+ }
+ /* This symbol is only overridden by archs that have different
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -272,7 +272,6 @@ static int proc_dointvec_minmax_bpf_enab
+       return ret;
+ }
+-# ifdef CONFIG_HAVE_EBPF_JIT
+ static int
+ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
+                                   void __user *buffer, size_t *lenp,
+@@ -283,7 +282,6 @@ proc_dointvec_minmax_bpf_restricted(stru
+       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ }
+-# endif
+ #endif
+ static struct ctl_table net_core_table[] = {
+@@ -390,6 +388,14 @@ static struct ctl_table net_core_table[]
+               .extra2         = &one,
+       },
+ # endif
++      {
++              .procname       = "bpf_jit_limit",
++              .data           = &bpf_jit_limit,
++              .maxlen         = sizeof(int),
++              .mode           = 0600,
++              .proc_handler   = proc_dointvec_minmax_bpf_restricted,
++              .extra1         = &one,
++      },
+ #endif
+       {
+               .procname       = "netdev_tstamp_prequeue",
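
A minimal user-space sketch (not part of the queue) of the knob the patch
above adds: net.core.bpf_jit_limit is registered with mode 0600 and the
proc_dointvec_minmax_bpf_restricted handler, so only a task with
CAP_SYS_ADMIN can read or change the byte limit. Assuming the sysctl path
exactly as added by this patch:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/bpf_jit_limit", "r");
	long limit;

	if (!f) {
		perror("bpf_jit_limit");	/* knob not present, or not privileged */
		return 1;
	}
	if (fscanf(f, "%ld", &limit) == 1)
		printf("global BPF JIT limit: %ld bytes\n", limit);
	fclose(f);
	return 0;
}

Run it as root; an unprivileged open is expected to fail with EACCES
because of the 0600 mode.
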
diff --git a/queue-4.14/bpf-get-rid-of-pure_initcall-dependency-to-enable-jits.patch b/queue-4.14/bpf-get-rid-of-pure_initcall-dependency-to-enable-jits.patch
new file mode 100644 (file)
index 0000000..c63c739
--- /dev/null
@@ -0,0 +1,281 @@
+From foo@baz Sat 17 Aug 2019 05:34:49 PM CEST
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 16 Aug 2019 23:04:32 +0100
+Subject: bpf: get rid of pure_initcall dependency to enable jits
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Cc: stable <stable@vger.kernel.org>
+Message-ID: <20190816220431.GA9843@xylophone.i.decadent.org.uk>
+Content-Disposition: inline
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit fa9dd599b4dae841924b022768354cfde9affecb upstream.
+
+Having a pure_initcall() callback just to permanently enable BPF
+JITs under CONFIG_BPF_JIT_ALWAYS_ON is unnecessary and could leave
+a small race window in the future where the JIT is still disabled on
+boot. Since we know about the setting at compile time anyway, just
+initialize it properly there. Also consolidate all the individual
+per-arch bpf_jit_enable definitions into a single one and move the
+JIT sysctl knobs to one location. Moreover, don't allow setting
+arbitrary out-of-range values on them.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+[bwh: Backported to 4.14 as dependency of commit 2e4a30983b0f
+ "bpf: restrict access to core bpf sysctls":
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/net/bpf_jit_32.c         |    2 --
+ arch/arm64/net/bpf_jit_comp.c     |    2 --
+ arch/mips/net/bpf_jit.c           |    2 --
+ arch/mips/net/ebpf_jit.c          |    2 --
+ arch/powerpc/net/bpf_jit_comp.c   |    2 --
+ arch/powerpc/net/bpf_jit_comp64.c |    2 --
+ arch/s390/net/bpf_jit_comp.c      |    2 --
+ arch/sparc/net/bpf_jit_comp_32.c  |    2 --
+ arch/sparc/net/bpf_jit_comp_64.c  |    2 --
+ arch/x86/net/bpf_jit_comp.c       |    2 --
+ kernel/bpf/core.c                 |   19 ++++++++++++-------
+ net/core/sysctl_net_core.c        |   18 ++++++++++++------
+ net/socket.c                      |    9 ---------
+ 13 files changed, 24 insertions(+), 42 deletions(-)
+
+--- a/arch/arm/net/bpf_jit_32.c
++++ b/arch/arm/net/bpf_jit_32.c
+@@ -25,8 +25,6 @@
+ #include "bpf_jit_32.h"
+-int bpf_jit_enable __read_mostly;
+-
+ /*
+  * eBPF prog stack layout:
+  *
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -31,8 +31,6 @@
+ #include "bpf_jit.h"
+-int bpf_jit_enable __read_mostly;
+-
+ #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
+ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
+ #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
+--- a/arch/mips/net/bpf_jit.c
++++ b/arch/mips/net/bpf_jit.c
+@@ -1207,8 +1207,6 @@ jmp_cmp:
+       return 0;
+ }
+-int bpf_jit_enable __read_mostly;
+-
+ void bpf_jit_compile(struct bpf_prog *fp)
+ {
+       struct jit_ctx ctx;
+--- a/arch/mips/net/ebpf_jit.c
++++ b/arch/mips/net/ebpf_jit.c
+@@ -177,8 +177,6 @@ static u32 b_imm(unsigned int tgt, struc
+               (ctx->idx * 4) - 4;
+ }
+-int bpf_jit_enable __read_mostly;
+-
+ enum which_ebpf_reg {
+       src_reg,
+       src_reg_no_fp,
+--- a/arch/powerpc/net/bpf_jit_comp.c
++++ b/arch/powerpc/net/bpf_jit_comp.c
+@@ -18,8 +18,6 @@
+ #include "bpf_jit32.h"
+-int bpf_jit_enable __read_mostly;
+-
+ static inline void bpf_flush_icache(void *start, void *end)
+ {
+       smp_wmb();
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -21,8 +21,6 @@
+ #include "bpf_jit64.h"
+-int bpf_jit_enable __read_mostly;
+-
+ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
+ {
+       memset32(area, BREAKPOINT_INSTRUCTION, size/4);
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -30,8 +30,6 @@
+ #include <asm/set_memory.h>
+ #include "bpf_jit.h"
+-int bpf_jit_enable __read_mostly;
+-
+ struct bpf_jit {
+       u32 seen;               /* Flags to remember seen eBPF instructions */
+       u32 seen_reg[16];       /* Array to remember which registers are used */
+--- a/arch/sparc/net/bpf_jit_comp_32.c
++++ b/arch/sparc/net/bpf_jit_comp_32.c
+@@ -11,8 +11,6 @@
+ #include "bpf_jit_32.h"
+-int bpf_jit_enable __read_mostly;
+-
+ static inline bool is_simm13(unsigned int value)
+ {
+       return value + 0x1000 < 0x2000;
+--- a/arch/sparc/net/bpf_jit_comp_64.c
++++ b/arch/sparc/net/bpf_jit_comp_64.c
+@@ -12,8 +12,6 @@
+ #include "bpf_jit_64.h"
+-int bpf_jit_enable __read_mostly;
+-
+ static inline bool is_simm13(unsigned int value)
+ {
+       return value + 0x1000 < 0x2000;
+--- a/arch/x86/net/bpf_jit_comp.c
++++ b/arch/x86/net/bpf_jit_comp.c
+@@ -16,8 +16,6 @@
+ #include <asm/nospec-branch.h>
+ #include <linux/bpf.h>
+-int bpf_jit_enable __read_mostly;
+-
+ /*
+  * assembly code in arch/x86/net/bpf_jit.S
+  */
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -290,6 +290,11 @@ struct bpf_prog *bpf_patch_insn_single(s
+ }
+ #ifdef CONFIG_BPF_JIT
++/* All BPF JIT sysctl knobs here. */
++int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
++int bpf_jit_harden   __read_mostly;
++int bpf_jit_kallsyms __read_mostly;
++
+ static __always_inline void
+ bpf_get_prog_addr_region(const struct bpf_prog *prog,
+                        unsigned long *symbol_start,
+@@ -358,8 +363,6 @@ static DEFINE_SPINLOCK(bpf_lock);
+ static LIST_HEAD(bpf_kallsyms);
+ static struct latch_tree_root bpf_tree __cacheline_aligned;
+-int bpf_jit_kallsyms __read_mostly;
+-
+ static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
+ {
+       WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
+@@ -540,8 +543,6 @@ void __weak bpf_jit_free(struct bpf_prog
+       bpf_prog_unlock_free(fp);
+ }
+-int bpf_jit_harden __read_mostly;
+-
+ static int bpf_jit_blind_insn(const struct bpf_insn *from,
+                             const struct bpf_insn *aux,
+                             struct bpf_insn *to_buff)
+@@ -1327,9 +1328,13 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512
+ };
+ #else
+-static unsigned int __bpf_prog_ret0(const void *ctx,
+-                                  const struct bpf_insn *insn)
++static unsigned int __bpf_prog_ret0_warn(const void *ctx,
++                                       const struct bpf_insn *insn)
+ {
++      /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
++       * is not working properly, so warn about it!
++       */
++      WARN_ON_ONCE(1);
+       return 0;
+ }
+ #endif
+@@ -1386,7 +1391,7 @@ struct bpf_prog *bpf_prog_select_runtime
+       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+ #else
+-      fp->bpf_func = __bpf_prog_ret0;
++      fp->bpf_func = __bpf_prog_ret0_warn;
+ #endif
+       /* eBPF JITs can rewrite the program in case constant
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -25,6 +25,7 @@
+ static int zero = 0;
+ static int one = 1;
++static int two __maybe_unused = 2;
+ static int min_sndbuf = SOCK_MIN_SNDBUF;
+ static int min_rcvbuf = SOCK_MIN_RCVBUF;
+ static int max_skb_frags = MAX_SKB_FRAGS;
+@@ -325,13 +326,14 @@ static struct ctl_table net_core_table[]
+               .data           = &bpf_jit_enable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+-              .proc_handler   = proc_dointvec
+-#else
+               .proc_handler   = proc_dointvec_minmax,
++# ifdef CONFIG_BPF_JIT_ALWAYS_ON
+               .extra1         = &one,
+               .extra2         = &one,
+-#endif
++# else
++              .extra1         = &zero,
++              .extra2         = &two,
++# endif
+       },
+ # ifdef CONFIG_HAVE_EBPF_JIT
+       {
+@@ -339,14 +341,18 @@ static struct ctl_table net_core_table[]
+               .data           = &bpf_jit_harden,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+-              .proc_handler   = proc_dointvec,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &zero,
++              .extra2         = &two,
+       },
+       {
+               .procname       = "bpf_jit_kallsyms",
+               .data           = &bpf_jit_kallsyms,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+-              .proc_handler   = proc_dointvec,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &zero,
++              .extra2         = &one,
+       },
+ # endif
+ #endif
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -2656,15 +2656,6 @@ out_fs:
+ core_initcall(sock_init);     /* early initcall */
+-static int __init jit_init(void)
+-{
+-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+-      bpf_jit_enable = 1;
+-#endif
+-      return 0;
+-}
+-pure_initcall(jit_init);
+-
+ #ifdef CONFIG_PROC_FS
+ void socket_seq_show(struct seq_file *seq)
+ {
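
One user-visible effect of the consolidation above, shown as a sketch (not
part of the queue, and assuming a kernel carrying this patch): the JIT
knobs are now handled by proc_dointvec_minmax(), which rejects writes
outside the [extra1, extra2] range with -EINVAL, so a garbage value such
as 42 can no longer be stored in bpf_jit_enable:

#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Run as root: the sysctl file is root-owned with mode 0644. */
	FILE *f = fopen("/proc/sys/net/core/bpf_jit_enable", "w");

	if (!f) {
		perror("open");
		return 1;
	}
	/* 2 is the highest valid value (only 1 with CONFIG_BPF_JIT_ALWAYS_ON). */
	if (fprintf(f, "42\n") < 0 || fflush(f) != 0)
		printf("out-of-range write rejected: %s\n", strerror(errno));
	else
		printf("unexpected: out-of-range value accepted\n");
	fclose(f);
	return 0;
}
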
diff --git a/queue-4.14/bpf-restrict-access-to-core-bpf-sysctls.patch b/queue-4.14/bpf-restrict-access-to-core-bpf-sysctls.patch
new file mode 100644 (file)
index 0000000..244389b
--- /dev/null
@@ -0,0 +1,106 @@
+From foo@baz Sat 17 Aug 2019 05:34:49 PM CEST
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 16 Aug 2019 23:05:20 +0100
+Subject: bpf: restrict access to core bpf sysctls
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Cc: stable <stable@vger.kernel.org>
+Message-ID: <20190816220520.GB9843@xylophone.i.decadent.org.uk>
+Content-Disposition: inline
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 2e4a30983b0f9b19b59e38bbf7427d7fdd480d98 upstream.
+
+Given BPF reaches far beyond just networking these days, it was
+never intended that these knobs could be set, and in some cases read,
+by a user-namespace root running without CAP_SYS_ADMIN; tighten such
+access accordingly.
+
+Also, the bpf_jit_enable = 2 debugging mode should only be allowed
+if kptr_restrict is not set, since it can otherwise leak addresses
+to the kernel log. When this mode is enabled, print a note to the
+kernel log that it is meant for JIT debugging only.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+[bwh: Backported to 4.14: We don't have bpf_dump_raw_ok(), so drop the
+ condition based on it. This condition only made it a bit harder for a
+ privileged user to do something silly.]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sysctl_net_core.c |   41 ++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 38 insertions(+), 3 deletions(-)
+
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -251,6 +251,41 @@ static int proc_do_rss_key(struct ctl_ta
+       return proc_dostring(&fake_table, write, buffer, lenp, ppos);
+ }
++#ifdef CONFIG_BPF_JIT
++static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
++                                         void __user *buffer, size_t *lenp,
++                                         loff_t *ppos)
++{
++      int ret, jit_enable = *(int *)table->data;
++      struct ctl_table tmp = *table;
++
++      if (write && !capable(CAP_SYS_ADMIN))
++              return -EPERM;
++
++      tmp.data = &jit_enable;
++      ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
++      if (write && !ret) {
++              *(int *)table->data = jit_enable;
++              if (jit_enable == 2)
++                      pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
++      }
++      return ret;
++}
++
++# ifdef CONFIG_HAVE_EBPF_JIT
++static int
++proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
++                                  void __user *buffer, size_t *lenp,
++                                  loff_t *ppos)
++{
++      if (!capable(CAP_SYS_ADMIN))
++              return -EPERM;
++
++      return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
++}
++# endif
++#endif
++
+ static struct ctl_table net_core_table[] = {
+ #ifdef CONFIG_NET
+       {
+@@ -326,7 +361,7 @@ static struct ctl_table net_core_table[]
+               .data           = &bpf_jit_enable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax_bpf_enable,
+ # ifdef CONFIG_BPF_JIT_ALWAYS_ON
+               .extra1         = &one,
+               .extra2         = &one,
+@@ -341,7 +376,7 @@ static struct ctl_table net_core_table[]
+               .data           = &bpf_jit_harden,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+-              .proc_handler   = proc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax_bpf_restricted,
+               .extra1         = &zero,
+               .extra2         = &two,
+       },
+@@ -350,7 +385,7 @@ static struct ctl_table net_core_table[]
+               .data           = &bpf_jit_kallsyms,
+               .maxlen         = sizeof(int),
+               .mode           = 0600,
+-              .proc_handler   = proc_dointvec_minmax,
++              .proc_handler   = proc_dointvec_minmax_bpf_restricted,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
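
To see the tightened access from the other side, a small sketch (not part
of the queue; it assumes a kernel with this patch applied): bpf_jit_harden
and bpf_jit_kallsyms stay mode 0600, and their handler now also requires
capable(CAP_SYS_ADMIN), so even reads are refused without that capability:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[16];
	int fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDONLY);

	if (fd < 0) {
		/* ordinary unprivileged user: stopped by the 0600 mode */
		printf("open blocked: %s\n", strerror(errno));
		return 0;
	}
	if (read(fd, buf, sizeof(buf)) < 0)
		/* e.g. user-namespace root without global CAP_SYS_ADMIN */
		printf("read blocked by the handler: %s\n", strerror(errno));
	else
		printf("readable: running with CAP_SYS_ADMIN\n");
	close(fd);
	return 0;
}
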
diff --git a/queue-4.14/series b/queue-4.14/series
index d4eb2fdc9541ec7466ee353a89eb345050b06fcb..3da67668bfe079a23db97382578c19abcf27da69 100644 (file)
@@ -2,3 +2,7 @@ scsi-mpt3sas-use-63-bit-dma-addressing-on-sas35-hba.patch
 sh-kernel-hw_breakpoint-fix-missing-break-in-switch-statement.patch
 mm-usercopy-use-memory-range-to-be-accessed-for-wraparound-check.patch
 mm-memcontrol.c-fix-use-after-free-in-mem_cgroup_iter.patch
+bpf-get-rid-of-pure_initcall-dependency-to-enable-jits.patch
+bpf-restrict-access-to-core-bpf-sysctls.patch
+bpf-add-bpf_jit_limit-knob-to-restrict-unpriv-allocations.patch
+x86-mm-use-write_once-when-setting-ptes.patch
diff --git a/queue-4.14/x86-mm-use-write_once-when-setting-ptes.patch b/queue-4.14/x86-mm-use-write_once-when-setting-ptes.patch
new file mode 100644 (file)
index 0000000..cba321e
--- /dev/null
@@ -0,0 +1,155 @@
+From foo@baz Sat 17 Aug 2019 05:34:49 PM CEST
+From: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Date: Fri, 16 Aug 2019 23:05:46 +0100
+Subject: x86/mm: Use WRITE_ONCE() when setting PTEs
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>
+Cc: stable <stable@vger.kernel.org>
+Message-ID: <20190816220546.GD9843@xylophone.i.decadent.org.uk>
+Content-Disposition: inline
+
+From: Nadav Amit <namit@vmware.com>
+
+commit 9bc4f28af75a91aea0ae383f50b0a430c4509303 upstream.
+
+When page-table entries are set, the compiler might optimize their
+assignment by using multiple instructions to set the PTE. This might
+turn into a security hazard if the user somehow manages to use the
+interim PTE. L1TF does not make our lives easier, making even an interim
+non-present PTE a security hazard.
+
+Using WRITE_ONCE() to set PTEs and friends should prevent this potential
+security hazard.
+
+I skimmed the differences in the binary with and without this patch. The
+differences are (obviously) greater when CONFIG_PARAVIRT=n, as more
+code optimizations are possible. For better or worse, the impact on the
+binary with this patch is pretty small. Nothing in the code jumped out
+as a security hazard while skimming it, but it seems that at least
+move_soft_dirty_pte() caused set_pte_at() to use multiple writes.
+
+Signed-off-by: Nadav Amit <namit@vmware.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Sean Christopherson <sean.j.christopherson@intel.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Link: https://lkml.kernel.org/r/20180902181451.80520-1-namit@vmware.com
+[bwh: Backported to 4.14:
+ - Drop changes in pmdp_establish()
+ - 5-level paging is a compile-time option
+ - Update both cases in native_set_pgd()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/pgtable_64.h |   22 +++++++++++-----------
+ arch/x86/mm/pgtable.c             |    8 ++++----
+ 2 files changed, 15 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -56,15 +56,15 @@ struct mm_struct;
+ void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
+ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
+-static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+-                                  pte_t *ptep)
++static inline void native_set_pte(pte_t *ptep, pte_t pte)
+ {
+-      *ptep = native_make_pte(0);
++      WRITE_ONCE(*ptep, pte);
+ }
+-static inline void native_set_pte(pte_t *ptep, pte_t pte)
++static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
++                                  pte_t *ptep)
+ {
+-      *ptep = pte;
++      native_set_pte(ptep, native_make_pte(0));
+ }
+ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
+@@ -74,7 +74,7 @@ static inline void native_set_pte_atomic
+ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
+ {
+-      *pmdp = pmd;
++      WRITE_ONCE(*pmdp, pmd);
+ }
+ static inline void native_pmd_clear(pmd_t *pmd)
+@@ -110,7 +110,7 @@ static inline pmd_t native_pmdp_get_and_
+ static inline void native_set_pud(pud_t *pudp, pud_t pud)
+ {
+-      *pudp = pud;
++      WRITE_ONCE(*pudp, pud);
+ }
+ static inline void native_pud_clear(pud_t *pud)
+@@ -220,9 +220,9 @@ static inline pgd_t pti_set_user_pgd(pgd
+ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+ {
+ #if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+-      p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
++      WRITE_ONCE(p4dp->pgd, pti_set_user_pgd(&p4dp->pgd, p4d.pgd));
+ #else
+-      *p4dp = p4d;
++      WRITE_ONCE(*p4dp, p4d);
+ #endif
+ }
+@@ -238,9 +238,9 @@ static inline void native_p4d_clear(p4d_
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+ #ifdef CONFIG_PAGE_TABLE_ISOLATION
+-      *pgdp = pti_set_user_pgd(pgdp, pgd);
++      WRITE_ONCE(*pgdp, pti_set_user_pgd(pgdp, pgd));
+ #else
+-      *pgdp = pgd;
++      WRITE_ONCE(*pgdp, pgd);
+ #endif
+ }
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -260,7 +260,7 @@ static void pgd_mop_up_pmds(struct mm_st
+               if (pgd_val(pgd) != 0) {
+                       pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+-                      pgdp[i] = native_make_pgd(0);
++                      pgd_clear(&pgdp[i]);
+                       paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
+                       pmd_free(mm, pmd);
+@@ -430,7 +430,7 @@ int ptep_set_access_flags(struct vm_area
+       int changed = !pte_same(*ptep, entry);
+       if (changed && dirty)
+-              *ptep = entry;
++              set_pte(ptep, entry);
+       return changed;
+ }
+@@ -445,7 +445,7 @@ int pmdp_set_access_flags(struct vm_area
+       VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+       if (changed && dirty) {
+-              *pmdp = entry;
++              set_pmd(pmdp, entry);
+               /*
+                * We had a write-protection fault here and changed the pmd
+                * to to more permissive. No need to flush the TLB for that,
+@@ -465,7 +465,7 @@ int pudp_set_access_flags(struct vm_area
+       VM_BUG_ON(address & ~HPAGE_PUD_MASK);
+       if (changed && dirty) {
+-              *pudp = entry;
++              set_pud(pudp, entry);
+               /*
+                * We had a write-protection fault here and changed the pud
+                * to to more permissive. No need to flush the TLB for that,
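
For context on the mechanism (an illustrative sketch, not taken from the
patch; the kernel's real WRITE_ONCE() is more elaborate): the macro boils
down to a store through a volatile-qualified lvalue, which in practice the
compiler emits as one untorn access for an aligned machine-word type,
closing exactly the interim-PTE window the commit message describes:

#include <stdint.h>

typedef struct { uint64_t pte; } pte_t;

/* Reduced stand-in for the kernel macro: force the store to go through a
 * volatile lvalue so the compiler does not split it into narrower writes.
 */
#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))

void set_pte_plain(pte_t *ptep, pte_t pte)
{
	*ptep = pte;			/* compiler is free to tear this store */
}

void set_pte_once(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(ptep->pte, pte.pte);	/* a single 64-bit store */
}
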