--- /dev/null
+From 60e2793d440a3ec95abb5d6d4fc034a4b480472d Mon Sep 17 00:00:00 2001
+From: Michal Hocko <mhocko@suse.com>
+Date: Fri, 5 Nov 2021 13:38:06 -0700
+Subject: mm, oom: do not trigger out_of_memory from the #PF
+
+From: Michal Hocko <mhocko@suse.com>
+
+commit 60e2793d440a3ec95abb5d6d4fc034a4b480472d upstream.
+
+Any allocation failure during the #PF path will return with VM_FAULT_OOM,
+which in turn results in pagefault_out_of_memory(). This can happen for
+two different reasons: a) memcg is out of memory and we rely on
+mem_cgroup_oom_synchronize() to perform the memcg OOM handling, or b) a
+normal allocation fails.
+
+The latter is quite problematic because allocation paths already trigger
+out_of_memory() and the page allocator tries really hard not to fail
+allocations. In any case, if the OOM killer has already been invoked
+there is no reason to invoke it again from the #PF path, especially when
+the OOM condition might be gone by that time and we have no way to find
+out other than by allocating again.
+
+Moreover, if the allocation failed and the OOM killer hasn't been
+invoked, then we are unlikely to do the right thing from the #PF context
+because we have already lost the allocation context and restrictions and
+therefore might oom kill a task from a different NUMA domain.
+
+This all suggests that there is no legitimate reason to trigger
+out_of_memory() from pagefault_out_of_memory(), so drop it. Just to be
+sure that no #PF path returns with VM_FAULT_OOM without allocation,
+print a warning that this is happening before we restart the #PF.
+
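+For context, here is an editor's sketch (assumed and heavily simplified,
+not part of the original commit message) of how an arch fault handler such
+as x86's mm_fault_error() reacts to VM_FAULT_OOM, i.e. where
+pagefault_out_of_memory() sits in the fault-restart path:
+
+	/* arch #PF handler, simplified and hypothetical */
+	fault = handle_mm_fault(vma, address, flags);
+	if (fault & VM_FAULT_OOM) {
+		/* after this patch: memcg OOM synchronization + warning only */
+		pagefault_out_of_memory();
+		/* return to the faulting context; the access is retried,
+		 * which is the "restart the #PF" mentioned above */
+		return;
+	}
+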
+[VvS: an allocation from the #PF path can hit the limit of the cgroup v1
+kmem controller. This is a local problem related to memcg; however, it
+causes unnecessary global OOM kills that are repeated over and over again
+and escalate into a real disaster. This has been broken since kmem
+accounting was introduced for cgroup v1 (3.8). There was no kmem-specific
+reclaim for the separate limit, so the only way to handle the kmem hard
+limit was to return with ENOMEM. Upstream, the problem will be fixed by
+removing the outdated kmem limit; however, stable and LTS kernels cannot
+do that and are still affected. This patch fixes the problem and should
+be backported into stable/LTS.]
+
+Link: https://lkml.kernel.org/r/f5fd8dd8-0ad4-c524-5f65-920b01972a42@virtuozzo.com
+Signed-off-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Uladzislau Rezki <urezki@gmail.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/oom_kill.c | 22 ++++++++--------------
+ 1 file changed, 8 insertions(+), 14 deletions(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -1078,19 +1078,15 @@ bool out_of_memory(struct oom_control *o
+ }
+
+ /*
+- * The pagefault handler calls here because it is out of memory, so kill a
+- * memory-hogging task. If oom_lock is held by somebody else, a parallel oom
+- * killing is already in progress so do nothing.
++ * The pagefault handler calls here because some allocation has failed. We have
++ * to take care of the memcg OOM here because this is the only safe context without
++ * any locks held but let the oom killer triggered from the allocation context care
++ * about the global OOM.
+ */
+ void pagefault_out_of_memory(void)
+ {
+- struct oom_control oc = {
+- .zonelist = NULL,
+- .nodemask = NULL,
+- .memcg = NULL,
+- .gfp_mask = 0,
+- .order = 0,
+- };
++ static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
++ DEFAULT_RATELIMIT_BURST);
+
+ if (mem_cgroup_oom_synchronize(true))
+ return;
+@@ -1098,8 +1094,6 @@ void pagefault_out_of_memory(void)
+ if (fatal_signal_pending(current))
+ return;
+
+- if (!mutex_trylock(&oom_lock))
+- return;
+- out_of_memory(&oc);
+- mutex_unlock(&oom_lock);
++ if (__ratelimit(&pfoom_rs))
++ pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
+ }
--- /dev/null
+From 0b28179a6138a5edd9d82ad2687c05b3773c387b Mon Sep 17 00:00:00 2001
+From: Vasily Averin <vvs@virtuozzo.com>
+Date: Fri, 5 Nov 2021 13:38:02 -0700
+Subject: mm, oom: pagefault_out_of_memory: don't force global OOM for dying tasks
+
+From: Vasily Averin <vvs@virtuozzo.com>
+
+commit 0b28179a6138a5edd9d82ad2687c05b3773c387b upstream.
+
+Patch series "memcg: prohibit unconditional exceeding the limit of dying tasks", v3.
+
+Memory cgroup charging allows killed or exiting tasks to exceed the hard
+limit. This can be misused to trigger a global OOM from inside a
+memcg-limited container. On the other hand, if memcg fails an allocation
+called from inside the #PF handler, it triggers a global OOM from inside
+pagefault_out_of_memory().
+
+To prevent these problems, this patchset:
+ (a) removes execution of out_of_memory() from
+     pagefault_out_of_memory(), because nobody can explain why it is
+     necessary.
+ (b) allows memcg to fail allocation of dying/killed tasks.
+
+This patch (of 3):
+
+Any allocation failure during the #PF path will return with VM_FAULT_OOM,
+which in turn results in pagefault_out_of_memory(), which in turn executes
+out_of_memory() and can kill a random task.
+
+An allocation might fail when the current task is the oom victim and
+there are no memory reserves left. The OOM killer is already handled at
+the page allocator level for the global OOM and at the charging level
+for the memcg one. Both have much more information about the scope of
+the allocation/charge request. This means that either the OOM killer has
+been invoked properly and still didn't make the allocation succeed, or it
+has been skipped because it couldn't have been invoked. In both cases,
+triggering it from here is pointless and even harmful.
+
+It makes much more sense to let the killed task die rather than to wake
+up an eternally hungry oom-killer and send him to choose a fatter victim
+for breakfast.
+
+Link: https://lkml.kernel.org/r/0828a149-786e-7c06-b70a-52d086818ea3@virtuozzo.com
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Suggested-by: Michal Hocko <mhocko@suse.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Roman Gushchin <guro@fb.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Uladzislau Rezki <urezki@gmail.com>
+Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/oom_kill.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -1095,6 +1095,9 @@ void pagefault_out_of_memory(void)
+ if (mem_cgroup_oom_synchronize(true))
+ return;
+
++ if (fatal_signal_pending(current))
++ return;
++
+ if (!mutex_trylock(&oom_lock))
+ return;
+ out_of_memory(&oc);
--- /dev/null
+From foo@baz Mon Nov 15 03:28:18 PM CET 2021
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+Date: Mon, 15 Nov 2021 16:30:37 +0530
+Subject: powerpc/bpf: Fix BPF_SUB when imm == 0x80000000
+To: <stable@vger.kernel.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <552698f49119e7682a578f84d841c505ad4e976b.1636969865.git.naveen.n.rao@linux.vnet.ibm.com>
+
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+
+upstream commit 5855c4c1f415ca3ba1046e77c0b3d3dfc96c9025
+
+We aren't handling subtraction involving an immediate value of
+0x80000000 properly. Fix the same.
+
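+A worked example (editor's note, not part of the upstream commit message)
+of why the old "negate the immediate and reuse the ADD path" trick breaks
+for exactly this value:
+
+	/*
+	 * Illustration only, not kernel code: the old JIT rewrote
+	 * "dst -= imm" as "dst += -imm".  imm is a signed 32-bit value,
+	 * so negating S32_MIN overflows and in practice yields S32_MIN:
+	 */
+	s32 imm = -2147483647 - 1;	/* 0x80000000, i.e. S32_MIN */
+	imm = -imm;			/* still 0x80000000 */
+	/*
+	 * The JIT then loaded that value sign-extended and emitted an add,
+	 * so the 64-bit ALU case computed dst - 0x80000000 instead of the
+	 * required dst - (s64)(s32)0x80000000 == dst + 0x80000000.  The fix
+	 * keeps BPF_SUB as its own case and emits a real PPC_SUB when the
+	 * immediate does not fit the 16-bit ADDI range.
+	 */
+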
+Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF")
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+[mpe: Fold in fix from Naveen to use imm <= 32768]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/fc4b1276eb10761fd7ce0814c8dd089da2815251.1633464148.git.naveen.n.rao@linux.vnet.ibm.com
+[adjust macros to account for commits 0654186510a40e and 3a181237916310]
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit_comp64.c | 27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -363,18 +363,25 @@ static int bpf_jit_build_body(struct bpf
+ PPC_SUB(dst_reg, dst_reg, src_reg);
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+- case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
++ if (!imm) {
++ goto bpf_alu32_trunc;
++ } else if (imm >= -32768 && imm < 32768) {
++ PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
++ } else {
++ PPC_LI32(b2p[TMP_REG_1], imm);
++ PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
++ }
++ goto bpf_alu32_trunc;
++ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+- if (BPF_OP(code) == BPF_SUB)
+- imm = -imm;
+- if (imm) {
+- if (imm >= -32768 && imm < 32768)
+- PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
+- else {
+- PPC_LI32(b2p[TMP_REG_1], imm);
+- PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
+- }
++ if (!imm) {
++ goto bpf_alu32_trunc;
++ } else if (imm > -32768 && imm <= 32768) {
++ PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm));
++ } else {
++ PPC_LI32(b2p[TMP_REG_1], imm);
++ PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
--- /dev/null
+From foo@baz Mon Nov 15 03:28:18 PM CET 2021
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+Date: Mon, 15 Nov 2021 16:30:36 +0530
+Subject: powerpc/bpf: Validate branch ranges
+To: <stable@vger.kernel.org>
+Cc: Michael Ellerman <mpe@ellerman.id.au>, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <a3eff0df543b26ce416086b0a893cc729a94799b.1636969865.git.naveen.n.rao@linux.vnet.ibm.com>
+
+From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
+
+upstream commit 3832ba4e283d7052b783dab8311df7e3590fed93
+
+Add checks to ensure that we never emit branch instructions with
+truncated branch offsets.
+
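+An editor's sketch (assumed helpers, mirroring the checks the reworked
+PPC_JMP and PPC_BCC_SHORT macros below enforce; upstream factors them into
+the is_offset_in_[cond_]branch_range() helpers mentioned in the backport
+note):
+
+	/* b: 26-bit signed, word-aligned byte displacement (LI field << 2) */
+	static bool is_offset_in_branch_range(long offset)
+	{
+		return (offset >= -0x2000000 && offset <= 0x1fffffc &&
+			!(offset & 0x3));
+	}
+
+	/* bc: 16-bit signed, word-aligned byte displacement (BD field << 2) */
+	static bool is_offset_in_cond_branch_range(long offset)
+	{
+		return (offset >= -0x8000 && offset <= 0x7fff &&
+			!(offset & 0x3));
+	}
+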
+Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Acked-by: Song Liu <songliubraving@fb.com>
+Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/71d33a6b7603ec1013c9734dd8bdd4ff5e929142.1633464148.git.naveen.n.rao@linux.vnet.ibm.com
+[expand is_offset_in_[cond_]branch_range() helpers, drop ppc32 changes]
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/net/bpf_jit.h | 25 +++++++++++++++++++------
+ arch/powerpc/net/bpf_jit_comp64.c | 10 +++++++---
+ 2 files changed, 26 insertions(+), 9 deletions(-)
+
+--- a/arch/powerpc/net/bpf_jit.h
++++ b/arch/powerpc/net/bpf_jit.h
+@@ -177,13 +177,26 @@
+ #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a))
+
+ /* Long jump; (unconditional 'branch') */
+-#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
+- (((dest) - (ctx->idx * 4)) & 0x03fffffc))
++#define PPC_JMP(dest) \
++ do { \
++ long offset = (long)(dest) - (ctx->idx * 4); \
++ if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3) { \
++ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
++ return -ERANGE; \
++ } \
++ EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \
++ } while (0)
+ /* "cond" here covers BO:BI fields. */
+-#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \
+- (((cond) & 0x3ff) << 16) | \
+- (((dest) - (ctx->idx * 4)) & \
+- 0xfffc))
++#define PPC_BCC_SHORT(cond, dest) \
++ do { \
++ long offset = (long)(dest) - (ctx->idx * 4); \
++ if (offset < -0x8000 || offset > 0x7fff || offset & 0x3) { \
++ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
++ return -ERANGE; \
++ } \
++ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
++ } while (0)
++
+ /* Sign-extended 32-bit immediate load */
+ #define PPC_LI32(d, i) do { \
+ if ((int)(uintptr_t)(i) >= -32768 && \
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -239,7 +239,7 @@ static void bpf_jit_emit_func_call(u32 *
+ PPC_BLRL();
+ }
+
+-static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
++static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+ {
+ /*
+ * By now, the eBPF program has already setup parameters in r3, r4 and r5
+@@ -300,7 +300,9 @@ static void bpf_jit_emit_tail_call(u32 *
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ PPC_BCTR();
++
+ /* out: */
++ return 0;
+ }
+
+ /* Assemble the body code between the prologue & epilogue */
+@@ -310,7 +312,7 @@ static int bpf_jit_build_body(struct bpf
+ {
+ const struct bpf_insn *insn = fp->insnsi;
+ int flen = fp->len;
+- int i;
++ int i, ret;
+
+ /* Start of epilogue code - will only be valid 2nd pass onwards */
+ u32 exit_addr = addrs[flen];
+@@ -938,7 +940,9 @@ common_load:
+ */
+ case BPF_JMP | BPF_CALL | BPF_X:
+ ctx->seen |= SEEN_TAILCALL;
+- bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
++ ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
++ if (ret < 0)
++ return ret;
+ break;
+
+ default:
vsock-prevent-unnecessary-refcnt-inc-for-nonblocking.patch
usb-chipidea-fix-interrupt-deadlock.patch
arm-9156-1-drop-cc-option-fallbacks-for-architecture-selection.patch
+powerpc-bpf-validate-branch-ranges.patch
+powerpc-bpf-fix-bpf_sub-when-imm-0x80000000.patch
+mm-oom-pagefault_out_of_memory-don-t-force-global-oom-for-dying-tasks.patch
+mm-oom-do-not-trigger-out_of_memory-from-the-pf.patch