From: Greg Kroah-Hartman Date: Mon, 15 Nov 2021 15:04:47 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v5.4.160~41 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2e2859efaac885e264845dc8c4b28bdf3ac55aec;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: mm-oom-do-not-trigger-out_of_memory-from-the-pf.patch mm-oom-pagefault_out_of_memory-don-t-force-global-oom-for-dying-tasks.patch powerpc-bpf-fix-bpf_sub-when-imm-0x80000000.patch powerpc-bpf-validate-branch-ranges.patch --- diff --git a/queue-4.9/mm-oom-do-not-trigger-out_of_memory-from-the-pf.patch b/queue-4.9/mm-oom-do-not-trigger-out_of_memory-from-the-pf.patch new file mode 100644 index 00000000000..5d034c8bdb5 --- /dev/null +++ b/queue-4.9/mm-oom-do-not-trigger-out_of_memory-from-the-pf.patch @@ -0,0 +1,102 @@ +From 60e2793d440a3ec95abb5d6d4fc034a4b480472d Mon Sep 17 00:00:00 2001 +From: Michal Hocko +Date: Fri, 5 Nov 2021 13:38:06 -0700 +Subject: mm, oom: do not trigger out_of_memory from the #PF + +From: Michal Hocko + +commit 60e2793d440a3ec95abb5d6d4fc034a4b480472d upstream. + +Any allocation failure during the #PF path will return with VM_FAULT_OOM +which in turn results in pagefault_out_of_memory. This can happen for 2 +different reasons. a) Memcg is out of memory and we rely on +mem_cgroup_oom_synchronize to perform the memcg OOM handling or b) +normal allocation fails. + +The latter is quite problematic because allocation paths already trigger +out_of_memory and the page allocator tries really hard to not fail +allocations. Anyway, if the OOM killer has been already invoked there +is no reason to invoke it again from the #PF path. Especially when the +OOM condition might be gone by that time and we have no way to find out +other than allocate. 
+ +Moreover if the allocation failed and the OOM killer hasn't been invoked +then we are unlikely to do the right thing from the #PF context because +we have already lost the allocation context and restrictions and +therefore might oom kill a task from a different NUMA domain. + +This all suggests that there is no legitimate reason to trigger +out_of_memory from pagefault_out_of_memory so drop it. Just to be sure +that no #PF path returns with VM_FAULT_OOM without allocation print a +warning that this is happening before we restart the #PF. + +[VvS: #PF allocation can hit into limit of cgroup v1 kmem controller. +This is a local problem related to memcg, however, it causes unnecessary +global OOM kills that are repeated over and over again and escalate into a +real disaster. This has been broken since kmem accounting has been +introduced for cgroup v1 (3.8). There was no kmem specific reclaim for +the separate limit so the only way to handle kmem hard limit was to return +with ENOMEM. In upstream the problem will be fixed by removing the +outdated kmem limit, however stable and LTS kernels cannot do it and are +still affected. This patch fixes the problem and should be backported +into stable/LTS.] + +Link: https://lkml.kernel.org/r/f5fd8dd8-0ad4-c524-5f65-920b01972a42@virtuozzo.com +Signed-off-by: Michal Hocko +Signed-off-by: Vasily Averin +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: Mel Gorman +Cc: Roman Gushchin +Cc: Shakeel Butt +Cc: Tetsuo Handa +Cc: Uladzislau Rezki +Cc: Vladimir Davydov +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/oom_kill.c | 22 ++++++++-------------- + 1 file changed, 8 insertions(+), 14 deletions(-) + +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -1078,19 +1078,15 @@ bool out_of_memory(struct oom_control *o + } + + /* +- * The pagefault handler calls here because it is out of memory, so kill a +- * memory-hogging task. 
If oom_lock is held by somebody else, a parallel oom +- * killing is already in progress so do nothing. ++ * The pagefault handler calls here because some allocation has failed. We have ++ * to take care of the memcg OOM here because this is the only safe context without ++ * any locks held but let the oom killer triggered from the allocation context care ++ * about the global OOM. + */ + void pagefault_out_of_memory(void) + { +- struct oom_control oc = { +- .zonelist = NULL, +- .nodemask = NULL, +- .memcg = NULL, +- .gfp_mask = 0, +- .order = 0, +- }; ++ static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL, ++ DEFAULT_RATELIMIT_BURST); + + if (mem_cgroup_oom_synchronize(true)) + return; +@@ -1098,8 +1094,6 @@ void pagefault_out_of_memory(void) + if (fatal_signal_pending(current)) + return; + +- if (!mutex_trylock(&oom_lock)) +- return; +- out_of_memory(&oc); +- mutex_unlock(&oom_lock); ++ if (__ratelimit(&pfoom_rs)) ++ pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n"); + } diff --git a/queue-4.9/mm-oom-pagefault_out_of_memory-don-t-force-global-oom-for-dying-tasks.patch b/queue-4.9/mm-oom-pagefault_out_of_memory-don-t-force-global-oom-for-dying-tasks.patch new file mode 100644 index 00000000000..86aef72bf14 --- /dev/null +++ b/queue-4.9/mm-oom-pagefault_out_of_memory-don-t-force-global-oom-for-dying-tasks.patch @@ -0,0 +1,74 @@ +From 0b28179a6138a5edd9d82ad2687c05b3773c387b Mon Sep 17 00:00:00 2001 +From: Vasily Averin +Date: Fri, 5 Nov 2021 13:38:02 -0700 +Subject: mm, oom: pagefault_out_of_memory: don't force global OOM for dying tasks + +From: Vasily Averin + +commit 0b28179a6138a5edd9d82ad2687c05b3773c387b upstream. + +Patch series "memcg: prohibit unconditional exceeding the limit of dying tasks", v3. + +Memory cgroup charging allows killed or exiting tasks to exceed the hard +limit. It can be misused and allowed to trigger global OOM from inside +a memcg-limited container. 
On the other hand if memcg fails allocation, +called from inside #PF handler it triggers global OOM from inside +pagefault_out_of_memory(). + +To prevent these problems this patchset: + (a) removes execution of out_of_memory() from + pagefault_out_of_memory(), because nobody can explain why it is + necessary. + (b) allows memcg to fail allocation of dying/killed tasks. + +This patch (of 3): + +Any allocation failure during the #PF path will return with VM_FAULT_OOM +which in turn results in pagefault_out_of_memory which in turn executes +out_of_memory() and can kill a random task. + +An allocation might fail when the current task is the oom victim and +there are no memory reserves left. The OOM killer is already handled at +the page allocator level for the global OOM and at the charging level +for the memcg one. Both have much more information about the scope of +allocation/charge request. This means that either the OOM killer has +been invoked properly and didn't lead to the allocation success or it +has been skipped because it couldn't have been invoked. In both cases +triggering it from here is pointless and even harmful. + +It makes much more sense to let the killed task die rather than to wake +up an eternally hungry oom-killer and send him to choose a fatter victim +for breakfast. 
+ +Link: https://lkml.kernel.org/r/0828a149-786e-7c06-b70a-52d086818ea3@virtuozzo.com +Signed-off-by: Vasily Averin +Suggested-by: Michal Hocko +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: Mel Gorman +Cc: Roman Gushchin +Cc: Shakeel Butt +Cc: Tetsuo Handa +Cc: Uladzislau Rezki +Cc: Vladimir Davydov +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/oom_kill.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/oom_kill.c ++++ b/mm/oom_kill.c +@@ -1095,6 +1095,9 @@ void pagefault_out_of_memory(void) + if (mem_cgroup_oom_synchronize(true)) + return; + ++ if (fatal_signal_pending(current)) ++ return; ++ + if (!mutex_trylock(&oom_lock)) + return; + out_of_memory(&oc); diff --git a/queue-4.9/powerpc-bpf-fix-bpf_sub-when-imm-0x80000000.patch b/queue-4.9/powerpc-bpf-fix-bpf_sub-when-imm-0x80000000.patch new file mode 100644 index 00000000000..8a2840e08cc --- /dev/null +++ b/queue-4.9/powerpc-bpf-fix-bpf_sub-when-imm-0x80000000.patch @@ -0,0 +1,66 @@ +From foo@baz Mon Nov 15 03:28:18 PM CET 2021 +From: "Naveen N. Rao" +Date: Mon, 15 Nov 2021 16:30:37 +0530 +Subject: powerpc/bpf: Fix BPF_SUB when imm == 0x80000000 +To: +Cc: Michael Ellerman , Daniel Borkmann +Message-ID: <552698f49119e7682a578f84d841c505ad4e976b.1636969865.git.naveen.n.rao@linux.vnet.ibm.com> + +From: "Naveen N. Rao" + +upstream commit 5855c4c1f415ca3ba1046e77c0b3d3dfc96c9025 + +We aren't handling subtraction involving an immediate value of +0x80000000 properly. Fix the same. + +Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") +Signed-off-by: Naveen N. Rao +Reviewed-by: Christophe Leroy +[mpe: Fold in fix from Naveen to use imm <= 32768] +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/fc4b1276eb10761fd7ce0814c8dd089da2815251.1633464148.git.naveen.n.rao@linux.vnet.ibm.com +[adjust macros to account for commits 0654186510a40e and 3a181237916310] +Signed-off-by: Naveen N. 
Rao +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/net/bpf_jit_comp64.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -363,18 +363,25 @@ static int bpf_jit_build_body(struct bpf + PPC_SUB(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ +- case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ ++ if (!imm) { ++ goto bpf_alu32_trunc; ++ } else if (imm >= -32768 && imm < 32768) { ++ PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); ++ } else { ++ PPC_LI32(b2p[TMP_REG_1], imm); ++ PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); ++ } ++ goto bpf_alu32_trunc; ++ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ +- if (BPF_OP(code) == BPF_SUB) +- imm = -imm; +- if (imm) { +- if (imm >= -32768 && imm < 32768) +- PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); +- else { +- PPC_LI32(b2p[TMP_REG_1], imm); +- PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); +- } ++ if (!imm) { ++ goto bpf_alu32_trunc; ++ } else if (imm > -32768 && imm <= 32768) { ++ PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm)); ++ } else { ++ PPC_LI32(b2p[TMP_REG_1], imm); ++ PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ diff --git a/queue-4.9/powerpc-bpf-validate-branch-ranges.patch b/queue-4.9/powerpc-bpf-validate-branch-ranges.patch new file mode 100644 index 00000000000..026b87e9fc7 --- /dev/null +++ b/queue-4.9/powerpc-bpf-validate-branch-ranges.patch @@ -0,0 +1,107 @@ +From foo@baz Mon Nov 15 03:28:18 PM CET 2021 +From: "Naveen N. Rao" +Date: Mon, 15 Nov 2021 16:30:36 +0530 +Subject: powerpc/bpf: Validate branch ranges +To: +Cc: Michael Ellerman , Daniel Borkmann +Message-ID: + +From: "Naveen N. 
Rao" + +upstream commit 3832ba4e283d7052b783dab8311df7e3590fed93 + +Add checks to ensure that we never emit branch instructions with +truncated branch offsets. + +Suggested-by: Michael Ellerman +Signed-off-by: Naveen N. Rao +Tested-by: Johan Almbladh +Reviewed-by: Christophe Leroy +Acked-by: Song Liu +Acked-by: Johan Almbladh +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/71d33a6b7603ec1013c9734dd8bdd4ff5e929142.1633464148.git.naveen.n.rao@linux.vnet.ibm.com +[expand is_offset_in_[cond_]branch_range() helpers, drop ppc32 changes] +Signed-off-by: Naveen N. Rao +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/net/bpf_jit.h | 25 +++++++++++++++++++------ + arch/powerpc/net/bpf_jit_comp64.c | 10 +++++++--- + 2 files changed, 26 insertions(+), 9 deletions(-) + +--- a/arch/powerpc/net/bpf_jit.h ++++ b/arch/powerpc/net/bpf_jit.h +@@ -177,13 +177,26 @@ + #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) + + /* Long jump; (unconditional 'branch') */ +-#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ +- (((dest) - (ctx->idx * 4)) & 0x03fffffc)) ++#define PPC_JMP(dest) \ ++ do { \ ++ long offset = (long)(dest) - (ctx->idx * 4); \ ++ if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3) { \ ++ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ ++ return -ERANGE; \ ++ } \ ++ EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ ++ } while (0) + /* "cond" here covers BO:BI fields. 
*/ +-#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ +- (((cond) & 0x3ff) << 16) | \ +- (((dest) - (ctx->idx * 4)) & \ +- 0xfffc)) ++#define PPC_BCC_SHORT(cond, dest) \ ++ do { \ ++ long offset = (long)(dest) - (ctx->idx * 4); \ ++ if (offset < -0x8000 || offset > 0x7fff || offset & 0x3) { \ ++ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ ++ return -ERANGE; \ ++ } \ ++ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ ++ } while (0) ++ + /* Sign-extended 32-bit immediate load */ + #define PPC_LI32(d, i) do { \ + if ((int)(uintptr_t)(i) >= -32768 && \ +--- a/arch/powerpc/net/bpf_jit_comp64.c ++++ b/arch/powerpc/net/bpf_jit_comp64.c +@@ -239,7 +239,7 @@ static void bpf_jit_emit_func_call(u32 * + PPC_BLRL(); + } + +-static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) ++static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) + { + /* + * By now, the eBPF program has already setup parameters in r3, r4 and r5 +@@ -300,7 +300,9 @@ static void bpf_jit_emit_tail_call(u32 * + bpf_jit_emit_common_epilogue(image, ctx); + + PPC_BCTR(); ++ + /* out: */ ++ return 0; + } + + /* Assemble the body code between the prologue & epilogue */ +@@ -310,7 +312,7 @@ static int bpf_jit_build_body(struct bpf + { + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; +- int i; ++ int i, ret; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; +@@ -938,7 +940,9 @@ common_load: + */ + case BPF_JMP | BPF_CALL | BPF_X: + ctx->seen |= SEEN_TAILCALL; +- bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); ++ ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); ++ if (ret < 0) ++ return ret; + break; + + default: diff --git a/queue-4.9/series b/queue-4.9/series index 48fd2cae444..6f6d32b1628 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -152,3 +152,7 @@ 
nfc-pn533-fix-double-free-when-pn533_fill_fragment_s.patch vsock-prevent-unnecessary-refcnt-inc-for-nonblocking.patch usb-chipidea-fix-interrupt-deadlock.patch arm-9156-1-drop-cc-option-fallbacks-for-architecture-selection.patch +powerpc-bpf-validate-branch-ranges.patch +powerpc-bpf-fix-bpf_sub-when-imm-0x80000000.patch +mm-oom-pagefault_out_of_memory-don-t-force-global-oom-for-dying-tasks.patch +mm-oom-do-not-trigger-out_of_memory-from-the-pf.patch