From: Greg Kroah-Hartman Date: Mon, 15 Nov 2021 14:06:23 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v5.4.160~46 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bd011134fcaedba116a9b97a17d441c051748940;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: 9p-net-fix-missing-error-check-in-p9_check_errors.patch memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch --- diff --git a/queue-5.10/9p-net-fix-missing-error-check-in-p9_check_errors.patch b/queue-5.10/9p-net-fix-missing-error-check-in-p9_check_errors.patch new file mode 100644 index 00000000000..94061a49a23 --- /dev/null +++ b/queue-5.10/9p-net-fix-missing-error-check-in-p9_check_errors.patch @@ -0,0 +1,29 @@ +From 27eb4c3144f7a5ebef3c9a261d80cb3e1fa784dc Mon Sep 17 00:00:00 2001 +From: Dominique Martinet +Date: Tue, 2 Nov 2021 19:47:47 +0900 +Subject: 9p/net: fix missing error check in p9_check_errors + +From: Dominique Martinet + +commit 27eb4c3144f7a5ebef3c9a261d80cb3e1fa784dc upstream. 
+ +Link: https://lkml.kernel.org/r/99338965-d36c-886e-cd0e-1d8fff2b4746@gmail.com +Reported-by: syzbot+06472778c97ed94af66d@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Dominique Martinet +Signed-off-by: Greg Kroah-Hartman +--- + net/9p/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/9p/client.c ++++ b/net/9p/client.c +@@ -538,6 +538,8 @@ static int p9_check_errors(struct p9_cli + kfree(ename); + } else { + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); ++ if (err) ++ goto out_err; + err = -ecode; + + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); diff --git a/queue-5.10/memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch b/queue-5.10/memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch new file mode 100644 index 00000000000..6a8bc3dd0b9 --- /dev/null +++ b/queue-5.10/memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch @@ -0,0 +1,143 @@ +From a4ebf1b6ca1e011289677239a2a361fde4a88076 Mon Sep 17 00:00:00 2001 +From: Vasily Averin +Date: Fri, 5 Nov 2021 13:38:09 -0700 +Subject: memcg: prohibit unconditional exceeding the limit of dying tasks + +From: Vasily Averin + +commit a4ebf1b6ca1e011289677239a2a361fde4a88076 upstream. + +Memory cgroup charging allows killed or exiting tasks to exceed the hard +limit. It is assumed that the amount of the memory charged by those +tasks is bound and most of the memory will get released while the task +is exiting. This is resembling a heuristic for the global OOM situation +when tasks get access to memory reserves. There is no global memory +shortage at the memcg level so the memcg heuristic is more relieved. + +The above assumption is overly optimistic though. E.g. vmalloc can +scale to really large requests and the heuristic would allow that. 
We +used to have an early break in the vmalloc allocator for killed tasks +but this has been reverted by commit b8c8a338f75e ("Revert "vmalloc: +back off when the current task is killed""). There are likely other +similar code paths which do not check for fatal signals in an +allocation&charge loop. Also there are some kernel objects charged to a +memcg which are not bound to a process life time. + +It has been observed that it is not really hard to trigger these +bypasses and cause global OOM situation. + +One potential way to address these runaways would be to limit the amount +of excess (similar to the global OOM with limited oom reserves). This +is certainly possible but it is not really clear how much of an excess +is desirable and still protects from global OOMs as that would have to +consider the overall memcg configuration. + +This patch is addressing the problem by removing the heuristic +altogether. Bypass is only allowed for requests which either cannot +fail or where the failure is not desirable while excess should be still +limited (e.g. atomic requests). Implementation wise a killed or dying +task fails to charge if it has passed the OOM killer stage. That should +give all forms of reclaim chance to restore the limit before the failure +(ENOMEM) and tell the caller to back off. + +In addition, this patch renames should_force_charge() helper to +task_is_dying() because now its use is not associated with forced +charging. + +This patch depends on pagefault_out_of_memory() to not trigger +out_of_memory(), because then a memcg failure can unwind to VM_FAULT_OOM +and cause a global OOM killer. 
+ +Link: https://lkml.kernel.org/r/8f5cebbb-06da-4902-91f0-6566fc4b4203@virtuozzo.com +Signed-off-by: Vasily Averin +Suggested-by: Michal Hocko +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: Vladimir Davydov +Cc: Roman Gushchin +Cc: Uladzislau Rezki +Cc: Vlastimil Babka +Cc: Shakeel Butt +Cc: Mel Gorman +Cc: Tetsuo Handa +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 27 ++++++++------------------- + 1 file changed, 8 insertions(+), 19 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -230,7 +230,7 @@ enum res_type { + iter != NULL; \ + iter = mem_cgroup_iter(NULL, iter, NULL)) + +-static inline bool should_force_charge(void) ++static inline bool task_is_dying(void) + { + return tsk_is_oom_victim(current) || fatal_signal_pending(current) || + (current->flags & PF_EXITING); +@@ -1729,7 +1729,7 @@ static bool mem_cgroup_out_of_memory(str + * A few threads which were not waiting at mutex_lock_killable() can + * fail to bail out. Therefore, check again after holding oom_lock. + */ +- ret = should_force_charge() || out_of_memory(&oc); ++ ret = task_is_dying() || out_of_memory(&oc); + + unlock: + mutex_unlock(&oom_lock); +@@ -2683,6 +2683,7 @@ static int try_charge(struct mem_cgroup + struct page_counter *counter; + enum oom_status oom_status; + unsigned long nr_reclaimed; ++ bool passed_oom = false; + bool may_swap = true; + bool drained = false; + unsigned long pflags; +@@ -2720,15 +2721,6 @@ retry: + goto force; + + /* +- * Unlike in global OOM situations, memcg is not in a physical +- * memory shortage. Allow dying and OOM-killed tasks to +- * bypass the last charges so that they can exit quickly and +- * free their memory. +- */ +- if (unlikely(should_force_charge())) +- goto force; +- +- /* + * Prevent unbounded recursion when reclaim operations need to + * allocate memory. 
This might exceed the limits temporarily, + * but we prefer facilitating memory reclaim and getting back +@@ -2788,8 +2780,9 @@ retry: + if (gfp_mask & __GFP_NOFAIL) + goto force; + +- if (fatal_signal_pending(current)) +- goto force; ++ /* Avoid endless loop for tasks bypassed by the oom killer */ ++ if (passed_oom && task_is_dying()) ++ goto nomem; + + /* + * keep retrying as long as the memcg oom killer is able to make +@@ -2798,14 +2791,10 @@ retry: + */ + oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask, + get_order(nr_pages * PAGE_SIZE)); +- switch (oom_status) { +- case OOM_SUCCESS: ++ if (oom_status == OOM_SUCCESS) { ++ passed_oom = true; + nr_retries = MAX_RECLAIM_RETRIES; + goto retry; +- case OOM_FAILED: +- goto force; +- default: +- goto nomem; + } + nomem: + if (!(gfp_mask & __GFP_NOFAIL)) diff --git a/queue-5.10/series b/queue-5.10/series index c02ccf5a509..4f6bbe5b588 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -544,7 +544,6 @@ x86-mce-add-errata-workaround-for-skylake-skx37.patch posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch f2fs-should-use-gfp_nofs-for-directory-inodes.patch -block-hold-invalidate_lock-in-blkdiscard-ioctl.patch -block-hold-invalidate_lock-in-blkzeroout-ioctl.patch -block-hold-invalidate_lock-in-blkresetzone-ioctl.patch net-neigh-enable-state-migration-between-nud_permane.patch +9p-net-fix-missing-error-check-in-p9_check_errors.patch +memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch