From: Greg Kroah-Hartman Date: Mon, 15 Nov 2021 14:06:23 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v5.4.160~46 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bd011134fcaedba116a9b97a17d441c051748940;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: 9p-net-fix-missing-error-check-in-p9_check_errors.patch memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch --- diff --git a/queue-5.10/9p-net-fix-missing-error-check-in-p9_check_errors.patch b/queue-5.10/9p-net-fix-missing-error-check-in-p9_check_errors.patch new file mode 100644 index 00000000000..94061a49a23 --- /dev/null +++ b/queue-5.10/9p-net-fix-missing-error-check-in-p9_check_errors.patch @@ -0,0 +1,29 @@ +From 27eb4c3144f7a5ebef3c9a261d80cb3e1fa784dc Mon Sep 17 00:00:00 2001 +From: Dominique Martinet +Date: Tue, 2 Nov 2021 19:47:47 +0900 +Subject: 9p/net: fix missing error check in p9_check_errors + +From: Dominique Martinet + +commit 27eb4c3144f7a5ebef3c9a261d80cb3e1fa784dc upstream. 
+ +Link: https://lkml.kernel.org/r/99338965-d36c-886e-cd0e-1d8fff2b4746@gmail.com +Reported-by: syzbot+06472778c97ed94af66d@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Dominique Martinet +Signed-off-by: Greg Kroah-Hartman +--- + net/9p/client.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/9p/client.c ++++ b/net/9p/client.c +@@ -538,6 +538,8 @@ static int p9_check_errors(struct p9_cli + kfree(ename); + } else { + err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); ++ if (err) ++ goto out_err; + err = -ecode; + + p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); diff --git a/queue-5.10/memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch b/queue-5.10/memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch new file mode 100644 index 00000000000..6a8bc3dd0b9 --- /dev/null +++ b/queue-5.10/memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch @@ -0,0 +1,143 @@ +From a4ebf1b6ca1e011289677239a2a361fde4a88076 Mon Sep 17 00:00:00 2001 +From: Vasily Averin +Date: Fri, 5 Nov 2021 13:38:09 -0700 +Subject: memcg: prohibit unconditional exceeding the limit of dying tasks + +From: Vasily Averin + +commit a4ebf1b6ca1e011289677239a2a361fde4a88076 upstream. + +Memory cgroup charging allows killed or exiting tasks to exceed the hard +limit. It is assumed that the amount of the memory charged by those +tasks is bound and most of the memory will get released while the task +is exiting. This is resembling a heuristic for the global OOM situation +when tasks get access to memory reserves. There is no global memory +shortage at the memcg level so the memcg heuristic is more relieved. + +The above assumption is overly optimistic though. E.g. vmalloc can +scale to really large requests and the heuristic would allow that. 
We +used to have an early break in the vmalloc allocator for killed tasks +but this has been reverted by commit b8c8a338f75e ("Revert "vmalloc: +back off when the current task is killed""). There are likely other +similar code paths which do not check for fatal signals in an +allocation&charge loop. Also there are some kernel objects charged to a +memcg which are not bound to a process life time. + +It has been observed that it is not really hard to trigger these +bypasses and cause global OOM situation. + +One potential way to address these runaways would be to limit the amount +of excess (similar to the global OOM with limited oom reserves). This +is certainly possible but it is not really clear how much of an excess +is desirable and still protects from global OOMs as that would have to +consider the overall memcg configuration. + +This patch is addressing the problem by removing the heuristic +altogether. Bypass is only allowed for requests which either cannot +fail or where the failure is not desirable while excess should be still +limited (e.g. atomic requests). Implementation wise a killed or dying +task fails to charge if it has passed the OOM killer stage. That should +give all forms of reclaim chance to restore the limit before the failure +(ENOMEM) and tell the caller to back off. + +In addition, this patch renames should_force_charge() helper to +task_is_dying() because now its use is not associated with forced +charging. + +This patch depends on pagefault_out_of_memory() to not trigger +out_of_memory(), because then a memcg failure can unwind to VM_FAULT_OOM +and cause a global OOM killer. 
+ +Link: https://lkml.kernel.org/r/8f5cebbb-06da-4902-91f0-6566fc4b4203@virtuozzo.com +Signed-off-by: Vasily Averin +Suggested-by: Michal Hocko +Acked-by: Michal Hocko +Cc: Johannes Weiner +Cc: Vladimir Davydov +Cc: Roman Gushchin +Cc: Uladzislau Rezki +Cc: Vlastimil Babka +Cc: Shakeel Butt +Cc: Mel Gorman +Cc: Tetsuo Handa +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 27 ++++++++------------------- + 1 file changed, 8 insertions(+), 19 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -230,7 +230,7 @@ enum res_type { + iter != NULL; \ + iter = mem_cgroup_iter(NULL, iter, NULL)) + +-static inline bool should_force_charge(void) ++static inline bool task_is_dying(void) + { + return tsk_is_oom_victim(current) || fatal_signal_pending(current) || + (current->flags & PF_EXITING); +@@ -1729,7 +1729,7 @@ static bool mem_cgroup_out_of_memory(str + * A few threads which were not waiting at mutex_lock_killable() can + * fail to bail out. Therefore, check again after holding oom_lock. + */ +- ret = should_force_charge() || out_of_memory(&oc); ++ ret = task_is_dying() || out_of_memory(&oc); + + unlock: + mutex_unlock(&oom_lock); +@@ -2683,6 +2683,7 @@ static int try_charge(struct mem_cgroup + struct page_counter *counter; + enum oom_status oom_status; + unsigned long nr_reclaimed; ++ bool passed_oom = false; + bool may_swap = true; + bool drained = false; + unsigned long pflags; +@@ -2720,15 +2721,6 @@ retry: + goto force; + + /* +- * Unlike in global OOM situations, memcg is not in a physical +- * memory shortage. Allow dying and OOM-killed tasks to +- * bypass the last charges so that they can exit quickly and +- * free their memory. +- */ +- if (unlikely(should_force_charge())) +- goto force; +- +- /* + * Prevent unbounded recursion when reclaim operations need to + * allocate memory. 
This might exceed the limits temporarily, + * but we prefer facilitating memory reclaim and getting back +@@ -2788,8 +2780,9 @@ retry: + if (gfp_mask & __GFP_NOFAIL) + goto force; + +- if (fatal_signal_pending(current)) +- goto force; ++ /* Avoid endless loop for tasks bypassed by the oom killer */ ++ if (passed_oom && task_is_dying()) ++ goto nomem; + + /* + * keep retrying as long as the memcg oom killer is able to make +@@ -2798,14 +2791,10 @@ retry: + */ + oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask, + get_order(nr_pages * PAGE_SIZE)); +- switch (oom_status) { +- case OOM_SUCCESS: ++ if (oom_status == OOM_SUCCESS) { ++ passed_oom = true; + nr_retries = MAX_RECLAIM_RETRIES; + goto retry; +- case OOM_FAILED: +- goto force; +- default: +- goto nomem; + } + nomem: + if (!(gfp_mask & __GFP_NOFAIL)) diff --git a/queue-5.10/series b/queue-5.10/series index c02ccf5a509..4f6bbe5b588 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -544,7 +544,6 @@ x86-mce-add-errata-workaround-for-skylake-skx37.patch posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch f2fs-should-use-gfp_nofs-for-directory-inodes.patch -block-hold-invalidate_lock-in-blkdiscard-ioctl.patch -block-hold-invalidate_lock-in-blkzeroout-ioctl.patch -block-hold-invalidate_lock-in-blkresetzone-ioctl.patch net-neigh-enable-state-migration-between-nud_permane.patch +9p-net-fix-missing-error-check-in-p9_check_errors.patch +memcg-prohibit-unconditional-exceeding-the-limit-of-dying-tasks.patch