From: Greg Kroah-Hartman
Date: Wed, 15 Mar 2023 08:30:00 +0000 (+0100)
Subject: 4.19-stable patches
X-Git-Tag: v4.14.310~43
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dc79c83cf5f1afaf5a585e804353a4582e9e505a;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
cgroup-add-missing-cpus_read_lock-to-cgroup_attach_task_all.patch
cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch
cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
---

diff --git a/queue-4.19/cgroup-add-missing-cpus_read_lock-to-cgroup_attach_task_all.patch b/queue-4.19/cgroup-add-missing-cpus_read_lock-to-cgroup_attach_task_all.patch
new file mode 100644
index 00000000000..942d7071f54
--- /dev/null
+++ b/queue-4.19/cgroup-add-missing-cpus_read_lock-to-cgroup_attach_task_all.patch
@@ -0,0 +1,52 @@
+From 43626dade36fa74d3329046f4ae2d7fdefe401c6 Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa
+Date: Thu, 25 Aug 2022 17:38:38 +0900
+Subject: cgroup: Add missing cpus_read_lock() to cgroup_attach_task_all()
+
+From: Tetsuo Handa
+
+commit 43626dade36fa74d3329046f4ae2d7fdefe401c6 upstream.
+
+syzbot is hitting percpu_rwsem_assert_held(&cpu_hotplug_lock) warning at
+cpuset_attach() [1], for commit 4f7e7236435ca0ab ("cgroup: Fix
+threadgroup_rwsem <-> cpus_read_lock() deadlock") missed that
+cpuset_attach() is also called from cgroup_attach_task_all().
+Add cpus_read_lock() like what cgroup_procs_write_start() does.
+
+Link: https://syzkaller.appspot.com/bug?extid=29d3a3b4d86c8136ad9e [1]
+Reported-by: syzbot
+Signed-off-by: Tetsuo Handa
+Fixes: 4f7e7236435ca0ab ("cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock")
+Signed-off-by: Tejun Heo
+Signed-off-by: Cai Xinchen
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/cgroup/cgroup-v1.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -13,6 +13,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+
+@@ -55,6 +56,7 @@ int cgroup_attach_task_all(struct task_s
+ int retval = 0;
+
+ mutex_lock(&cgroup_mutex);
++ get_online_cpus();
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+ for_each_root(root) {
+ struct cgroup *from_cgrp;
+@@ -71,6 +73,7 @@ int cgroup_attach_task_all(struct task_s
+ break;
+ }
+ percpu_up_write(&cgroup_threadgroup_rwsem);
++ put_online_cpus();
+ mutex_unlock(&cgroup_mutex);
+
+ return retval;
diff --git a/queue-4.19/cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch b/queue-4.19/cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch
new file mode 100644
index 00000000000..2aee01bf849
--- /dev/null
+++ b/queue-4.19/cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch
@@ -0,0 +1,184 @@
+From d74b27d63a8bebe2fe634944e4ebdc7b10db7a39 Mon Sep 17 00:00:00 2001
+From: Juri Lelli
+Date: Fri, 19 Jul 2019 15:59:58 +0200
+Subject: cgroup/cpuset: Change cpuset_rwsem and hotplug lock order
+
+From: Juri Lelli
+
+commit d74b27d63a8bebe2fe634944e4ebdc7b10db7a39 upstream.
+
+cpuset_rwsem is going to be acquired from sched_setscheduler() with a
+following patch. There are however paths (e.g., spawn_ksoftirqd) in
+which sched_setscheduler() is eventually called while holding hotplug lock;
+this creates a dependency between hotplug lock (to be always acquired
+first) and cpuset_rwsem (to be always acquired after hotplug lock).
+
+Fix paths which currently take the two locks in the wrong order (after
+a following patch is applied).
+
+Tested-by: Dietmar Eggemann
+Signed-off-by: Juri Lelli
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Linus Torvalds
+Cc: Peter Zijlstra
+Cc: Thomas Gleixner
+Cc: bristot@redhat.com
+Cc: claudio@evidence.eu.com
+Cc: lizefan@huawei.com
+Cc: longman@redhat.com
+Cc: luca.abeni@santannapisa.it
+Cc: mathieu.poirier@linaro.org
+Cc: rostedt@goodmis.org
+Cc: tj@kernel.org
+Cc: tommaso.cucinotta@santannapisa.it
+Link: https://lkml.kernel.org/r/20190719140000.31694-7-juri.lelli@redhat.com
+Signed-off-by: Ingo Molnar
+Signed-off-by: Cai Xinchen
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/cpuset.h | 8 ++++----
+ kernel/cgroup/cpuset.c | 18 ++++++++++++++----
+ 2 files changed, 18 insertions(+), 8 deletions(-)
+
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
+
+ static inline void cpuset_inc(void)
+ {
+- static_branch_inc(&cpusets_pre_enable_key);
+- static_branch_inc(&cpusets_enabled_key);
++ static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
++ static_branch_inc_cpuslocked(&cpusets_enabled_key);
+ }
+
+ static inline void cpuset_dec(void)
+ {
+- static_branch_dec(&cpusets_enabled_key);
+- static_branch_dec(&cpusets_pre_enable_key);
++ static_branch_dec_cpuslocked(&cpusets_enabled_key);
++ static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
+ }
+
+ extern int cpuset_init(void);
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -830,8 +830,8 @@ static void rebuild_sched_domains_locked
+ cpumask_var_t *doms;
+ int ndoms;
+
++ lockdep_assert_cpus_held();
+ lockdep_assert_held(&cpuset_mutex);
+- get_online_cpus();
+
+ /*
+ * We have raced with CPU hotplug. Don't do anything to avoid
+@@ -839,15 +839,13 @@ static void rebuild_sched_domains_locked
+ * Anyways, hotplug work item will rebuild sched domains.
+ */
+ if (!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
+- goto out;
++ return;
+
+ /* Generate domain masks and attrs */
+ ndoms = generate_sched_domains(&doms, &attr);
+
+ /* Have scheduler rebuild the domains */
+ partition_sched_domains(ndoms, doms, attr);
+-out:
+- put_online_cpus();
+ }
+ #else /* !CONFIG_SMP */
+ static void rebuild_sched_domains_locked(void)
+@@ -857,9 +855,11 @@ static void rebuild_sched_domains_locked
+
+ void rebuild_sched_domains(void)
+ {
++ get_online_cpus();
+ mutex_lock(&cpuset_mutex);
+ rebuild_sched_domains_locked();
+ mutex_unlock(&cpuset_mutex);
++ put_online_cpus();
+ }
+
+ /**
+@@ -1617,6 +1617,7 @@ static int cpuset_write_u64(struct cgrou
+ cpuset_filetype_t type = cft->private;
+ int retval = 0;
+
++ get_online_cpus();
+ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs)) {
+ retval = -ENODEV;
+@@ -1654,6 +1655,7 @@ static int cpuset_write_u64(struct cgrou
+ }
+ out_unlock:
+ mutex_unlock(&cpuset_mutex);
++ put_online_cpus();
+ return retval;
+ }
+
+@@ -1664,6 +1666,7 @@ static int cpuset_write_s64(struct cgrou
+ cpuset_filetype_t type = cft->private;
+ int retval = -ENODEV;
+
++ get_online_cpus();
+ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs))
+ goto out_unlock;
+@@ -1678,6 +1681,7 @@ static int cpuset_write_s64(struct cgrou
+ }
+ out_unlock:
+ mutex_unlock(&cpuset_mutex);
++ put_online_cpus();
+ return retval;
+ }
+
+@@ -1716,6 +1720,7 @@ static ssize_t cpuset_write_resmask(stru
+ kernfs_break_active_protection(of->kn);
+ flush_work(&cpuset_hotplug_work);
+
++ get_online_cpus();
+ mutex_lock(&cpuset_mutex);
+ if (!is_cpuset_online(cs))
+ goto out_unlock;
+@@ -1741,6 +1746,7 @@ static ssize_t cpuset_write_resmask(stru
+ free_trial_cpuset(trialcs);
+ out_unlock:
+ mutex_unlock(&cpuset_mutex);
++ put_online_cpus();
+ kernfs_unbreak_active_protection(of->kn);
+ css_put(&cs->css);
+ flush_workqueue(cpuset_migrate_mm_wq);
+@@ -1985,6 +1991,7 @@ static int cpuset_css_online(struct cgro
+ if (!parent)
+ return 0;
+
++ get_online_cpus();
+ mutex_lock(&cpuset_mutex);
+
+ set_bit(CS_ONLINE, &cs->flags);
+@@ -2035,6 +2042,7 @@ static int cpuset_css_online(struct cgro
+ spin_unlock_irq(&callback_lock);
+ out_unlock:
+ mutex_unlock(&cpuset_mutex);
++ put_online_cpus();
+ return 0;
+ }
+
+@@ -2048,6 +2056,7 @@ static void cpuset_css_offline(struct cg
+ {
+ struct cpuset *cs = css_cs(css);
+
++ get_online_cpus();
+ mutex_lock(&cpuset_mutex);
+
+ if (is_sched_load_balance(cs))
+@@ -2057,6 +2066,7 @@ static void cpuset_css_offline(struct cg
+ clear_bit(CS_ONLINE, &cs->flags);
+
+ mutex_unlock(&cpuset_mutex);
++ put_online_cpus();
+ }
+
+ static void cpuset_css_free(struct cgroup_subsys_state *css)
diff --git a/queue-4.19/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch b/queue-4.19/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
new file mode 100644
index 00000000000..c0b0ed990fd
--- /dev/null
+++ b/queue-4.19/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
@@ -0,0 +1,148 @@
+From 4f7e7236435ca0abe005c674ebd6892c6e83aeb3 Mon Sep 17 00:00:00 2001
+From: Tejun Heo
+Date: Mon, 15 Aug 2022 13:27:38 -1000
+Subject: cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock
+
+From: Tejun Heo
+
+commit 4f7e7236435ca0abe005c674ebd6892c6e83aeb3 upstream.
+
+Bringing up a CPU may involve creating and destroying tasks which requires
+read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
+cpus_read_lock(). However, cpuset's ->attach(), which may be called with
+threadgroup_rwsem write-locked, also wants to disable CPU hotplug and
+acquires cpus_read_lock(), leading to a deadlock.
+
+Fix it by guaranteeing that ->attach() is always called with CPU hotplug
+disabled and removing cpus_read_lock() call from cpuset_attach().
+
+Signed-off-by: Tejun Heo
+Reviewed-and-tested-by: Imran Khan
+Reported-and-tested-by: Xuewen Yan
+Fixes: 05c7b7a92cc8 ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug")
+Cc: stable@vger.kernel.org # v5.17+
+Signed-off-by: Cai Xinchen
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/cgroup/cgroup.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
+ kernel/cgroup/cpuset.c | 7 +------
+ 2 files changed, 45 insertions(+), 11 deletions(-)
+
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2210,6 +2210,45 @@ int task_cgroup_path(struct task_struct
+ EXPORT_SYMBOL_GPL(task_cgroup_path);
+
+ /**
++ * cgroup_attach_lock - Lock for ->attach()
++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
++ *
++ * cgroup migration sometimes needs to stabilize threadgroups against forks and
++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach()
++ * implementations (e.g. cpuset), also need to disable CPU hotplug.
++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can
++ * lead to deadlocks.
++ *
++ * Bringing up a CPU may involve creating and destroying tasks which requires
++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while
++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up
++ * waiting for an on-going CPU hotplug operation which in turn is waiting for
++ * the threadgroup_rwsem to be released to create new tasks. For more details:
++ *
++ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu
++ *
++ * Resolve the situation by always acquiring cpus_read_lock() before optionally
++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
++ * CPU hotplug is disabled on entry.
++ */
++static void cgroup_attach_lock(void)
++{
++ get_online_cpus();
++ percpu_down_write(&cgroup_threadgroup_rwsem);
++}
++
++/**
++ * cgroup_attach_unlock - Undo cgroup_attach_lock()
++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
++ */
++static void cgroup_attach_unlock(void)
++{
++ percpu_up_write(&cgroup_threadgroup_rwsem);
++ put_online_cpus();
++}
++
++/**
+ * cgroup_migrate_add_task - add a migration target task to a migration context
+ * @task: target task
+ * @mgctx: target migration context
+@@ -2694,7 +2733,7 @@ struct task_struct *cgroup_procs_write_s
+ if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+ return ERR_PTR(-EINVAL);
+
+- percpu_down_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_lock();
+
+ rcu_read_lock();
+ if (pid) {
+@@ -2725,7 +2764,7 @@ struct task_struct *cgroup_procs_write_s
+ goto out_unlock_rcu;
+
+ out_unlock_threadgroup:
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock();
+ out_unlock_rcu:
+ rcu_read_unlock();
+ return tsk;
+@@ -2740,7 +2779,7 @@ void cgroup_procs_write_finish(struct ta
+ /* release reference from cgroup_procs_write_start() */
+ put_task_struct(task);
+
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock();
+ for_each_subsys(ss, ssid)
+ if (ss->post_attach)
+ ss->post_attach();
+@@ -2799,7 +2838,7 @@ static int cgroup_update_dfl_csses(struc
+
+ lockdep_assert_held(&cgroup_mutex);
+
+- percpu_down_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_lock();
+
+ /* look up all csses currently attached to @cgrp's subtree */
+ spin_lock_irq(&css_set_lock);
+@@ -2830,7 +2869,7 @@ static int cgroup_update_dfl_csses(struc
+ ret = cgroup_migrate_execute(&mgctx);
+ out_finish:
+ cgroup_migrate_finish(&mgctx);
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock();
+ return ret;
+ }
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -1528,13 +1528,9 @@ static void cpuset_attach(struct cgroup_
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
++ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
+ mutex_lock(&cpuset_mutex);
+
+- /*
+- * It should hold cpus lock because a cpu offline event can
+- * cause set_cpus_allowed_ptr() failed.
+- */
+- get_online_cpus();
+ /* prepare for attach */
+ if (cs == &top_cpuset)
+ cpumask_copy(cpus_attach, cpu_possible_mask);
+@@ -1553,7 +1549,6 @@ static void cpuset_attach(struct cgroup_
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+ cpuset_update_task_spread_flag(cs, task);
+ }
+- put_online_cpus();
+
+ /*
+ * Change mm for all threadgroup leaders. This is expensive and may
diff --git a/queue-4.19/series b/queue-4.19/series
index fa86bb8b958..98fea79eed5 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -37,3 +37,6 @@ pci-avoid-flr-for-solidrun-snet-dpu-rev-1.patch
 media-ov5640-fix-analogue-gain-control.patch
 tipc-improve-function-tipc_wait_for_cond.patch
 drm-i915-don-t-use-bar-mappings-for-ring-buffers-with-llc.patch
+cgroup-cpuset-change-cpuset_rwsem-and-hotplug-lock-order.patch
+cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
+cgroup-add-missing-cpus_read_lock-to-cgroup_attach_task_all.patch