--- /dev/null
+From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Tue, 11 Apr 2023 09:35:59 -0400
+Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.
+
+In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
+new tasks. It is too late to check that in cpuset_fork(). So we need
+to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
+pre-check it before we can allow attachment to a different cpuset.
+
+We also need to increment the attach_in_progress count to alert other
+code that a new task is going to be added to the cpuset.
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Suggested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cpuset.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 83 insertions(+), 5 deletions(-)
+
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2186,6 +2186,18 @@ static int fmeter_getrate(struct fmeter
+
+ static struct cpuset *cpuset_attach_old_cs;
+
++/*
++ * Check whether a cpuset can accept a new task; returns 0 or -ENOSPC.
++ * Unless in v2 mode, cpus_allowed and mems_allowed can't be empty.
++ */
++static int cpuset_can_attach_check(struct cpuset *cs)
++{
++ if (!is_in_v2_mode() &&
++ (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
++ return -ENOSPC;
++ return 0;
++}
++
+ /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
+ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ {
+@@ -2200,10 +2212,8 @@ static int cpuset_can_attach(struct cgro
+
+ percpu_down_write(&cpuset_rwsem);
+
+- /* allow moving tasks into an empty cpuset if on default hierarchy */
+- ret = -ENOSPC;
+- if (!is_in_v2_mode() &&
+- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
++ ret = cpuset_can_attach_check(cs);
++ if (ret)
+ goto out_unlock;
+
+ cgroup_taskset_for_each(task, css, tset) {
+@@ -2220,7 +2230,6 @@ static int cpuset_can_attach(struct cgro
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+- ret = 0;
+ out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+@@ -2951,6 +2960,68 @@ static void cpuset_bind(struct cgroup_su
+ }
+
+ /*
++ * If the child is cloned into a cpuset different from its parent's, check
++ * that the move is allowed and, if so, mark an attach as in progress.
++ */
++static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
++{
++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++ bool same_cs;
++ int ret;
++
++ rcu_read_lock();
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs)
++ return 0;
++
++ lockdep_assert_held(&cgroup_mutex);
++ percpu_down_write(&cpuset_rwsem);
++
++ /* Check to see if task is allowed in the cpuset */
++ ret = cpuset_can_attach_check(cs);
++ if (ret)
++ goto out_unlock;
++
++ ret = task_can_attach(task, cs->effective_cpus);
++ if (ret)
++ goto out_unlock;
++
++ ret = security_task_setscheduler(task);
++ if (ret)
++ goto out_unlock;
++
++ /*
++ * Mark attach is in progress. This makes validate_change() fail
++ * changes which zero cpus/mems_allowed.
++ */
++ cs->attach_in_progress++;
++out_unlock:
++ percpu_up_write(&cpuset_rwsem);
++ return ret;
++}
++
++static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
++{
++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++ bool same_cs;
++
++ rcu_read_lock();
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs)
++ return;
++
++ percpu_down_write(&cpuset_rwsem);
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++ percpu_up_write(&cpuset_rwsem);
++}
++
++/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+@@ -2978,6 +3049,11 @@ static void cpuset_fork(struct task_stru
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
++
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++
+ percpu_up_write(&cpuset_rwsem);
+ }
+
+@@ -2991,6 +3067,8 @@ struct cgroup_subsys cpuset_cgrp_subsys
+ .attach = cpuset_attach,
+ .post_attach = cpuset_post_attach,
+ .bind = cpuset_bind,
++ .can_fork = cpuset_can_fork,
++ .cancel_fork = cpuset_cancel_fork,
+ .fork = cpuset_fork,
+ .legacy_cftypes = legacy_files,
+ .dfl_cftypes = dfl_files,