+++ /dev/null
-From stable-owner@vger.kernel.org Mon Apr 17 19:21:21 2023
-From: Waiman Long <longman@redhat.com>
-Date: Mon, 17 Apr 2023 13:19:58 -0400
-Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
-To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable@vger.kernel.org
-Cc: "Tejun Heo" <tj@kernel.org>, "Michal Koutný" <mkoutny@suse.com>, "Giuseppe Scrivano" <gscrivan@redhat.com>, "Waiman Long" <longman@redhat.com>
-Message-ID: <20230417171958.3389333-4-longman@redhat.com>
-
-From: Waiman Long <longman@redhat.com>
-
-commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.
-
-In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
-new tasks. It is too late to check that in cpuset_fork(). So we need
-to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
-pre-check it before we can allow attachment to a different cpuset.
-
-We also need to increment the attach_in_progress counter to alert other
-code that a new task is going to be added to the cpuset.
-
-Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
-Suggested-by: Michal Koutný <mkoutny@suse.com>
-Signed-off-by: Waiman Long <longman@redhat.com>
-Cc: stable@vger.kernel.org # v5.7+
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/cgroup/cpuset.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 83 insertions(+), 5 deletions(-)
-
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -2151,6 +2151,18 @@ static int fmeter_getrate(struct fmeter
-
- static struct cpuset *cpuset_attach_old_cs;
-
-+/*
-+ * Check to see if a cpuset can accept a new task
-+ * For v1, cpus_allowed and mems_allowed can't be empty.
-+ */
-+static int cpuset_can_attach_check(struct cpuset *cs)
-+{
-+ if (!is_in_v2_mode() &&
-+ (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
-+ return -ENOSPC;
-+ return 0;
-+}
-+
- /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
- static int cpuset_can_attach(struct cgroup_taskset *tset)
- {
-@@ -2165,10 +2177,8 @@ static int cpuset_can_attach(struct cgro
-
- percpu_down_write(&cpuset_rwsem);
-
-- /* allow moving tasks into an empty cpuset if on default hierarchy */
-- ret = -ENOSPC;
-- if (!is_in_v2_mode() &&
-- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
-+ ret = cpuset_can_attach_check(cs);
-+ if (ret)
- goto out_unlock;
-
- cgroup_taskset_for_each(task, css, tset) {
-@@ -2185,7 +2195,6 @@ static int cpuset_can_attach(struct cgro
- * changes which zero cpus/mems_allowed.
- */
- cs->attach_in_progress++;
-- ret = 0;
- out_unlock:
- percpu_up_write(&cpuset_rwsem);
- return ret;
-@@ -2913,6 +2922,68 @@ static void cpuset_bind(struct cgroup_su
- }
-
- /*
-+ * In case the child is cloned into a cpuset different from its parent,
-+ * additional checks are done to see if the move is allowed.
-+ */
-+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
-+{
-+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
-+ bool same_cs;
-+ int ret;
-+
-+ rcu_read_lock();
-+ same_cs = (cs == task_cs(current));
-+ rcu_read_unlock();
-+
-+ if (same_cs)
-+ return 0;
-+
-+ lockdep_assert_held(&cgroup_mutex);
-+ percpu_down_write(&cpuset_rwsem);
-+
-+ /* Check to see if task is allowed in the cpuset */
-+ ret = cpuset_can_attach_check(cs);
-+ if (ret)
-+ goto out_unlock;
-+
-+ ret = task_can_attach(task, cs->effective_cpus);
-+ if (ret)
-+ goto out_unlock;
-+
-+ ret = security_task_setscheduler(task);
-+ if (ret)
-+ goto out_unlock;
-+
-+ /*
-+ * Mark attach is in progress. This makes validate_change() fail
-+ * changes which zero cpus/mems_allowed.
-+ */
-+ cs->attach_in_progress++;
-+out_unlock:
-+ percpu_up_write(&cpuset_rwsem);
-+ return ret;
-+}
-+
-+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
-+{
-+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
-+ bool same_cs;
-+
-+ rcu_read_lock();
-+ same_cs = (cs == task_cs(current));
-+ rcu_read_unlock();
-+
-+ if (same_cs)
-+ return;
-+
-+ percpu_down_write(&cpuset_rwsem);
-+ cs->attach_in_progress--;
-+ if (!cs->attach_in_progress)
-+ wake_up(&cpuset_attach_wq);
-+ percpu_up_write(&cpuset_rwsem);
-+}
-+
-+/*
- * Make sure the new task conform to the current state of its parent,
- * which could have been changed by cpuset just after it inherits the
- * state from the parent and before it sits on the cgroup's task list.
-@@ -2946,6 +3017,11 @@ static void cpuset_fork(struct task_stru
- else
- guarantee_online_cpus(cs, cpus_attach);
- cpuset_attach_task(cs, task);
-+
-+ cs->attach_in_progress--;
-+ if (!cs->attach_in_progress)
-+ wake_up(&cpuset_attach_wq);
-+
- percpu_up_write(&cpuset_rwsem);
- }
-
-@@ -2959,6 +3035,8 @@ struct cgroup_subsys cpuset_cgrp_subsys
- .attach = cpuset_attach,
- .post_attach = cpuset_post_attach,
- .bind = cpuset_bind,
-+ .can_fork = cpuset_can_fork,
-+ .cancel_fork = cpuset_cancel_fork,
- .fork = cpuset_fork,
- .legacy_cftypes = legacy_files,
- .dfl_cftypes = dfl_files,
+++ /dev/null
-From stable-owner@vger.kernel.org Mon Apr 17 19:21:31 2023
-From: Waiman Long <longman@redhat.com>
-Date: Mon, 17 Apr 2023 13:19:55 -0400
-Subject: cgroup/cpuset: Change references of cpuset_mutex to cpuset_rwsem
-To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable@vger.kernel.org
-Cc: "Tejun Heo" <tj@kernel.org>, "Michal Koutný" <mkoutny@suse.com>, "Giuseppe Scrivano" <gscrivan@redhat.com>, "Waiman Long" <longman@redhat.com>
-Message-ID: <20230417171958.3389333-1-longman@redhat.com>
-
-From: Waiman Long <longman@redhat.com>
-
-Commit b94f9ac79a7395c2d6171cc753cc27942df0be73 upstream.
-
-Since commit 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to
-percpu_rwsem"), cpuset_mutex has been replaced by cpuset_rwsem which is
-a percpu rwsem. However, the comments in kernel/cgroup/cpuset.c still
-reference cpuset_mutex and are therefore now incorrect.
-
-Change all the references of cpuset_mutex to cpuset_rwsem.
-
-Fixes: 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem")
-Signed-off-by: Waiman Long <longman@redhat.com>
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/cgroup/cpuset.c | 56 +++++++++++++++++++++++++------------------------
- 1 file changed, 29 insertions(+), 27 deletions(-)
-
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -299,17 +299,19 @@ static struct cpuset top_cpuset = {
- if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
-
- /*
-- * There are two global locks guarding cpuset structures - cpuset_mutex and
-+ * There are two global locks guarding cpuset structures - cpuset_rwsem and
- * callback_lock. We also require taking task_lock() when dereferencing a
- * task's cpuset pointer. See "The task_lock() exception", at the end of this
-- * comment.
-+ * comment. The cpuset code uses only cpuset_rwsem write lock. Other
-+ * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to
-+ * prevent change to cpuset structures.
- *
- * A task must hold both locks to modify cpusets. If a task holds
-- * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
-+ * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it
- * is the only task able to also acquire callback_lock and be able to
- * modify cpusets. It can perform various checks on the cpuset structure
- * first, knowing nothing will change. It can also allocate memory while
-- * just holding cpuset_mutex. While it is performing these checks, various
-+ * just holding cpuset_rwsem. While it is performing these checks, various
- * callback routines can briefly acquire callback_lock to query cpusets.
- * Once it is ready to make the changes, it takes callback_lock, blocking
- * everyone else.
-@@ -380,7 +382,7 @@ static inline bool is_in_v2_mode(void)
- * One way or another, we guarantee to return some non-empty subset
- * of cpu_online_mask.
- *
-- * Call with callback_lock or cpuset_mutex held.
-+ * Call with callback_lock or cpuset_rwsem held.
- */
- static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
- {
-@@ -410,7 +412,7 @@ static void guarantee_online_cpus(struct
- * One way or another, we guarantee to return some non-empty subset
- * of node_states[N_MEMORY].
- *
-- * Call with callback_lock or cpuset_mutex held.
-+ * Call with callback_lock or cpuset_rwsem held.
- */
- static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
- {
-@@ -422,7 +424,7 @@ static void guarantee_online_mems(struct
- /*
- * update task's spread flag if cpuset's page/slab spread flag is set
- *
-- * Call with callback_lock or cpuset_mutex held.
-+ * Call with callback_lock or cpuset_rwsem held.
- */
- static void cpuset_update_task_spread_flag(struct cpuset *cs,
- struct task_struct *tsk)
-@@ -443,7 +445,7 @@ static void cpuset_update_task_spread_fl
- *
- * One cpuset is a subset of another if all its allowed CPUs and
- * Memory Nodes are a subset of the other, and its exclusive flags
-- * are only set if the other's are set. Call holding cpuset_mutex.
-+ * are only set if the other's are set. Call holding cpuset_rwsem.
- */
-
- static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
-@@ -552,7 +554,7 @@ static inline void free_cpuset(struct cp
- * If we replaced the flag and mask values of the current cpuset
- * (cur) with those values in the trial cpuset (trial), would
- * our various subset and exclusive rules still be valid? Presumes
-- * cpuset_mutex held.
-+ * cpuset_rwsem held.
- *
- * 'cur' is the address of an actual, in-use cpuset. Operations
- * such as list traversal that depend on the actual address of the
-@@ -675,7 +677,7 @@ static void update_domain_attr_tree(stru
- rcu_read_unlock();
- }
-
--/* Must be called with cpuset_mutex held. */
-+/* Must be called with cpuset_rwsem held. */
- static inline int nr_cpusets(void)
- {
- /* jump label reference count + the top-level cpuset */
-@@ -701,7 +703,7 @@ static inline int nr_cpusets(void)
- * domains when operating in the severe memory shortage situations
- * that could cause allocation failures below.
- *
-- * Must be called with cpuset_mutex held.
-+ * Must be called with cpuset_rwsem held.
- *
- * The three key local variables below are:
- * cp - cpuset pointer, used (together with pos_css) to perform a
-@@ -980,7 +982,7 @@ partition_and_rebuild_sched_domains(int
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
- *
-- * Call with cpuset_mutex held. Takes get_online_cpus().
-+ * Call with cpuset_rwsem held. Takes get_online_cpus().
- */
- static void rebuild_sched_domains_locked(void)
- {
-@@ -1053,7 +1055,7 @@ void rebuild_sched_domains(void)
- * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
- *
- * Iterate through each task of @cs updating its cpus_allowed to the
-- * effective cpuset's. As this function is called with cpuset_mutex held,
-+ * effective cpuset's. As this function is called with cpuset_rwsem held,
- * cpuset membership stays stable.
- */
- static void update_tasks_cpumask(struct cpuset *cs)
-@@ -1328,7 +1330,7 @@ static int update_parent_subparts_cpumas
- *
- * On legacy hierachy, effective_cpus will be the same with cpu_allowed.
- *
-- * Called with cpuset_mutex held
-+ * Called with cpuset_rwsem held
- */
- static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
- {
-@@ -1688,12 +1690,12 @@ static void *cpuset_being_rebound;
- * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
- *
- * Iterate through each task of @cs updating its mems_allowed to the
-- * effective cpuset's. As this function is called with cpuset_mutex held,
-+ * effective cpuset's. As this function is called with cpuset_rwsem held,
- * cpuset membership stays stable.
- */
- static void update_tasks_nodemask(struct cpuset *cs)
- {
-- static nodemask_t newmems; /* protected by cpuset_mutex */
-+ static nodemask_t newmems; /* protected by cpuset_rwsem */
- struct css_task_iter it;
- struct task_struct *task;
-
-@@ -1706,7 +1708,7 @@ static void update_tasks_nodemask(struct
- * take while holding tasklist_lock. Forks can happen - the
- * mpol_dup() cpuset_being_rebound check will catch such forks,
- * and rebind their vma mempolicies too. Because we still hold
-- * the global cpuset_mutex, we know that no other rebind effort
-+ * the global cpuset_rwsem, we know that no other rebind effort
- * will be contending for the global variable cpuset_being_rebound.
- * It's ok if we rebind the same mm twice; mpol_rebind_mm()
- * is idempotent. Also migrate pages in each mm to new nodes.
-@@ -1752,7 +1754,7 @@ static void update_tasks_nodemask(struct
- *
- * On legacy hiearchy, effective_mems will be the same with mems_allowed.
- *
-- * Called with cpuset_mutex held
-+ * Called with cpuset_rwsem held
- */
- static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
- {
-@@ -1805,7 +1807,7 @@ static void update_nodemasks_hier(struct
- * mempolicies and if the cpuset is marked 'memory_migrate',
- * migrate the tasks pages to the new memory.
- *
-- * Call with cpuset_mutex held. May take callback_lock during call.
-+ * Call with cpuset_rwsem held. May take callback_lock during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_lock, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
-@@ -1895,7 +1897,7 @@ static int update_relax_domain_level(str
- * @cs: the cpuset in which each task's spread flags needs to be changed
- *
- * Iterate through each task of @cs updating its spread flags. As this
-- * function is called with cpuset_mutex held, cpuset membership stays
-+ * function is called with cpuset_rwsem held, cpuset membership stays
- * stable.
- */
- static void update_tasks_flags(struct cpuset *cs)
-@@ -1915,7 +1917,7 @@ static void update_tasks_flags(struct cp
- * cs: the cpuset to update
- * turning_on: whether the flag is being set or cleared
- *
-- * Call with cpuset_mutex held.
-+ * Call with cpuset_rwsem held.
- */
-
- static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
-@@ -1964,7 +1966,7 @@ out:
- * cs: the cpuset to update
- * new_prs: new partition root state
- *
-- * Call with cpuset_mutex held.
-+ * Call with cpuset_rwsem held.
- */
- static int update_prstate(struct cpuset *cs, int new_prs)
- {
-@@ -2145,7 +2147,7 @@ static int fmeter_getrate(struct fmeter
-
- static struct cpuset *cpuset_attach_old_cs;
-
--/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-+/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
- static int cpuset_can_attach(struct cgroup_taskset *tset)
- {
- struct cgroup_subsys_state *css;
-@@ -2201,7 +2203,7 @@ static void cpuset_cancel_attach(struct
- }
-
- /*
-- * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
-+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
- * but we can't allocate it dynamically there. Define it global and
- * allocate from cpuset_init().
- */
-@@ -2209,7 +2211,7 @@ static cpumask_var_t cpus_attach;
-
- static void cpuset_attach(struct cgroup_taskset *tset)
- {
-- /* static buf protected by cpuset_mutex */
-+ /* static buf protected by cpuset_rwsem */
- static nodemask_t cpuset_attach_nodemask_to;
- struct task_struct *task;
- struct task_struct *leader;
-@@ -2402,7 +2404,7 @@ static ssize_t cpuset_write_resmask(stru
- * operation like this one can lead to a deadlock through kernfs
- * active_ref protection. Let's break the protection. Losing the
- * protection is okay as we check whether @cs is online after
-- * grabbing cpuset_mutex anyway. This only happens on the legacy
-+ * grabbing cpuset_rwsem anyway. This only happens on the legacy
- * hierarchies.
- */
- css_get(&cs->css);
-@@ -3641,7 +3643,7 @@ void __cpuset_memory_pressure_bump(void)
- * - Used for /proc/<pid>/cpuset.
- * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
- * doesn't really matter if tsk->cpuset changes after we read it,
-- * and we take cpuset_mutex, keeping cpuset_attach() from changing it
-+ * and we take cpuset_rwsem, keeping cpuset_attach() from changing it
- * anyway.
- */
- int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+++ /dev/null
-From stable-owner@vger.kernel.org Mon Apr 17 19:21:27 2023
-From: Waiman Long <longman@redhat.com>
-Date: Mon, 17 Apr 2023 13:19:57 -0400
-Subject: cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
-To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable@vger.kernel.org
-Cc: "Tejun Heo" <tj@kernel.org>, "Michal Koutný" <mkoutny@suse.com>, "Giuseppe Scrivano" <gscrivan@redhat.com>, "Waiman Long" <longman@redhat.com>
-Message-ID: <20230417171958.3389333-3-longman@redhat.com>
-
-From: Waiman Long <longman@redhat.com>
-
-commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c upstream.
-
-By default, the clone(2) syscall spawns a child process into the same
-cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
-introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
-into cgroups"), the child will be spawned into a different cgroup which
-is somewhat similar to writing the child's tid into "cgroup.threads".
-
-The current cpuset_fork() method does not properly handle the
-CLONE_INTO_CGROUP case where the cpuset of the child may be different
-from that of its parent. Update the cpuset_fork() method to treat the
-CLONE_INTO_CGROUP case similar to cpuset_attach().
-
-Since the newly cloned task has not run yet, its actual memory usage
-isn't known, so it is not necessary to make any change to its mm in
-cpuset_fork().
-
-Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
-Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
-Signed-off-by: Waiman Long <longman@redhat.com>
-Cc: stable@vger.kernel.org # v5.7+
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/cgroup/cpuset.c | 60 +++++++++++++++++++++++++++++++++++--------------
- 1 file changed, 44 insertions(+), 16 deletions(-)
-
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -2207,16 +2207,29 @@ static void cpuset_cancel_attach(struct
- }
-
- /*
-- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
-+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
- * but we can't allocate it dynamically there. Define it global and
- * allocate from cpuset_init().
- */
- static cpumask_var_t cpus_attach;
-+static nodemask_t cpuset_attach_nodemask_to;
-+
-+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
-+{
-+ percpu_rwsem_assert_held(&cpuset_rwsem);
-+
-+ /*
-+ * can_attach beforehand should guarantee that this doesn't
-+ * fail. TODO: have a better way to handle failure here
-+ */
-+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-+
-+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-+ cpuset_update_task_spread_flags(cs, task);
-+}
-
- static void cpuset_attach(struct cgroup_taskset *tset)
- {
-- /* static buf protected by cpuset_rwsem */
-- static nodemask_t cpuset_attach_nodemask_to;
- struct task_struct *task;
- struct task_struct *leader;
- struct cgroup_subsys_state *css;
-@@ -2237,16 +2250,8 @@ static void cpuset_attach(struct cgroup_
-
- guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
-
-- cgroup_taskset_for_each(task, css, tset) {
-- /*
-- * can_attach beforehand should guarantee that this doesn't
-- * fail. TODO: have a better way to handle failure here
-- */
-- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
--
-- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-- cpuset_update_task_spread_flags(cs, task);
-- }
-+ cgroup_taskset_for_each(task, css, tset)
-+ cpuset_attach_task(cs, task);
-
- /*
- * Change mm for all threadgroup leaders. This is expensive and may
-@@ -2914,11 +2919,34 @@ static void cpuset_bind(struct cgroup_su
- */
- static void cpuset_fork(struct task_struct *task)
- {
-- if (task_css_is_root(task, cpuset_cgrp_id))
-+ struct cpuset *cs;
-+ bool same_cs;
-+
-+ rcu_read_lock();
-+ cs = task_cs(task);
-+ same_cs = (cs == task_cs(current));
-+ rcu_read_unlock();
-+
-+ if (same_cs) {
-+ if (cs == &top_cpuset)
-+ return;
-+
-+ set_cpus_allowed_ptr(task, current->cpus_ptr);
-+ task->mems_allowed = current->mems_allowed;
- return;
-+ }
-+
-+ /* CLONE_INTO_CGROUP */
-+ percpu_down_write(&cpuset_rwsem);
-+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
-
-- set_cpus_allowed_ptr(task, current->cpus_ptr);
-- task->mems_allowed = current->mems_allowed;
-+ /* prepare for attach */
-+ if (cs == &top_cpuset)
-+ cpumask_copy(cpus_attach, cpu_possible_mask);
-+ else
-+ guarantee_online_cpus(cs, cpus_attach);
-+ cpuset_attach_task(cs, task);
-+ percpu_up_write(&cpuset_rwsem);
- }
-
- struct cgroup_subsys cpuset_cgrp_subsys = {
+++ /dev/null
-From stable-owner@vger.kernel.org Mon Apr 17 19:21:24 2023
-From: Waiman Long <longman@redhat.com>
-Date: Mon, 17 Apr 2023 13:19:56 -0400
-Subject: cgroup/cpuset: Skip spread flags update on v2
-To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, stable@vger.kernel.org
-Cc: "Tejun Heo" <tj@kernel.org>, "Michal Koutný" <mkoutny@suse.com>, "Giuseppe Scrivano" <gscrivan@redhat.com>, "Waiman Long" <longman@redhat.com>
-Message-ID: <20230417171958.3389333-2-longman@redhat.com>
-
-From: Waiman Long <longman@redhat.com>
-
-commit 18f9a4d47527772515ad6cbdac796422566e6440 upstream.
-
-Cpuset v2 has no spread flags to set. So we can skip spread
-flags update if cpuset v2 is being used. Also change the name to
-cpuset_update_task_spread_flags() to indicate that there are multiple
-spread flags.
-
-Signed-off-by: Waiman Long <longman@redhat.com>
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/cgroup/cpuset.c | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -424,11 +424,15 @@ static void guarantee_online_mems(struct
- /*
- * update task's spread flag if cpuset's page/slab spread flag is set
- *
-- * Call with callback_lock or cpuset_rwsem held.
-+ * Call with callback_lock or cpuset_rwsem held. The check can be skipped
-+ * if on default hierarchy.
- */
--static void cpuset_update_task_spread_flag(struct cpuset *cs,
-+static void cpuset_update_task_spread_flags(struct cpuset *cs,
- struct task_struct *tsk)
- {
-+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
-+ return;
-+
- if (is_spread_page(cs))
- task_set_spread_page(tsk);
- else
-@@ -1907,7 +1911,7 @@ static void update_tasks_flags(struct cp
-
- css_task_iter_start(&cs->css, 0, &it);
- while ((task = css_task_iter_next(&it)))
-- cpuset_update_task_spread_flag(cs, task);
-+ cpuset_update_task_spread_flags(cs, task);
- css_task_iter_end(&it);
- }
-
-@@ -2241,7 +2245,7 @@ static void cpuset_attach(struct cgroup_
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-- cpuset_update_task_spread_flag(cs, task);
-+ cpuset_update_task_spread_flags(cs, task);
- }
-
- /*
sched-fair-fix-imbalance-overflow.patch
x86-rtc-remove-__init-for-runtime-functions.patch
i2c-ocores-generate-stop-condition-after-timeout-in-.patch
-cgroup-cpuset-change-references-of-cpuset_mutex-to-cpuset_rwsem.patch
-cgroup-cpuset-skip-spread-flags-update-on-v2.patch
-cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgroup-properly.patch
-cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch
watchdog-sbsa_wdog-make-sure-the-timeout-programming-is-within-the-limits.patch
coresight-etm4-fix-for-loop-drvdata-nr_addr_cmp-range-bug.patch
kbuild-check-the-minimum-assembler-version-in-kconfig.patch
+++ /dev/null
-From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
-From: Waiman Long <longman@redhat.com>
-Date: Tue, 11 Apr 2023 09:35:59 -0400
-Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Waiman Long <longman@redhat.com>
-
-commit eee87853794187f6adbe19533ed79c8b44b36a91 upstream.
-
-In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
-new tasks. It is too late to check that in cpuset_fork(). So we need
-to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
-pre-check it before we can allow attachment to a different cpuset.
-
-We also need to increment the attach_in_progress counter to alert other
-code that a new task is going to be added to the cpuset.
-
-Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
-Suggested-by: Michal Koutný <mkoutny@suse.com>
-Signed-off-by: Waiman Long <longman@redhat.com>
-Cc: stable@vger.kernel.org # v5.7+
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- kernel/cgroup/cpuset.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++---
- 1 file changed, 83 insertions(+), 5 deletions(-)
-
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -2186,6 +2186,18 @@ static int fmeter_getrate(struct fmeter
-
- static struct cpuset *cpuset_attach_old_cs;
-
-+/*
-+ * Check to see if a cpuset can accept a new task
-+ * For v1, cpus_allowed and mems_allowed can't be empty.
-+ */
-+static int cpuset_can_attach_check(struct cpuset *cs)
-+{
-+ if (!is_in_v2_mode() &&
-+ (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
-+ return -ENOSPC;
-+ return 0;
-+}
-+
- /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
- static int cpuset_can_attach(struct cgroup_taskset *tset)
- {
-@@ -2200,10 +2212,8 @@ static int cpuset_can_attach(struct cgro
-
- percpu_down_write(&cpuset_rwsem);
-
-- /* allow moving tasks into an empty cpuset if on default hierarchy */
-- ret = -ENOSPC;
-- if (!is_in_v2_mode() &&
-- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
-+ ret = cpuset_can_attach_check(cs);
-+ if (ret)
- goto out_unlock;
-
- cgroup_taskset_for_each(task, css, tset) {
-@@ -2220,7 +2230,6 @@ static int cpuset_can_attach(struct cgro
- * changes which zero cpus/mems_allowed.
- */
- cs->attach_in_progress++;
-- ret = 0;
- out_unlock:
- percpu_up_write(&cpuset_rwsem);
- return ret;
-@@ -2951,6 +2960,68 @@ static void cpuset_bind(struct cgroup_su
- }
-
- /*
-+ * In case the child is cloned into a cpuset different from its parent,
-+ * additional checks are done to see if the move is allowed.
-+ */
-+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
-+{
-+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
-+ bool same_cs;
-+ int ret;
-+
-+ rcu_read_lock();
-+ same_cs = (cs == task_cs(current));
-+ rcu_read_unlock();
-+
-+ if (same_cs)
-+ return 0;
-+
-+ lockdep_assert_held(&cgroup_mutex);
-+ percpu_down_write(&cpuset_rwsem);
-+
-+ /* Check to see if task is allowed in the cpuset */
-+ ret = cpuset_can_attach_check(cs);
-+ if (ret)
-+ goto out_unlock;
-+
-+ ret = task_can_attach(task, cs->effective_cpus);
-+ if (ret)
-+ goto out_unlock;
-+
-+ ret = security_task_setscheduler(task);
-+ if (ret)
-+ goto out_unlock;
-+
-+ /*
-+ * Mark attach is in progress. This makes validate_change() fail
-+ * changes which zero cpus/mems_allowed.
-+ */
-+ cs->attach_in_progress++;
-+out_unlock:
-+ percpu_up_write(&cpuset_rwsem);
-+ return ret;
-+}
-+
-+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
-+{
-+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
-+ bool same_cs;
-+
-+ rcu_read_lock();
-+ same_cs = (cs == task_cs(current));
-+ rcu_read_unlock();
-+
-+ if (same_cs)
-+ return;
-+
-+ percpu_down_write(&cpuset_rwsem);
-+ cs->attach_in_progress--;
-+ if (!cs->attach_in_progress)
-+ wake_up(&cpuset_attach_wq);
-+ percpu_up_write(&cpuset_rwsem);
-+}
-+
-+/*
- * Make sure the new task conform to the current state of its parent,
- * which could have been changed by cpuset just after it inherits the
- * state from the parent and before it sits on the cgroup's task list.
-@@ -2978,6 +3049,11 @@ static void cpuset_fork(struct task_stru
- percpu_down_write(&cpuset_rwsem);
- guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cpuset_attach_task(cs, task);
-+
-+ cs->attach_in_progress--;
-+ if (!cs->attach_in_progress)
-+ wake_up(&cpuset_attach_wq);
-+
- percpu_up_write(&cpuset_rwsem);
- }
-
-@@ -2991,6 +3067,8 @@ struct cgroup_subsys cpuset_cgrp_subsys
- .attach = cpuset_attach,
- .post_attach = cpuset_post_attach,
- .bind = cpuset_bind,
-+ .can_fork = cpuset_can_fork,
-+ .cancel_fork = cpuset_cancel_fork,
- .fork = cpuset_fork,
- .legacy_cftypes = legacy_files,
- .dfl_cftypes = dfl_files,
+++ /dev/null
-From cf2b50f99e27a18051e57b2c7beebebdc7036e46 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Tue, 11 Apr 2023 09:35:58 -0400
-Subject: cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
-
-From: Waiman Long <longman@redhat.com>
-
-[ Upstream commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c ]
-
-By default, the clone(2) syscall spawns a child process into the same
-cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
-introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
-into cgroups"), the child will be spawned into a different cgroup which
-is somewhat similar to writing the child's tid into "cgroup.threads".
-
-The current cpuset_fork() method does not properly handle the
-CLONE_INTO_CGROUP case where the cpuset of the child may be different
-from that of its parent. Update the cpuset_fork() method to treat the
-CLONE_INTO_CGROUP case similar to cpuset_attach().
-
-Since the newly cloned task has not run yet, its actual memory usage
-isn't known, so it is not necessary to make any change to its mm in
-cpuset_fork().
-
-Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
-Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
-Signed-off-by: Waiman Long <longman@redhat.com>
-Cc: stable@vger.kernel.org # v5.7+
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- kernel/cgroup/cpuset.c | 62 ++++++++++++++++++++++++++++--------------
- 1 file changed, 42 insertions(+), 20 deletions(-)
-
-diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
-index 3d254498eb275..a6829e21b50c3 100644
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -2242,16 +2242,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
- }
-
- /*
-- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
-+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
- * but we can't allocate it dynamically there. Define it global and
- * allocate from cpuset_init().
- */
- static cpumask_var_t cpus_attach;
-+static nodemask_t cpuset_attach_nodemask_to;
-+
-+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
-+{
-+ percpu_rwsem_assert_held(&cpuset_rwsem);
-+
-+ if (cs != &top_cpuset)
-+ guarantee_online_cpus(task, cpus_attach);
-+ else
-+ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
-+ /*
-+ * can_attach beforehand should guarantee that this doesn't
-+ * fail. TODO: have a better way to handle failure here
-+ */
-+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-+
-+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-+ cpuset_update_task_spread_flags(cs, task);
-+}
-
- static void cpuset_attach(struct cgroup_taskset *tset)
- {
-- /* static buf protected by cpuset_rwsem */
-- static nodemask_t cpuset_attach_nodemask_to;
- struct task_struct *task;
- struct task_struct *leader;
- struct cgroup_subsys_state *css;
-@@ -2266,20 +2283,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
-
- guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
-
-- cgroup_taskset_for_each(task, css, tset) {
-- if (cs != &top_cpuset)
-- guarantee_online_cpus(task, cpus_attach);
-- else
-- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
-- /*
-- * can_attach beforehand should guarantee that this doesn't
-- * fail. TODO: have a better way to handle failure here
-- */
-- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
--
-- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-- cpuset_update_task_spread_flags(cs, task);
-- }
-+ cgroup_taskset_for_each(task, css, tset)
-+ cpuset_attach_task(cs, task);
-
- /*
- * Change mm for all threadgroup leaders. This is expensive and may
-@@ -2952,11 +2957,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
- */
- static void cpuset_fork(struct task_struct *task)
- {
-- if (task_css_is_root(task, cpuset_cgrp_id))
-+ struct cpuset *cs;
-+ bool same_cs;
-+
-+ rcu_read_lock();
-+ cs = task_cs(task);
-+ same_cs = (cs == task_cs(current));
-+ rcu_read_unlock();
-+
-+ if (same_cs) {
-+ if (cs == &top_cpuset)
-+ return;
-+
-+ set_cpus_allowed_ptr(task, current->cpus_ptr);
-+ task->mems_allowed = current->mems_allowed;
- return;
-+ }
-
-- set_cpus_allowed_ptr(task, current->cpus_ptr);
-- task->mems_allowed = current->mems_allowed;
-+ /* CLONE_INTO_CGROUP */
-+ percpu_down_write(&cpuset_rwsem);
-+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
-+ cpuset_attach_task(cs, task);
-+ percpu_up_write(&cpuset_rwsem);
- }
-
- struct cgroup_subsys cpuset_cgrp_subsys = {
---
-2.39.2
-
+++ /dev/null
-From 964498edc3a87e450ee8c127ce2700db134d67de Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Sat, 12 Nov 2022 17:19:38 -0500
-Subject: cgroup/cpuset: Skip spread flags update on v2
-
-From: Waiman Long <longman@redhat.com>
-
-[ Upstream commit 18f9a4d47527772515ad6cbdac796422566e6440 ]
-
-Cpuset v2 has no spread flags to set. So we can skip spread
-flags update if cpuset v2 is being used. Also change the name to
-cpuset_update_task_spread_flags() to indicate that there are multiple
-spread flags.
-
-Signed-off-by: Waiman Long <longman@redhat.com>
-Signed-off-by: Tejun Heo <tj@kernel.org>
-Stable-dep-of: 42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- kernel/cgroup/cpuset.c | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
-index fb895eaf3a7c3..3d254498eb275 100644
---- a/kernel/cgroup/cpuset.c
-+++ b/kernel/cgroup/cpuset.c
-@@ -450,11 +450,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
- /*
- * update task's spread flag if cpuset's page/slab spread flag is set
- *
-- * Call with callback_lock or cpuset_rwsem held.
-+ * Call with callback_lock or cpuset_rwsem held. The check can be skipped
-+ * if on default hierarchy.
- */
--static void cpuset_update_task_spread_flag(struct cpuset *cs,
-+static void cpuset_update_task_spread_flags(struct cpuset *cs,
- struct task_struct *tsk)
- {
-+ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
-+ return;
-+
- if (is_spread_page(cs))
- task_set_spread_page(tsk);
- else
-@@ -1941,7 +1945,7 @@ static void update_tasks_flags(struct cpuset *cs)
-
- css_task_iter_start(&cs->css, 0, &it);
- while ((task = css_task_iter_next(&it)))
-- cpuset_update_task_spread_flag(cs, task);
-+ cpuset_update_task_spread_flags(cs, task);
- css_task_iter_end(&it);
- }
-
-@@ -2274,7 +2278,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
-- cpuset_update_task_spread_flag(cs, task);
-+ cpuset_update_task_spread_flags(cs, task);
- }
-
- /*
---
-2.39.2
-
nvme-pci-add-nvme_quirk_bogus_nid-for-lexar-nm760.patch
nvme-pci-mark-lexar-nm760-as-ignore_dev_subnqn.patch
nvme-pci-add-nvme_quirk_bogus_nid-for-t-force-z330-s.patch
-cgroup-cpuset-skip-spread-flags-update-on-v2.patch
-cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgr.patch
-cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_fork-methods.patch
kexec-turn-all-kexec_mutex-acquisitions-into-trylocks.patch
panic-kexec-make-__crash_kexec-nmi-safe.patch
counter-fix-docum.-build-problems-after-filename-change.patch