git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
author    Sasha Levin <sashal@kernel.org>
          Sat, 10 Sep 2022 17:15:17 +0000 (13:15 -0400)
committer Sasha Levin <sashal@kernel.org>
          Sat, 10 Sep 2022 17:15:17 +0000 (13:15 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.4/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch [new file with mode: 0644]
queue-5.4/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch [new file with mode: 0644]
queue-5.4/cgroup-optimize-single-thread-migration.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/smb3-missing-inode-locks-in-punch-hole.patch [new file with mode: 0644]

diff --git a/queue-5.4/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch b/queue-5.4/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch
new file mode 100644
index 0000000..8233f40
--- /dev/null
+++ b/queue-5.4/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch
@@ -0,0 +1,81 @@
+From 4ba7dde0fb3a0d459eaff4047a74f107945f2c60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 18:38:15 -1000
+Subject: cgroup: Elide write-locking threadgroup_rwsem when updating csses on
+ an empty subtree
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 671c11f0619e5ccb380bcf0f062f69ba95fc974a ]
+
+cgroup_update_dfl_csses() write-locks the threadgroup_rwsem because updating
+the csses can trigger process migrations. However, if the subtree doesn't
+contain any tasks, there won't be any cgroup migrations. This condition can
+be trivially detected by testing whether mgctx.preloaded_src_csets is empty.
+Elide write-locking threadgroup_rwsem if the subtree is empty.
+
+After this optimization, the usage pattern of creating a cgroup, enabling
+the necessary controllers, seeding it with CLONE_INTO_CGROUP, and then
+removing the cgroup once it becomes empty doesn't need to write-lock
+threadgroup_rwsem at all.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index bc9ee9a18c1e8..c2af09b4bca62 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2985,12 +2985,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+       struct cgroup_subsys_state *d_css;
+       struct cgroup *dsct;
+       struct css_set *src_cset;
++      bool has_tasks;
+       int ret;
+       lockdep_assert_held(&cgroup_mutex);
+-      percpu_down_write(&cgroup_threadgroup_rwsem);
+-
+       /* look up all csses currently attached to @cgrp's subtree */
+       spin_lock_irq(&css_set_lock);
+       cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+@@ -3001,6 +3000,16 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+       }
+       spin_unlock_irq(&css_set_lock);
++      /*
++       * We need to write-lock threadgroup_rwsem while migrating tasks.
++       * However, if there are no source csets for @cgrp, changing its
++       * controllers isn't gonna produce any task migrations and the
++       * write-locking can be skipped safely.
++       */
++      has_tasks = !list_empty(&mgctx.preloaded_src_csets);
++      if (has_tasks)
++              percpu_down_write(&cgroup_threadgroup_rwsem);
++
+       /* NULL dst indicates self on default hierarchy */
+       ret = cgroup_migrate_prepare_dst(&mgctx);
+       if (ret)
+@@ -3020,7 +3029,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+       ret = cgroup_migrate_execute(&mgctx);
+ out_finish:
+       cgroup_migrate_finish(&mgctx);
+-      percpu_up_write(&cgroup_threadgroup_rwsem);
++      if (has_tasks)
++              percpu_up_write(&cgroup_threadgroup_rwsem);
+       return ret;
+ }
+-- 
+2.35.1
+
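The usage pattern the commit message describes (create a cgroup, enable controllers, seed it with a child via CLONE_INTO_CGROUP, remove the cgroup once it is empty) can be illustrated with a rough userspace sketch. The cgroup path, the chosen controller, and the omitted error handling are illustrative assumptions only; clone3() with CLONE_INTO_CGROUP needs Linux 5.7+ headers and a cgroup2 mount.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/sched.h>   /* struct clone_args, CLONE_INTO_CGROUP (5.7+) */
    #include <signal.h>
    #include <sys/stat.h>
    #include <sys/syscall.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
            const char *cg = "/sys/fs/cgroup/demo"; /* hypothetical path */

            mkdir(cg, 0755);                        /* create the cgroup */

            /* enable a controller for the subtree (assumes cpu is available) */
            int cfd = open("/sys/fs/cgroup/cgroup.subtree_control", O_WRONLY);
            write(cfd, "+cpu", 4);
            close(cfd);

            int cgfd = open(cg, O_RDONLY | O_DIRECTORY);

            struct clone_args args = {
                    .flags       = CLONE_INTO_CGROUP,
                    .exit_signal = SIGCHLD,
                    .cgroup      = (unsigned long long)cgfd, /* child starts in cg */
            };
            pid_t pid = syscall(__NR_clone3, &args, sizeof(args));
            if (pid == 0)
                    _exit(0);                       /* child: real work goes here */

            waitpid(pid, NULL, 0);                  /* cgroup is now empty */
            close(cgfd);
            rmdir(cg);           /* with the patch, none of this write-locks */
            return 0;
    }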
diff --git a/queue-5.4/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch b/queue-5.4/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
new file mode 100644
index 0000000..db8192a
--- /dev/null
+++ b/queue-5.4/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
@@ -0,0 +1,207 @@
+From 6585d87184ff7d1fda65590668dedced9ca2c929 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Aug 2022 13:27:38 -1000
+Subject: cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 4f7e7236435ca0abe005c674ebd6892c6e83aeb3 ]
+
+Bringing up a CPU may involve creating and destroying tasks which requires
+read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
+cpus_read_lock(). However, cpuset's ->attach(), which may be called with
+threadgroup_rwsem write-locked, also wants to disable CPU hotplug and
+acquires cpus_read_lock(), leading to a deadlock.
+
+Fix it by guaranteeing that ->attach() is always called with CPU hotplug
+disabled and removing cpus_read_lock() call from cpuset_attach().
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reviewed-and-tested-by: Imran Khan <imran.f.khan@oracle.com>
+Reported-and-tested-by: Xuewen Yan <xuewen.yan@unisoc.com>
+Fixes: 05c7b7a92cc8 ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug")
+Cc: stable@vger.kernel.org # v5.17+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup.c | 77 +++++++++++++++++++++++++++++-------------
+ kernel/cgroup/cpuset.c |  3 +-
+ 2 files changed, 55 insertions(+), 25 deletions(-)
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index c2af09b4bca62..43f9bfedd890c 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2376,6 +2376,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ }
+ EXPORT_SYMBOL_GPL(task_cgroup_path);
++/**
++ * cgroup_attach_lock - Lock for ->attach()
++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
++ *
++ * cgroup migration sometimes needs to stabilize threadgroups against forks and
++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach()
++ * implementations (e.g. cpuset), also need to disable CPU hotplug.
++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can
++ * lead to deadlocks.
++ *
++ * Bringing up a CPU may involve creating and destroying tasks which requires
++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while
++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up
++ * waiting for an on-going CPU hotplug operation which in turn is waiting for
++ * the threadgroup_rwsem to be released to create new tasks. For more details:
++ *
++ *   http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu
++ *
++ * Resolve the situation by always acquiring cpus_read_lock() before optionally
++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
++ * CPU hotplug is disabled on entry.
++ */
++static void cgroup_attach_lock(bool lock_threadgroup)
++{
++      cpus_read_lock();
++      if (lock_threadgroup)
++              percpu_down_write(&cgroup_threadgroup_rwsem);
++}
++
++/**
++ * cgroup_attach_unlock - Undo cgroup_attach_lock()
++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
++ */
++static void cgroup_attach_unlock(bool lock_threadgroup)
++{
++      if (lock_threadgroup)
++              percpu_up_write(&cgroup_threadgroup_rwsem);
++      cpus_read_unlock();
++}
++
+ /**
+  * cgroup_migrate_add_task - add a migration target task to a migration context
+  * @task: target task
+@@ -2857,8 +2898,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+ }
+ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+-                                           bool *locked)
+-      __acquires(&cgroup_threadgroup_rwsem)
++                                           bool *threadgroup_locked)
+ {
+       struct task_struct *tsk;
+       pid_t pid;
+@@ -2875,12 +2915,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+        * Therefore, we can skip the global lock.
+        */
+       lockdep_assert_held(&cgroup_mutex);
+-      if (pid || threadgroup) {
+-              percpu_down_write(&cgroup_threadgroup_rwsem);
+-              *locked = true;
+-      } else {
+-              *locked = false;
+-      }
++      *threadgroup_locked = pid || threadgroup;
++      cgroup_attach_lock(*threadgroup_locked);
+       rcu_read_lock();
+       if (pid) {
+@@ -2911,17 +2947,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+       goto out_unlock_rcu;
+ out_unlock_threadgroup:
+-      if (*locked) {
+-              percpu_up_write(&cgroup_threadgroup_rwsem);
+-              *locked = false;
+-      }
++      cgroup_attach_unlock(*threadgroup_locked);
++      *threadgroup_locked = false;
+ out_unlock_rcu:
+       rcu_read_unlock();
+       return tsk;
+ }
+-void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+-      __releases(&cgroup_threadgroup_rwsem)
++void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked)
+ {
+       struct cgroup_subsys *ss;
+       int ssid;
+@@ -2929,8 +2962,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+       /* release reference from cgroup_procs_write_start() */
+       put_task_struct(task);
+-      if (locked)
+-              percpu_up_write(&cgroup_threadgroup_rwsem);
++      cgroup_attach_unlock(threadgroup_locked);
++
+       for_each_subsys(ss, ssid)
+               if (ss->post_attach)
+                       ss->post_attach();
+@@ -3007,8 +3040,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+        * write-locking can be skipped safely.
+        */
+       has_tasks = !list_empty(&mgctx.preloaded_src_csets);
+-      if (has_tasks)
+-              percpu_down_write(&cgroup_threadgroup_rwsem);
++      cgroup_attach_lock(has_tasks);
+       /* NULL dst indicates self on default hierarchy */
+       ret = cgroup_migrate_prepare_dst(&mgctx);
+@@ -3029,8 +3061,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+       ret = cgroup_migrate_execute(&mgctx);
+ out_finish:
+       cgroup_migrate_finish(&mgctx);
+-      if (has_tasks)
+-              percpu_up_write(&cgroup_threadgroup_rwsem);
++      cgroup_attach_unlock(has_tasks);
+       return ret;
+ }
+@@ -4859,13 +4890,13 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
+       struct task_struct *task;
+       const struct cred *saved_cred;
+       ssize_t ret;
+-      bool locked;
++      bool threadgroup_locked;
+       dst_cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!dst_cgrp)
+               return -ENODEV;
+-      task = cgroup_procs_write_start(buf, true, &locked);
++      task = cgroup_procs_write_start(buf, true, &threadgroup_locked);
+       ret = PTR_ERR_OR_ZERO(task);
+       if (ret)
+               goto out_unlock;
+@@ -4891,7 +4922,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
+       ret = cgroup_attach_task(dst_cgrp, task, true);
+ out_finish:
+-      cgroup_procs_write_finish(task, locked);
++      cgroup_procs_write_finish(task, threadgroup_locked);
+ out_unlock:
+       cgroup_kn_unlock(of->kn);
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index b02eca235ba3f..9ba94a9a67aa4 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2204,7 +2204,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+       cgroup_taskset_first(tset, &css);
+       cs = css_cs(css);
+-      cpus_read_lock();
++      lockdep_assert_cpus_held();     /* see cgroup_attach_lock() */
+       percpu_down_write(&cpuset_rwsem);
+       /* prepare for attach */
+@@ -2260,7 +2260,6 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+               wake_up(&cpuset_attach_wq);
+       percpu_up_write(&cpuset_rwsem);
+-      cpus_read_unlock();
+ }
+ /* The various types of files and directories in a cpuset file system */
+-- 
+2.35.1
+
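To make the reversed ordering concrete, the following is a toy userspace analogue (not kernel code; the lock names and the sleep are illustrative): thread A models CPU hotplug taking the cpus lock before the threadgroup lock, while thread B models the old cpuset_attach() path taking them in the opposite order, which is exactly the ABBA inversion the commit removes.

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t cpus_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t threadgroup_lock = PTHREAD_MUTEX_INITIALIZER;

    static void *hotplug_path(void *arg)     /* cpus -> threadgroup */
    {
            pthread_mutex_lock(&cpus_lock);
            usleep(1000);                    /* widen the race window */
            pthread_mutex_lock(&threadgroup_lock);
            pthread_mutex_unlock(&threadgroup_lock);
            pthread_mutex_unlock(&cpus_lock);
            return NULL;
    }

    static void *old_attach_path(void *arg)  /* threadgroup -> cpus: reversed */
    {
            pthread_mutex_lock(&threadgroup_lock);
            usleep(1000);
            pthread_mutex_lock(&cpus_lock);  /* blocks against hotplug_path */
            pthread_mutex_unlock(&cpus_lock);
            pthread_mutex_unlock(&threadgroup_lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_create(&a, NULL, hotplug_path, NULL);
            pthread_create(&b, NULL, old_attach_path, NULL);
            pthread_join(a, NULL);           /* never returns once both block */
            pthread_join(b, NULL);
            puts("lucky scheduling, no deadlock this run");
            return 0;
    }

Built with -pthread, the two threads usually block on each other forever, mirroring the hotplug operation waiting on threadgroup_rwsem while the attach path waits on the cpus lock.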
diff --git a/queue-5.4/cgroup-optimize-single-thread-migration.patch b/queue-5.4/cgroup-optimize-single-thread-migration.patch
new file mode 100644
index 0000000..62ef76c
--- /dev/null
+++ b/queue-5.4/cgroup-optimize-single-thread-migration.patch
@@ -0,0 +1,216 @@
+From ff4c2c73ac0a2356f159aa4096720fa3a538ef12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2019 12:57:40 +0200
+Subject: cgroup: Optimize single thread migration
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Michal Koutný <mkoutny@suse.com>
+
+[ Upstream commit 9a3284fad42f66bb43629c6716709ff791aaa457 ]
+
+Users who migrate threads between cgroups report a performance drop after
+d59cfc09c32a ("sched, cgroup: replace signal_struct->group_rwsem with a
+global percpu_rwsem"). The effect is more pronounced on machines with more
+CPUs.
+
+The migration is affected by forking noise happening in the background:
+after the mentioned commit, a migrating thread must wait for all (forking)
+processes on the system, not only those in its threadgroup.
+
+There are several places that need to synchronize with migration:
+       a) do_exit,
+       b) de_thread,
+       c) copy_process,
+       d) cgroup_update_dfl_csses,
+       e) parallel migration (cgroup_{proc,thread}s_write).
+
+In the case of a self-migrating thread, we relax the synchronization on
+cgroup_threadgroup_rwsem to avoid the cost of waiting. d) and e) are
+excluded with cgroup_mutex, c) does not matter in the case of single-thread
+migration, and the executing thread cannot exec(2) or exit(2) while it is
+writing into cgroup.threads. In the case of do_exit() due to signal
+delivery, we either exit before the migration or finish the migration
+(of a not-yet-PF_EXITING thread) and die afterwards.
+
+This patch handles only the case of self-migration by writing "0" into
+cgroup.threads. For simplicity, we always take cgroup_threadgroup_rwsem
+with numeric PIDs.
+
+This change improves the performance of migration-dependent workloads to a
+level similar to the former per-signal_struct state.
+
+Signed-off-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup-internal.h |  5 +++--
+ kernel/cgroup/cgroup-v1.c       |  5 +++--
+ kernel/cgroup/cgroup.c          | 39 +++++++++++++++++++++++++--------
+ 3 files changed, 36 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
+index 236f290224aae..8dfb2526b3aa2 100644
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -250,9 +250,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
+ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+                      bool threadgroup);
+-struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
++struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
++                                           bool *locked)
+       __acquires(&cgroup_threadgroup_rwsem);
+-void cgroup_procs_write_finish(struct task_struct *task)
++void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+       __releases(&cgroup_threadgroup_rwsem);
+ void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
+diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
+index 117d70098cd49..aa7577b189e92 100644
+--- a/kernel/cgroup/cgroup-v1.c
++++ b/kernel/cgroup/cgroup-v1.c
+@@ -498,12 +498,13 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
+       struct task_struct *task;
+       const struct cred *cred, *tcred;
+       ssize_t ret;
++      bool locked;
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENODEV;
+-      task = cgroup_procs_write_start(buf, threadgroup);
++      task = cgroup_procs_write_start(buf, threadgroup, &locked);
+       ret = PTR_ERR_OR_ZERO(task);
+       if (ret)
+               goto out_unlock;
+@@ -526,7 +527,7 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
+       ret = cgroup_attach_task(cgrp, task, threadgroup);
+ out_finish:
+-      cgroup_procs_write_finish(task);
++      cgroup_procs_write_finish(task, locked);
+ out_unlock:
+       cgroup_kn_unlock(of->kn);
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 23f0db2900e4b..bc9ee9a18c1e8 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2856,7 +2856,8 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+       return ret;
+ }
+-struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
++struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
++                                           bool *locked)
+       __acquires(&cgroup_threadgroup_rwsem)
+ {
+       struct task_struct *tsk;
+@@ -2865,7 +2866,21 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
+       if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
+               return ERR_PTR(-EINVAL);
+-      percpu_down_write(&cgroup_threadgroup_rwsem);
++      /*
++       * If we migrate a single thread, we don't care about threadgroup
++       * stability. If the thread is `current`, it won't exit(2) under our
++       * hands or change PID through exec(2). We exclude
++       * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write
++       * callers by cgroup_mutex.
++       * Therefore, we can skip the global lock.
++       */
++      lockdep_assert_held(&cgroup_mutex);
++      if (pid || threadgroup) {
++              percpu_down_write(&cgroup_threadgroup_rwsem);
++              *locked = true;
++      } else {
++              *locked = false;
++      }
+       rcu_read_lock();
+       if (pid) {
+@@ -2896,13 +2911,16 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
+       goto out_unlock_rcu;
+ out_unlock_threadgroup:
+-      percpu_up_write(&cgroup_threadgroup_rwsem);
++      if (*locked) {
++              percpu_up_write(&cgroup_threadgroup_rwsem);
++              *locked = false;
++      }
+ out_unlock_rcu:
+       rcu_read_unlock();
+       return tsk;
+ }
+-void cgroup_procs_write_finish(struct task_struct *task)
++void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+       __releases(&cgroup_threadgroup_rwsem)
+ {
+       struct cgroup_subsys *ss;
+@@ -2911,7 +2929,8 @@ void cgroup_procs_write_finish(struct task_struct *task)
+       /* release reference from cgroup_procs_write_start() */
+       put_task_struct(task);
+-      percpu_up_write(&cgroup_threadgroup_rwsem);
++      if (locked)
++              percpu_up_write(&cgroup_threadgroup_rwsem);
+       for_each_subsys(ss, ssid)
+               if (ss->post_attach)
+                       ss->post_attach();
+@@ -4830,12 +4849,13 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
+       struct task_struct *task;
+       const struct cred *saved_cred;
+       ssize_t ret;
++      bool locked;
+       dst_cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!dst_cgrp)
+               return -ENODEV;
+-      task = cgroup_procs_write_start(buf, true);
++      task = cgroup_procs_write_start(buf, true, &locked);
+       ret = PTR_ERR_OR_ZERO(task);
+       if (ret)
+               goto out_unlock;
+@@ -4861,7 +4881,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
+       ret = cgroup_attach_task(dst_cgrp, task, true);
+ out_finish:
+-      cgroup_procs_write_finish(task);
++      cgroup_procs_write_finish(task, locked);
+ out_unlock:
+       cgroup_kn_unlock(of->kn);
+@@ -4881,6 +4901,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
+       struct task_struct *task;
+       const struct cred *saved_cred;
+       ssize_t ret;
++      bool locked;
+       buf = strstrip(buf);
+@@ -4888,7 +4909,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
+       if (!dst_cgrp)
+               return -ENODEV;
+-      task = cgroup_procs_write_start(buf, false);
++      task = cgroup_procs_write_start(buf, false, &locked);
+       ret = PTR_ERR_OR_ZERO(task);
+       if (ret)
+               goto out_unlock;
+@@ -4919,7 +4940,7 @@ static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
+       ret = cgroup_attach_task(dst_cgrp, task, false);
+ out_finish:
+-      cgroup_procs_write_finish(task);
++      cgroup_procs_write_finish(task, locked);
+ out_unlock:
+       cgroup_kn_unlock(of->kn);
+-- 
+2.35.1
+
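For context, the self-migration fast path this patch carves out is driven from userspace by writing "0" into the destination cgroup's cgroup.threads file, which moves the writing thread itself. A minimal sketch, assuming a hypothetical cgroup at /sys/fs/cgroup/demo in the caller's threaded domain:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/fs/cgroup/demo/cgroup.threads", O_WRONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* "0" names the writing thread itself; with this patch the
             * kernel skips write-locking cgroup_threadgroup_rwsem here */
            if (write(fd, "0", 1) != 1)
                    perror("write");
            close(fd);
            return 0;
    }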
diff --git a/queue-5.4/series b/queue-5.4/series
index b5c161f6d0bd163f9523fd3de52b802d80d533e4..59b434f621ace9fcfd78670c877721bfa28451c5 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -82,3 +82,7 @@ debugfs-add-debugfs_lookup_and_remove.patch
 nvmet-fix-a-use-after-free.patch
 scsi-mpt3sas-fix-use-after-free-warning.patch
 scsi-lpfc-add-missing-destroy_workqueue-in-error-path.patch
+cgroup-optimize-single-thread-migration.patch
+cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch
+cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
+smb3-missing-inode-locks-in-punch-hole.patch
diff --git a/queue-5.4/smb3-missing-inode-locks-in-punch-hole.patch b/queue-5.4/smb3-missing-inode-locks-in-punch-hole.patch
new file mode 100644
index 0000000..4bf4ad1
--- /dev/null
+++ b/queue-5.4/smb3-missing-inode-locks-in-punch-hole.patch
@@ -0,0 +1,62 @@
+From 9143d17eb544a7955e5f170f4f7f047cbf573037 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 02:10:56 -0500
+Subject: smb3: missing inode locks in punch hole
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit ba0803050d610d5072666be727bca5e03e55b242 ]
+
+smb3 fallocate punch hole was not grabbing the inode or filemap_invalidate
+locks, so it could race with the pagemap reinstantiating the page.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cifs/smb2ops.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
+index 6ae281cff0d50..6039b0cdfe04e 100644
+--- a/fs/cifs/smb2ops.c
++++ b/fs/cifs/smb2ops.c
+@@ -3051,7 +3051,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
+ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+                           loff_t offset, loff_t len)
+ {
+-      struct inode *inode;
++      struct inode *inode = file_inode(file);
+       struct cifsFileInfo *cfile = file->private_data;
+       struct file_zero_data_information fsctl_buf;
+       long rc;
+@@ -3060,14 +3060,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+       xid = get_xid();
+-      inode = d_inode(cfile->dentry);
+-
++      inode_lock(inode);
+       /* Need to make file sparse, if not already, before freeing range. */
+       /* Consider adding equivalent for compressed since it could also work */
+       if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+               rc = -EOPNOTSUPP;
+-              free_xid(xid);
+-              return rc;
++              goto out;
+       }
+       /*
+@@ -3086,6 +3084,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
+                       true /* is_fctl */, (char *)&fsctl_buf,
+                       sizeof(struct file_zero_data_information),
+                       CIFSMaxBufSize, NULL, NULL);
++out:
++      inode_unlock(inode);
+       free_xid(xid);
+       return rc;
+ }
+-- 
+2.35.1
+
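For reference, the punch-hole operation whose locking this patch fixes is reached from userspace via fallocate(2) with FALLOC_FL_PUNCH_HOLE, which must be combined with FALLOC_FL_KEEP_SIZE. A minimal sketch against a hypothetical smb3 mount (the path, offset, and length are illustrative):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/mnt/smb3/file.bin", O_RDWR); /* hypothetical mount */
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* free 1 MiB starting at offset 4096 without changing the file
             * size; on cifs this reaches smb3_punch_hole(), which the patch
             * now runs under inode_lock() */
            if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                          4096, 1 << 20) < 0)
                    perror("fallocate");
            close(fd);
            return 0;
    }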