git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.2-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 Apr 2023 16:39:00 +0000 (18:39 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 Apr 2023 16:39:00 +0000 (18:39 +0200)
added patches:
asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch
kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
memstick-fix-memory-leak-if-card-device-is-never-registered.patch
mptcp-fix-accept-vs-worker-race.patch
mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch
nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch

queue-6.2/asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch [new file with mode: 0644]
queue-6.2/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch [new file with mode: 0644]
queue-6.2/memstick-fix-memory-leak-if-card-device-is-never-registered.patch [new file with mode: 0644]
queue-6.2/mptcp-fix-accept-vs-worker-race.patch [new file with mode: 0644]
queue-6.2/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch [new file with mode: 0644]
queue-6.2/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch [new file with mode: 0644]
queue-6.2/series
queue-6.2/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch [new file with mode: 0644]
queue-6.2/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch [new file with mode: 0644]

diff --git a/queue-6.2/asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch b/queue-6.2/asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch
new file mode 100644 (file)
index 0000000..c90622d
--- /dev/null
@@ -0,0 +1,48 @@
+From de6aa72b265b72bca2b1897d5000c8f0147d3157 Mon Sep 17 00:00:00 2001
+From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+Date: Mon, 3 Apr 2023 12:09:09 +0300
+Subject: ASoC: SOF: ipc4-topology: Clarify bind failure caused by missing fw_module
+
+From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+
+commit de6aa72b265b72bca2b1897d5000c8f0147d3157 upstream.
+
+The original patch uses a feature in lib/vsprintf.c to handle the invalid
+address when trying to print *_fw_module->man4_module_entry.name when the
+src_fw_module or sink_fw_module is NULL.
+This case is handled by check_pointer_msg() internally and turns the
+invalid pointer into '(efault)' for printing, but that hides useful
+information about the circumstances. Change the print to emit the names
+of the widgets and a note on which side's fw_module is missing.
+
+Fixes: e3720f92e023 ("ASoC: SOF: avoid a NULL dereference with unsupported widgets")
+Reported-by: Dan Carpenter <error27@gmail.com>
+Link: https://lore.kernel.org/alsa-devel/4826f662-42f0-4a82-ba32-8bf5f8a03256@kili.mountain/
+Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+Rule: 'Cc: stable@vger.kernel.org' or 'commit <sha1> upstream.'
+Link: https://lore.kernel.org/r/20230403090909.18233-1-peter.ujfalusi@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/sof/ipc4-topology.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/sound/soc/sof/ipc4-topology.c
++++ b/sound/soc/sof/ipc4-topology.c
+@@ -1687,10 +1687,12 @@ static int sof_ipc4_route_setup(struct s
+       int ret;
+       if (!src_fw_module || !sink_fw_module) {
+-              /* The NULL module will print as "(efault)" */
+-              dev_err(sdev->dev, "source %s or sink %s widget weren't set up properly\n",
+-                      src_fw_module->man4_module_entry.name,
+-                      sink_fw_module->man4_module_entry.name);
++              dev_err(sdev->dev,
++                      "cannot bind %s -> %s, no firmware module for: %s%s\n",
++                      src_widget->widget->name, sink_widget->widget->name,
++                      src_fw_module ? "" : " source",
++                      sink_fw_module ? "" : " sink");
++
+               return -ENODEV;
+       }
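For illustration only, a minimal standalone sketch of the reporting pattern used in the hunk above (plain C, all names hypothetical, not part of the patch): the widget names are always printed, and a " source"/" sink" suffix is appended only for the side whose module pointer is NULL.

    #include <stdio.h>

    /* hypothetical stand-ins for the src/sink fw_module pointers */
    static void report_bind_failure(const char *src_widget, const char *sink_widget,
                                    const void *src_mod, const void *sink_mod)
    {
            /* append " source" and/or " sink" only for the missing side */
            printf("cannot bind %s -> %s, no firmware module for:%s%s\n",
                   src_widget, sink_widget,
                   src_mod ? "" : " source",
                   sink_mod ? "" : " sink");
    }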
diff --git a/queue-6.2/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch b/queue-6.2/kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
new file mode 100644 (file)
index 0000000..0a517f3
--- /dev/null
@@ -0,0 +1,151 @@
+From 659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 Mon Sep 17 00:00:00 2001
+From: Ondrej Mosnacek <omosnace@redhat.com>
+Date: Fri, 17 Feb 2023 17:21:54 +0100
+Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
+
+From: Ondrej Mosnacek <omosnace@redhat.com>
+
+commit 659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 upstream.
+
+Linux Security Modules (LSMs) that implement the "capable" hook will
+usually emit an access denial message to the audit log whenever they
+"block" the current task from using the given capability based on their
+security policy.
+
+The occurrence of a denial is used as an indication that the given task
+has attempted an operation that requires the given access permission, so
+the callers of functions that perform LSM permission checks must take care
+to avoid calling them too early (before it is decided if the permission is
+actually needed to perform the requested operation).
+
+The __sys_setres[ug]id() functions violate this convention by first
+calling ns_capable_setid() and only then checking if the operation
+requires the capability or not.  It means that any caller that has the
+capability granted by DAC (task's capability set) but not by MAC (LSMs)
+will generate a "denied" audit record, even if it is doing an operation for
+which the capability is not required.
+
+Fix this by reordering the checks such that ns_capable_setid() is checked
+last and -EPERM is returned immediately if it returns false.
+
+While there, also do two small optimizations:
+* move the capability check before prepare_creds() and
+* bail out early in case of a no-op.
+
+Link: https://lkml.kernel.org/r/20230217162154.837549-1-omosnace@redhat.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sys.c |   69 ++++++++++++++++++++++++++++++++++-------------------------
+ 1 file changed, 40 insertions(+), 29 deletions(-)
+
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -664,6 +664,7 @@ long __sys_setresuid(uid_t ruid, uid_t e
+       struct cred *new;
+       int retval;
+       kuid_t kruid, keuid, ksuid;
++      bool ruid_new, euid_new, suid_new;
+       kruid = make_kuid(ns, ruid);
+       keuid = make_kuid(ns, euid);
+@@ -678,25 +679,29 @@ long __sys_setresuid(uid_t ruid, uid_t e
+       if ((suid != (uid_t) -1) && !uid_valid(ksuid))
+               return -EINVAL;
++      old = current_cred();
++
++      /* check for no-op */
++      if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
++          (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
++                                  uid_eq(keuid, old->fsuid))) &&
++          (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
++              return 0;
++
++      ruid_new = ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
++                 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
++      euid_new = euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
++                 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
++      suid_new = suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
++                 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
++      if ((ruid_new || euid_new || suid_new) &&
++          !ns_capable_setid(old->user_ns, CAP_SETUID))
++              return -EPERM;
++
+       new = prepare_creds();
+       if (!new)
+               return -ENOMEM;
+-      old = current_cred();
+-
+-      retval = -EPERM;
+-      if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
+-              if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
+-                  !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
+-                      goto error;
+-              if (euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
+-                  !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
+-                      goto error;
+-              if (suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
+-                  !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
+-                      goto error;
+-      }
+-
+       if (ruid != (uid_t) -1) {
+               new->uid = kruid;
+               if (!uid_eq(kruid, old->uid)) {
+@@ -761,6 +766,7 @@ long __sys_setresgid(gid_t rgid, gid_t e
+       struct cred *new;
+       int retval;
+       kgid_t krgid, kegid, ksgid;
++      bool rgid_new, egid_new, sgid_new;
+       krgid = make_kgid(ns, rgid);
+       kegid = make_kgid(ns, egid);
+@@ -773,23 +779,28 @@ long __sys_setresgid(gid_t rgid, gid_t e
+       if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
+               return -EINVAL;
++      old = current_cred();
++
++      /* check for no-op */
++      if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
++          (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
++                                  gid_eq(kegid, old->fsgid))) &&
++          (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
++              return 0;
++
++      rgid_new = rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
++                 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
++      egid_new = egid != (gid_t) -1        && !gid_eq(kegid, old->gid) &&
++                 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
++      sgid_new = sgid != (gid_t) -1        && !gid_eq(ksgid, old->gid) &&
++                 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
++      if ((rgid_new || egid_new || sgid_new) &&
++          !ns_capable_setid(old->user_ns, CAP_SETGID))
++              return -EPERM;
++
+       new = prepare_creds();
+       if (!new)
+               return -ENOMEM;
+-      old = current_cred();
+-
+-      retval = -EPERM;
+-      if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
+-              if (rgid != (gid_t) -1        && !gid_eq(krgid, old->gid) &&
+-                  !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
+-                      goto error;
+-              if (egid != (gid_t) -1        && !gid_eq(kegid, old->gid) &&
+-                  !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
+-                      goto error;
+-              if (sgid != (gid_t) -1        && !gid_eq(ksgid, old->gid) &&
+-                  !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
+-                      goto error;
+-      }
+       if (rgid != (gid_t) -1)
+               new->gid = krgid;
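As a rough illustration of the behaviour change described above (hypothetical userspace sketch, not part of the patch): the no-op call below previously still reached ns_capable_setid(), so an LSM that denies CAP_SETUID could log a spurious audit denial even though the syscall succeeds; with the reordering it returns 0 before any capability check.

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* all ids are -1: no credential change is requested (a no-op) */
            if (setresuid((uid_t)-1, (uid_t)-1, (uid_t)-1) != 0)
                    perror("setresuid");
            return 0;
    }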
diff --git a/queue-6.2/memstick-fix-memory-leak-if-card-device-is-never-registered.patch b/queue-6.2/memstick-fix-memory-leak-if-card-device-is-never-registered.patch
new file mode 100644 (file)
index 0000000..8de753d
--- /dev/null
@@ -0,0 +1,61 @@
+From 4b6d621c9d859ff89e68cebf6178652592676013 Mon Sep 17 00:00:00 2001
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Date: Sat, 1 Apr 2023 22:03:27 +0200
+Subject: memstick: fix memory leak if card device is never registered
+
+From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+commit 4b6d621c9d859ff89e68cebf6178652592676013 upstream.
+
+When calling dev_set_name() memory is allocated for the name for the
+struct device.  Once that struct device is registered, or attempted
+to be registered, with the driver core, the driver core will handle
+cleaning up that memory when the device is removed from the system.
+
+Unfortunately for the memstick code, there is an error path that causes
+the struct device to never be registered, and so the memory allocated in
+dev_set_name() will be leaked.  Fix that leak by manually freeing it right
+before the memory for the device is freed.
+
+Cc: Maxim Levitsky <maximlevitsky@gmail.com>
+Cc: Alex Dubov <oakad@yahoo.com>
+Cc: Ulf Hansson <ulf.hansson@linaro.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Cc: Hans de Goede <hdegoede@redhat.com>
+Cc: Kay Sievers <kay.sievers@vrfy.org>
+Cc: linux-mmc@vger.kernel.org
+Fixes: 0252c3b4f018 ("memstick: struct device - replace bus_id with dev_name(), dev_set_name()")
+Cc: stable <stable@kernel.org>
+Co-developed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Co-developed-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
+Signed-off-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
+Link: https://lore.kernel.org/r/20230401200327.16800-1-gregkh@linuxfoundation.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/memstick/core/memstick.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/memstick/core/memstick.c
++++ b/drivers/memstick/core/memstick.c
+@@ -410,6 +410,7 @@ static struct memstick_dev *memstick_all
+       return card;
+ err_out:
+       host->card = old_card;
++      kfree_const(card->dev.kobj.name);
+       kfree(card);
+       return NULL;
+ }
+@@ -468,8 +469,10 @@ static void memstick_check(struct work_s
+                               put_device(&card->dev);
+                               host->card = NULL;
+                       }
+-              } else
++              } else {
++                      kfree_const(card->dev.kobj.name);
+                       kfree(card);
++              }
+       }
+ out_power_off:
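A generic sketch of the leak pattern addressed above (hypothetical driver; names and helpers are invented for illustration and are not the actual memstick code): dev_set_name() allocates the kobject name, and if the struct device is never registered with the driver core, that allocation must be released by hand on the error path.

    struct example_card {
            struct device dev;
    };

    static struct example_card *example_alloc_card(struct device *parent)
    {
            struct example_card *card = kzalloc(sizeof(*card), GFP_KERNEL);

            if (!card)
                    return NULL;

            card->dev.parent = parent;
            dev_set_name(&card->dev, "example_card");  /* allocates kobj.name */

            if (example_probe_media(card)) {           /* hypothetical failure */
                    /* never registered: the driver core will not free the name */
                    kfree_const(card->dev.kobj.name);
                    kfree(card);
                    return NULL;
            }
            return card;
    }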
diff --git a/queue-6.2/mptcp-fix-accept-vs-worker-race.patch b/queue-6.2/mptcp-fix-accept-vs-worker-race.patch
new file mode 100644 (file)
index 0000000..0b67105
--- /dev/null
@@ -0,0 +1,282 @@
+From 63740448a32eb662e05894425b47bcc5814136f4 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Mon, 17 Apr 2023 16:00:41 +0200
+Subject: mptcp: fix accept vs worker race
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 63740448a32eb662e05894425b47bcc5814136f4 upstream.
+
+The mptcp worker and mptcp_accept() can race, as reported by Christoph:
+
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 1 PID: 14351 at lib/refcount.c:25 refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25
+Modules linked in:
+CPU: 1 PID: 14351 Comm: syz-executor.2 Not tainted 6.3.0-rc1-gde5e8fd0123c #11
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+RIP: 0010:refcount_warn_saturate+0x105/0x1b0 lib/refcount.c:25
+Code: 02 31 ff 89 de e8 1b f0 a7 ff 84 db 0f 85 6e ff ff ff e8 3e f5 a7 ff 48 c7 c7 d8 c7 34 83 c6 05 6d 2d 0f 02 01 e8 cb 3d 90 ff <0f> 0b e9 4f ff ff ff e8 1f f5 a7 ff 0f b6 1d 54 2d 0f 02 31 ff 89
+RSP: 0018:ffffc90000a47bf8 EFLAGS: 00010282
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: ffff88802eae98c0 RSI: ffffffff81097d4f RDI: 0000000000000001
+RBP: ffff88802e712180 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000001 R11: ffff88802eaea148 R12: ffff88802e712100
+R13: ffff88802e712a88 R14: ffff888005cb93a8 R15: ffff88802e712a88
+FS:  0000000000000000(0000) GS:ffff88803ed00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f277fd89120 CR3: 0000000035486002 CR4: 0000000000370ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ __refcount_add include/linux/refcount.h:199 [inline]
+ __refcount_inc include/linux/refcount.h:250 [inline]
+ refcount_inc include/linux/refcount.h:267 [inline]
+ sock_hold include/net/sock.h:775 [inline]
+ __mptcp_close+0x4c6/0x4d0 net/mptcp/protocol.c:3051
+ mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072
+ inet_release+0x56/0xa0 net/ipv4/af_inet.c:429
+ __sock_release+0x51/0xf0 net/socket.c:653
+ sock_close+0x18/0x20 net/socket.c:1395
+ __fput+0x113/0x430 fs/file_table.c:321
+ task_work_run+0x96/0x100 kernel/task_work.c:179
+ exit_task_work include/linux/task_work.h:38 [inline]
+ do_exit+0x4fc/0x10c0 kernel/exit.c:869
+ do_group_exit+0x51/0xf0 kernel/exit.c:1019
+ get_signal+0x12b0/0x1390 kernel/signal.c:2859
+ arch_do_signal_or_restart+0x25/0x260 arch/x86/kernel/signal.c:306
+ exit_to_user_mode_loop kernel/entry/common.c:168 [inline]
+ exit_to_user_mode_prepare+0x131/0x1a0 kernel/entry/common.c:203
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
+ syscall_exit_to_user_mode+0x19/0x40 kernel/entry/common.c:296
+ do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7fec4b4926a9
+Code: Unable to access opcode bytes at 0x7fec4b49267f.
+RSP: 002b:00007fec49f9dd78 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
+RAX: fffffffffffffe00 RBX: 00000000006bc058 RCX: 00007fec4b4926a9
+RDX: 0000000000000000 RSI: 0000000000000080 RDI: 00000000006bc058
+RBP: 00000000006bc050 R08: 00000000007df998 R09: 00000000007df998
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c
+R13: fffffffffffffea8 R14: 000000000000000b R15: 000000000001fe40
+ </TASK>
+
+The root cause is that the worker can force the first mptcp subflow to
+fall back to TCP, actually deleting the unaccepted msk socket.
+
+We can explicitly prevent the race by delaying the unaccepted msk deletion
+to listener shutdown time. In case the closed subflow is later accepted,
+just drop the mptcp context and let user-space deal with the
+paired mptcp socket.
+
+Fixes: b6985b9b8295 ("mptcp: use the workqueue to destroy unaccepted sockets")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/375
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Tested-by: Christoph Paasch <cpaasch@apple.com>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |   68 +++++++++++++++++++++++++++++++++------------------
+ net/mptcp/protocol.h |    1 
+ net/mptcp/subflow.c  |   22 +++++++++-------
+ 3 files changed, 58 insertions(+), 33 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2316,7 +2316,26 @@ static void __mptcp_close_ssk(struct soc
+                             unsigned int flags)
+ {
+       struct mptcp_sock *msk = mptcp_sk(sk);
+-      bool need_push, dispose_it;
++      bool dispose_it, need_push = false;
++
++      /* If the first subflow moved to a close state before accept, e.g. due
++       * to an incoming reset, mptcp either:
++       * - if either the subflow or the msk are dead, destroy the context
++       *   (the subflow socket is deleted by inet_child_forget) and the msk
++       * - otherwise do nothing at the moment and take action at accept and/or
++       *   listener shutdown - user-space must be able to accept() the closed
++       *   socket.
++       */
++      if (msk->in_accept_queue && msk->first == ssk) {
++              if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
++                      return;
++
++              /* ensure later check in mptcp_worker() will dispose the msk */
++              sock_set_flag(sk, SOCK_DEAD);
++              lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
++              mptcp_subflow_drop_ctx(ssk);
++              goto out_release;
++      }
+       dispose_it = !msk->subflow || ssk != msk->subflow->sk;
+       if (dispose_it)
+@@ -2352,18 +2371,6 @@ static void __mptcp_close_ssk(struct soc
+       if (!inet_csk(ssk)->icsk_ulp_ops) {
+               WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
+               kfree_rcu(subflow, rcu);
+-      } else if (msk->in_accept_queue && msk->first == ssk) {
+-              /* if the first subflow moved to a close state, e.g. due to
+-               * incoming reset and we reach here before inet_child_forget()
+-               * the TCP stack could later try to close it via
+-               * inet_csk_listen_stop(), or deliver it to the user space via
+-               * accept().
+-               * We can't delete the subflow - or risk a double free - nor let
+-               * the msk survive - or will be leaked in the non accept scenario:
+-               * fallback and let TCP cope with the subflow cleanup.
+-               */
+-              WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
+-              mptcp_subflow_drop_ctx(ssk);
+       } else {
+               /* otherwise tcp will dispose of the ssk and subflow ctx */
+               if (ssk->sk_state == TCP_LISTEN) {
+@@ -2378,6 +2385,8 @@ static void __mptcp_close_ssk(struct soc
+               /* close acquired an extra ref */
+               __sock_put(ssk);
+       }
++
++out_release:
+       release_sock(ssk);
+       sock_put(ssk);
+@@ -2432,21 +2441,14 @@ static void __mptcp_close_subflow(struct
+               mptcp_close_ssk(sk, ssk, subflow);
+       }
+-      /* if the MPC subflow has been closed before the msk is accepted,
+-       * msk will never be accept-ed, close it now
+-       */
+-      if (!msk->first && msk->in_accept_queue) {
+-              sock_set_flag(sk, SOCK_DEAD);
+-              inet_sk_state_store(sk, TCP_CLOSE);
+-      }
+ }
+-static bool mptcp_check_close_timeout(const struct sock *sk)
++static bool mptcp_should_close(const struct sock *sk)
+ {
+       s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
+       struct mptcp_subflow_context *subflow;
+-      if (delta >= TCP_TIMEWAIT_LEN)
++      if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
+               return true;
+       /* if all subflows are in closed status don't bother with additional
+@@ -2654,7 +2656,7 @@ static void mptcp_worker(struct work_str
+        * even if it is orphaned and in FIN_WAIT2 state
+        */
+       if (sock_flag(sk, SOCK_DEAD)) {
+-              if (mptcp_check_close_timeout(sk)) {
++              if (mptcp_should_close(sk)) {
+                       inet_sk_state_store(sk, TCP_CLOSE);
+                       mptcp_do_fastclose(sk);
+               }
+@@ -2901,6 +2903,14 @@ static void __mptcp_destroy_sock(struct
+       sock_put(sk);
+ }
++void __mptcp_unaccepted_force_close(struct sock *sk)
++{
++      sock_set_flag(sk, SOCK_DEAD);
++      inet_sk_state_store(sk, TCP_CLOSE);
++      mptcp_do_fastclose(sk);
++      __mptcp_destroy_sock(sk);
++}
++
+ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+ {
+       /* Concurrent splices from sk_receive_queue into receive_queue will
+@@ -3728,6 +3738,18 @@ static int mptcp_stream_accept(struct so
+                       if (!ssk->sk_socket)
+                               mptcp_sock_graft(ssk, newsock);
+               }
++
++              /* Do late cleanup for the first subflow as necessary. Also
++               * deal with bad peers not doing a complete shutdown.
++               */
++              if (msk->first &&
++                  unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
++                      __mptcp_close_ssk(newsk, msk->first,
++                                        mptcp_subflow_ctx(msk->first), 0);
++                      if (unlikely(list_empty(&msk->conn_list)))
++                              inet_sk_state_store(newsk, TCP_CLOSE);
++              }
++
+               release_sock(newsk);
+       }
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -634,6 +634,7 @@ void mptcp_sock_graft(struct sock *sk, s
+ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+ bool __mptcp_close(struct sock *sk, long timeout);
+ void mptcp_cancel_work(struct sock *sk);
++void __mptcp_unaccepted_force_close(struct sock *sk);
+ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
+ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -722,9 +722,12 @@ void mptcp_subflow_drop_ctx(struct sock
+       if (!ctx)
+               return;
+-      subflow_ulp_fallback(ssk, ctx);
+-      if (ctx->conn)
+-              sock_put(ctx->conn);
++      list_del(&mptcp_subflow_ctx(ssk)->node);
++      if (inet_csk(ssk)->icsk_ulp_ops) {
++              subflow_ulp_fallback(ssk, ctx);
++              if (ctx->conn)
++                      sock_put(ctx->conn);
++      }
+       kfree_rcu(ctx, rcu);
+ }
+@@ -1821,6 +1824,7 @@ void mptcp_subflow_queue_clean(struct so
+       struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+       struct mptcp_sock *msk, *next, *head = NULL;
+       struct request_sock *req;
++      struct sock *sk;
+       /* build a list of all unaccepted mptcp sockets */
+       spin_lock_bh(&queue->rskq_lock);
+@@ -1836,11 +1840,12 @@ void mptcp_subflow_queue_clean(struct so
+                       continue;
+               /* skip if already in list */
+-              msk = mptcp_sk(subflow->conn);
++              sk = subflow->conn;
++              msk = mptcp_sk(sk);
+               if (msk->dl_next || msk == head)
+                       continue;
+-              sock_hold(subflow->conn);
++              sock_hold(sk);
+               msk->dl_next = head;
+               head = msk;
+       }
+@@ -1854,16 +1859,13 @@ void mptcp_subflow_queue_clean(struct so
+       release_sock(listener_ssk);
+       for (msk = head; msk; msk = next) {
+-              struct sock *sk = (struct sock *)msk;
++              sk = (struct sock *)msk;
+               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+               next = msk->dl_next;
+               msk->dl_next = NULL;
+-              /* prevent the stack from later re-schedule the worker for
+-               * this socket
+-               */
+-              inet_sk_state_store(sk, TCP_CLOSE);
++              __mptcp_unaccepted_force_close(sk);
+               release_sock(sk);
+               /* lockdep will report a false positive ABBA deadlock
diff --git a/queue-6.2/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch b/queue-6.2/mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch
new file mode 100644 (file)
index 0000000..118dbfb
--- /dev/null
@@ -0,0 +1,192 @@
+From 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Mon, 17 Apr 2023 16:00:40 +0200
+Subject: mptcp: stops worker on unaccepted sockets at listener close
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 2a6a870e44dd88f1a6a2893c65ef756a9edfb4c7 upstream.
+
+This is a partial revert of the blamed commit, with a relevant
+change: mptcp_subflow_queue_clean() now just changes the msk
+socket status and stops the worker, so that the UaF issue addressed
+by the blamed commit is not re-introduced.
+
+The above prevents the mptcp worker from running concurrently with
+inet_csk_listen_stop(), as such a race would trigger a warning, as
+reported by Christoph:
+
+RSP: 002b:00007f784fe09cd8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+WARNING: CPU: 0 PID: 25807 at net/ipv4/inet_connection_sock.c:1387 inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387
+RAX: ffffffffffffffda RBX: 00000000006bc050 RCX: 00007f7850afd6a9
+RDX: 0000000000000000 RSI: 0000000020000340 RDI: 0000000000000004
+Modules linked in:
+RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006bc05c
+R13: fffffffffffffea8 R14: 00000000006bc050 R15: 000000000001fe40
+
+ </TASK>
+CPU: 0 PID: 25807 Comm: syz-executor.7 Not tainted 6.2.0-g778e54711659 #7
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+RIP: 0010:inet_csk_listen_stop+0x664/0x870 net/ipv4/inet_connection_sock.c:1387
+RAX: 0000000000000000 RBX: ffff888100dfbd40 RCX: 0000000000000000
+RDX: ffff8881363aab80 RSI: ffffffff81c494f4 RDI: 0000000000000005
+RBP: ffff888126dad080 R08: 0000000000000005 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: ffff888100dfe040
+R13: 0000000000000001 R14: 0000000000000000 R15: ffff888100dfbdd8
+FS:  00007f7850a2c800(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000001b32d26000 CR3: 000000012fdd8006 CR4: 0000000000770ef0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ __tcp_close+0x5b2/0x620 net/ipv4/tcp.c:2875
+ __mptcp_close_ssk+0x145/0x3d0 net/mptcp/protocol.c:2427
+ mptcp_destroy_common+0x8a/0x1c0 net/mptcp/protocol.c:3277
+ mptcp_destroy+0x41/0x60 net/mptcp/protocol.c:3304
+ __mptcp_destroy_sock+0x56/0x140 net/mptcp/protocol.c:2965
+ __mptcp_close+0x38f/0x4a0 net/mptcp/protocol.c:3057
+ mptcp_close+0x24/0xe0 net/mptcp/protocol.c:3072
+ inet_release+0x53/0xa0 net/ipv4/af_inet.c:429
+ __sock_release+0x4e/0xf0 net/socket.c:651
+ sock_close+0x15/0x20 net/socket.c:1393
+ __fput+0xff/0x420 fs/file_table.c:321
+ task_work_run+0x8b/0xe0 kernel/task_work.c:179
+ resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
+ exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline]
+ syscall_exit_to_user_mode+0x1d/0x40 kernel/entry/common.c:296
+ do_syscall_64+0x46/0x90 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7f7850af70dc
+RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7850af70dc
+RDX: 00007f7850a2c800 RSI: 0000000000000002 RDI: 0000000000000003
+RBP: 00000000006bd980 R08: 0000000000000000 R09: 00000000000018a0
+R10: 00000000316338a4 R11: 0000000000000293 R12: 0000000000211e31
+R13: 00000000006bc05c R14: 00007f785062c000 R15: 0000000000211af0
+
+Fixes: 0a3f4f1f9c27 ("mptcp: fix UaF in listener shutdown")
+Cc: stable@vger.kernel.org
+Reported-by: Christoph Paasch <cpaasch@apple.com>
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/371
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |    6 +++-
+ net/mptcp/protocol.h |    1 
+ net/mptcp/subflow.c  |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 78 insertions(+), 1 deletion(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2366,8 +2366,12 @@ static void __mptcp_close_ssk(struct soc
+               mptcp_subflow_drop_ctx(ssk);
+       } else {
+               /* otherwise tcp will dispose of the ssk and subflow ctx */
+-              if (ssk->sk_state == TCP_LISTEN)
++              if (ssk->sk_state == TCP_LISTEN) {
++                      tcp_set_state(ssk, TCP_CLOSE);
++                      mptcp_subflow_queue_clean(sk, ssk);
++                      inet_csk_listen_stop(ssk);
+                       mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
++              }
+               __tcp_close(ssk, 0);
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -629,6 +629,7 @@ void mptcp_close_ssk(struct sock *sk, st
+                    struct mptcp_subflow_context *subflow);
+ void __mptcp_subflow_send_ack(struct sock *ssk);
+ void mptcp_subflow_reset(struct sock *ssk);
++void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
+ void mptcp_sock_graft(struct sock *sk, struct socket *parent);
+ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+ bool __mptcp_close(struct sock *sk, long timeout);
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1816,6 +1816,78 @@ static void subflow_state_change(struct
+       }
+ }
++void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
++{
++      struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
++      struct mptcp_sock *msk, *next, *head = NULL;
++      struct request_sock *req;
++
++      /* build a list of all unaccepted mptcp sockets */
++      spin_lock_bh(&queue->rskq_lock);
++      for (req = queue->rskq_accept_head; req; req = req->dl_next) {
++              struct mptcp_subflow_context *subflow;
++              struct sock *ssk = req->sk;
++
++              if (!sk_is_mptcp(ssk))
++                      continue;
++
++              subflow = mptcp_subflow_ctx(ssk);
++              if (!subflow || !subflow->conn)
++                      continue;
++
++              /* skip if already in list */
++              msk = mptcp_sk(subflow->conn);
++              if (msk->dl_next || msk == head)
++                      continue;
++
++              sock_hold(subflow->conn);
++              msk->dl_next = head;
++              head = msk;
++      }
++      spin_unlock_bh(&queue->rskq_lock);
++      if (!head)
++              return;
++
++      /* can't acquire the msk socket lock under the subflow one,
++       * or will cause ABBA deadlock
++       */
++      release_sock(listener_ssk);
++
++      for (msk = head; msk; msk = next) {
++              struct sock *sk = (struct sock *)msk;
++
++              lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
++              next = msk->dl_next;
++              msk->dl_next = NULL;
++
++              /* prevent the stack from later re-schedule the worker for
++               * this socket
++               */
++              inet_sk_state_store(sk, TCP_CLOSE);
++              release_sock(sk);
++
++              /* lockdep will report a false positive ABBA deadlock
++               * between cancel_work_sync and the listener socket.
++               * The involved locks belong to different sockets WRT
++               * the existing AB chain.
++               * Using a per socket key is problematic as key
++               * deregistration requires process context and must be
++               * performed at socket disposal time, in atomic
++               * context.
++               * Just tell lockdep to consider the listener socket
++               * released here.
++               */
++              mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
++              mptcp_cancel_work(sk);
++              mutex_acquire(&listener_sk->sk_lock.dep_map, 0, 0, _RET_IP_);
++
++              sock_put(sk);
++      }
++
++      /* we are still under the listener msk socket lock */
++      lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
++}
++
+ static int subflow_ulp_init(struct sock *sk)
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
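The mutex_release()/mutex_acquire() pair in the hunk above is a lockdep-only annotation. Roughly (generic sketch, hypothetical lock and function names, assuming both locks carry lockdep maps), it tells lockdep to treat the still-held listener lock as released around the operation that takes the other lock, so the false-positive ABBA report described in the comment is not raised:

    /* lock_a is really held across this whole sequence */
    mutex_release(&lock_a->dep_map, _RET_IP_);         /* hide it from lockdep */
    operation_that_takes_lock_b();                     /* would look like ABBA */
    mutex_acquire(&lock_a->dep_map, 0, 0, _RET_IP_);   /* restore the annotation */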
diff --git a/queue-6.2/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch b/queue-6.2/nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
new file mode 100644 (file)
index 0000000..1717870
--- /dev/null
@@ -0,0 +1,80 @@
+From ef832747a82dfbc22a3702219cc716f449b24e4a Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Tue, 18 Apr 2023 02:35:13 +0900
+Subject: nilfs2: initialize unused bytes in segment summary blocks
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit ef832747a82dfbc22a3702219cc716f449b24e4a upstream.
+
+Syzbot still reports uninit-value in nilfs_add_checksums_on_logs() for
+KMSAN enabled kernels after applying commit 7397031622e0 ("nilfs2:
+initialize "struct nilfs_binfo_dat"->bi_pad field").
+
+This is because the unused bytes at the end of each block in segment
+summaries are not initialized.  So this fixes the issue by padding the
+unused bytes with null bytes.
+
+Link: https://lkml.kernel.org/r/20230417173513.12598-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+048585f3f4227bb2b49b@syzkaller.appspotmail.com
+  Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b
+Cc: Alexander Potapenko <glider@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segment.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_b
+       return 0;
+ }
++/**
++ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
++ * @sci: segment constructor object
++ *
++ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
++ * the current segment summary block.
++ */
++static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
++{
++      struct nilfs_segsum_pointer *ssp;
++
++      ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
++      if (ssp->offset < ssp->bh->b_size)
++              memset(ssp->bh->b_data + ssp->offset, 0,
++                     ssp->bh->b_size - ssp->offset);
++}
++
+ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
+ {
+       sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
+@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(st
+                               * The current segment is filled up
+                               * (internal code)
+                               */
++      nilfs_segctor_zeropad_segsum(sci);
+       sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
+       return nilfs_segctor_reset_segment_buffer(sci);
+ }
+@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(
+               goto retry;
+       }
+       if (unlikely(required)) {
++              nilfs_segctor_zeropad_segsum(sci);
+               err = nilfs_segbuf_extend_segsum(segbuf);
+               if (unlikely(err))
+                       goto failed;
+@@ -1531,6 +1550,7 @@ static int nilfs_segctor_collect(struct
+               nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
+               sci->sc_stage = prev_stage;
+       }
++      nilfs_segctor_zeropad_segsum(sci);
+       nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
+       return 0;
diff --git a/queue-6.2/series b/queue-6.2/series
index 34863be53447e4dfedbf696402aa4262babb47f5..3098ee3df3d5d203e0c07eefafb81e8e381ac89b 100644 (file)
@@ -68,3 +68,11 @@ wifi-ath9k-don-t-mark-channelmap-stack-variable-read-only-in-ath9k_mci_update_wl
 maple_tree-make-maple-state-reusable-after-mas_empty_area_rev.patch
 maple_tree-fix-mas_empty_area-search.patch
 maple_tree-fix-a-potential-memory-leak-oob-access-or-other-unpredictable-bug.patch
+asoc-sof-ipc4-topology-clarify-bind-failure-caused-by-missing-fw_module.patch
+nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
+mptcp-stops-worker-on-unaccepted-sockets-at-listener-close.patch
+mptcp-fix-accept-vs-worker-race.patch
+tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
+memstick-fix-memory-leak-if-card-device-is-never-registered.patch
+kernel-sys.c-fix-and-improve-control-flow-in-__sys_setresid.patch
+writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch
diff --git a/queue-6.2/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch b/queue-6.2/tools-mm-page_owner_sort.c-fix-tgid-output-when-cull-tg-is-used.patch
new file mode 100644 (file)
index 0000000..89579cc
--- /dev/null
@@ -0,0 +1,34 @@
+From 9235756885e865070c4be2facda75262dbd85967 Mon Sep 17 00:00:00 2001
+From: Steve Chou <steve_chou@pesi.com.tw>
+Date: Tue, 11 Apr 2023 11:49:28 +0800
+Subject: tools/mm/page_owner_sort.c: fix TGID output when cull=tg is used
+
+From: Steve Chou <steve_chou@pesi.com.tw>
+
+commit 9235756885e865070c4be2facda75262dbd85967 upstream.
+
+When using the cull option with the 'tg' flag, fprintf() prints pid instead
+of tgid. It should use tgid instead.
+
+Link: https://lkml.kernel.org/r/20230411034929.2071501-1-steve_chou@pesi.com.tw
+Fixes: 9c8a0a8e599f4a ("tools/vm/page_owner_sort.c: support for user-defined culling rules")
+Signed-off-by: Steve Chou <steve_chou@pesi.com.tw>
+Cc: Jiajian Ye <yejiajian2018@email.szu.edu.cn>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/vm/page_owner_sort.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/tools/vm/page_owner_sort.c
++++ b/tools/vm/page_owner_sort.c
+@@ -847,7 +847,7 @@ int main(int argc, char **argv)
+                       if (cull & CULL_PID || filter & FILTER_PID)
+                               fprintf(fout, ", PID %d", list[i].pid);
+                       if (cull & CULL_TGID || filter & FILTER_TGID)
+-                              fprintf(fout, ", TGID %d", list[i].pid);
++                              fprintf(fout, ", TGID %d", list[i].tgid);
+                       if (cull & CULL_COMM || filter & FILTER_COMM)
+                               fprintf(fout, ", task_comm_name: %s", list[i].comm);
+                       if (cull & CULL_ALLOCATOR) {
diff --git a/queue-6.2/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch b/queue-6.2/writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch
new file mode 100644 (file)
index 0000000..edbf725
--- /dev/null
@@ -0,0 +1,172 @@
+From 1ba1199ec5747f475538c0d25a32804e5ba1dfde Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Mon, 10 Apr 2023 21:08:26 +0800
+Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde upstream.
+
+KASAN reports a null-ptr-deref:
+==================================================================
+BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0
+Write of size 8 at addr 0000000000000000 by task sync/943
+CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x7f/0xc0
+ print_report+0x2ba/0x340
+ kasan_report+0xc4/0x120
+ kasan_check_range+0x1b7/0x2e0
+ __kasan_check_write+0x24/0x40
+ bdi_split_work_to_wbs+0x5c5/0x7b0
+ sync_inodes_sb+0x195/0x630
+ sync_inodes_one_sb+0x3a/0x50
+ iterate_supers+0x106/0x1b0
+ ksys_sync+0x98/0x160
+[...]
+==================================================================
+
+The race that causes the above issue is as follows:
+
+           cpu1                     cpu2
+-------------------------|-------------------------
+inode_switch_wbs
+ INIT_WORK(&isw->work, inode_switch_wbs_work_fn)
+ queue_rcu_work(isw_wq, &isw->work)
+ // queue_work async
+  inode_switch_wbs_work_fn
+   wb_put_many(old_wb, nr_switched)
+    percpu_ref_put_many
+     ref->data->release(ref)
+     cgwb_release
+      queue_work(cgwb_release_wq, &wb->release_work)
+      // queue_work async
+       &wb->release_work
+       cgwb_release_workfn
+                            ksys_sync
+                             iterate_supers
+                              sync_inodes_one_sb
+                               sync_inodes_sb
+                                bdi_split_work_to_wbs
+                                 kmalloc(sizeof(*work), GFP_ATOMIC)
+                                 // alloc memory failed
+        percpu_ref_exit
+         ref->data = NULL
+         kfree(data)
+                                 wb_get(wb)
+                                  percpu_ref_get(&wb->refcnt)
+                                   percpu_ref_get_many(ref, 1)
+                                    atomic_long_add(nr, &ref->data->count)
+                                     atomic64_add(i, v)
+                                     // trigger null-ptr-deref
+
+bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all
+wbs.  If the allocation of new work fails, the on-stack fallback will be
+used and the reference count of the current wb is increased afterwards.
+If cgroup writeback membership switches occur before getting the reference
+count and the current wb is released as old_wb, then calling wb_get() or
+wb_put() will trigger the null pointer dereference above.
+
+This issue was introduced in v4.3-rc7 (see fix tag1).  Both
+sync_inodes_sb() and __writeback_inodes_sb_nr() calls to
+bdi_split_work_to_wbs() can trigger this issue.  For scenarios called via
+sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize
+sync(2) against cgroup writeback membership switches") reduced the
+possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see
+fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from
+inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io,
+thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(),
+and the issue becomes easily reproducible again.
+
+To solve this problem, percpu_ref_exit() is called under RCU protection to
+avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs().
+Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(),
+and skip the current wb if wb_tryget() fails because the wb has already
+been shutdown.
+
+Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com
+Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Acked-by: Tejun Heo <tj@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Andreas Dilger <adilger.kernel@dilger.ca>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Dennis Zhou <dennis@kernel.org>
+Cc: Hou Tao <houtao1@huawei.com>
+Cc: yangerkun <yangerkun@huawei.com>
+Cc: Zhang Yi <yi.zhang@huawei.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fs-writeback.c |   17 ++++++++++-------
+ mm/backing-dev.c  |   12 ++++++++++--
+ 2 files changed, 20 insertions(+), 9 deletions(-)
+
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -976,6 +976,16 @@ restart:
+                       continue;
+               }
++              /*
++               * If wb_tryget fails, the wb has been shutdown, skip it.
++               *
++               * Pin @wb so that it stays on @bdi->wb_list.  This allows
++               * continuing iteration from @wb after dropping and
++               * regrabbing rcu read lock.
++               */
++              if (!wb_tryget(wb))
++                      continue;
++
+               /* alloc failed, execute synchronously using on-stack fallback */
+               work = &fallback_work;
+               *work = *base_work;
+@@ -984,13 +994,6 @@ restart:
+               work->done = &fallback_work_done;
+               wb_queue_work(wb, work);
+-
+-              /*
+-               * Pin @wb so that it stays on @bdi->wb_list.  This allows
+-               * continuing iteration from @wb after dropping and
+-               * regrabbing rcu read lock.
+-               */
+-              wb_get(wb);
+               last_wb = wb;
+               rcu_read_unlock();
+--- a/mm/backing-dev.c
++++ b/mm/backing-dev.c
+@@ -507,6 +507,15 @@ static LIST_HEAD(offline_cgwbs);
+ static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
+ static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
++static void cgwb_free_rcu(struct rcu_head *rcu_head)
++{
++      struct bdi_writeback *wb = container_of(rcu_head,
++                      struct bdi_writeback, rcu);
++
++      percpu_ref_exit(&wb->refcnt);
++      kfree(wb);
++}
++
+ static void cgwb_release_workfn(struct work_struct *work)
+ {
+       struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
+@@ -529,11 +538,10 @@ static void cgwb_release_workfn(struct w
+       list_del(&wb->offline_node);
+       spin_unlock_irq(&cgwb_lock);
+-      percpu_ref_exit(&wb->refcnt);
+       wb_exit(wb);
+       bdi_put(bdi);
+       WARN_ON_ONCE(!list_empty(&wb->b_attached));
+-      kfree_rcu(wb, rcu);
++      call_rcu(&wb->rcu, cgwb_free_rcu);
+ }
+ static void cgwb_release(struct percpu_ref *refcnt)