5.10-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 20 Sep 2023 10:51:01 +0000 (12:51 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 20 Sep 2023 10:51:01 +0000 (12:51 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 20 Sep 2023 10:51:01 +0000 (12:51 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 20 Sep 2023 10:51:01 +0000 (12:51 +0200)
diff --git a/queue-5.10/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch b/queue-5.10/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch

new file mode 100644 (file)

index 0000000..224527f
--- /dev/null
+++ b/queue-5.10/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch
@@ -0,0 +1,112 @@
+From 24e0e61db3cb86a66824531989f1df80e0939f26 Mon Sep 17 00:00:00 2001
+From: Niklas Cassel <niklas.cassel@wdc.com>
+Date: Mon, 4 Sep 2023 22:42:56 +0200
+Subject: ata: libata: disallow dev-initiated LPM transitions to unsupported states
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Niklas Cassel <niklas.cassel@wdc.com>
+
+commit 24e0e61db3cb86a66824531989f1df80e0939f26 upstream.
+
+In AHCI 1.3.1, the register description for CAP.SSC:
+"When cleared to ‘0’, software must not allow the HBA to initiate
+transitions to the Slumber state via agressive link power management nor
+the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port
+must be programmed to disallow device initiated Slumber requests."
+
+In AHCI 1.3.1, the register description for CAP.PSC:
+"When cleared to ‘0’, software must not allow the HBA to initiate
+transitions to the Partial state via agressive link power management nor
+the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port
+must be programmed to disallow device initiated Partial requests."
+
+Ensure that we always set the corresponding bits in PxSCTL.IPM, such that
+a device is not allowed to initiate transitions to power states which are
+unsupported by the HBA.
+
+DevSleep is always initiated by the HBA; however, for completeness, set the
+corresponding bit in PxSCTL.IPM such that aggressive link power management
+cannot transition to DevSleep if DevSleep is not supported.
+
+sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp.
+However, only libahci has the ability to read the CAP/CAP2 register to see
+if these features are supported. Therefore, in order to not introduce any
+regressions on ata_piix or libata-pmp, create flags that indicate that the
+respective feature is NOT supported. This way, the behavior for ata_piix
+and libata-pmp should remain unchanged.
+
+This change is based on a patch originally submitted by Runa Guo-oc.
+
+Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
+Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/ahci.c        |    9 +++++++++
+ drivers/ata/libata-sata.c |   19 ++++++++++++++++---
+ include/linux/libata.h    |    4 ++++
+ 3 files changed, 29 insertions(+), 3 deletions(-)
+
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -1882,6 +1882,15 @@ static int ahci_init_one(struct pci_dev
+       else
+               dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n");
+ 
++      if (!(hpriv->cap & HOST_CAP_PART))
++              host->flags |= ATA_HOST_NO_PART;
++
++      if (!(hpriv->cap & HOST_CAP_SSC))
++              host->flags |= ATA_HOST_NO_SSC;
++
++      if (!(hpriv->cap2 & HOST_CAP2_SDS))
++              host->flags |= ATA_HOST_NO_DEVSLP;
++
+       if (pi.flags & ATA_FLAG_EM)
+               ahci_reset_em(host);
+ 
+--- a/drivers/ata/libata-sata.c
++++ b/drivers/ata/libata-sata.c
+@@ -394,10 +394,23 @@ int sata_link_scr_lpm(struct ata_link *l
+       case ATA_LPM_MED_POWER_WITH_DIPM:
+       case ATA_LPM_MIN_POWER_WITH_PARTIAL:
+       case ATA_LPM_MIN_POWER:
+-              if (ata_link_nr_enabled(link) > 0)
+-                      /* no restrictions on LPM transitions */
++              if (ata_link_nr_enabled(link) > 0) {
++                      /* assume no restrictions on LPM transitions */
+                       scontrol &= ~(0x7 << 8);
+-              else {
++
++                      /*
++                       * If the controller does not support partial, slumber,
++                       * or devsleep, then disallow these transitions.
++                       */
++                      if (link->ap->host->flags & ATA_HOST_NO_PART)
++                              scontrol |= (0x1 << 8);
++
++                      if (link->ap->host->flags & ATA_HOST_NO_SSC)
++                              scontrol |= (0x2 << 8);
++
++                      if (link->ap->host->flags & ATA_HOST_NO_DEVSLP)
++                              scontrol |= (0x4 << 8);
++              } else {
+                       /* empty port, power off */
+                       scontrol &= ~0xf;
+                       scontrol |= (0x1 << 2);
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -260,6 +260,10 @@ enum {
+       ATA_HOST_PARALLEL_SCAN  = (1 << 2),     /* Ports on this host can be scanned in parallel */
+       ATA_HOST_IGNORE_ATA     = (1 << 3),     /* Ignore ATA devices on this host. */
+ 
++      ATA_HOST_NO_PART        = (1 << 4), /* Host does not support partial */
++      ATA_HOST_NO_SSC         = (1 << 5), /* Host does not support slumber */
++      ATA_HOST_NO_DEVSLP      = (1 << 6), /* Host does not support devslp */
++
+       /* bits 24:31 of host->flags are reserved for LLD specific flags */
+ 
+       /* various lengths of time */
diff --git a/queue-5.10/attr-block-mode-changes-of-symlinks.patch b/queue-5.10/attr-block-mode-changes-of-symlinks.patch

new file mode 100644 (file)

index 0000000..3c68134
--- /dev/null
+++ b/queue-5.10/attr-block-mode-changes-of-symlinks.patch
@@ -0,0 +1,140 @@
+From 5d1f903f75a80daa4dfb3d84e114ec8ecbf29956 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Wed, 12 Jul 2023 20:58:49 +0200
+Subject: attr: block mode changes of symlinks
+
+From: Christian Brauner <brauner@kernel.org>
+
+commit 5d1f903f75a80daa4dfb3d84e114ec8ecbf29956 upstream.
+
+Changing the mode of symlinks is meaningless as the vfs doesn't take the
+mode of a symlink into account during path lookup permission checking.
+
+However, the vfs doesn't block mode changes on symlinks. This, however,
+has led to an untenable mess roughly classifiable into the following
+two categories:
+
+(1) Filesystems that don't implement a i_op->setattr() for symlinks.
+
+    Such filesystems may or may not know that without i_op->setattr()
+    defined, notify_change() falls back to simple_setattr() causing the
+    inode's mode in the inode cache to be changed.
+
+    That's a generic issue as this will affect all non-size changing
+    inode attributes including ownership changes.
+
+    Example: afs
+
+(2) Filesystems that fail with EOPNOTSUPP but change the mode of the
+    symlink nonetheless.
+
+    Some filesystems will happily update the mode of a symlink but still
+    return EOPNOTSUPP. This is the biggest source of confusion for
+    userspace.
+
+    The EOPNOTSUPP in this case comes from POSIX ACLs. Specifically it
+    comes from filesystems that call posix_acl_chmod(), e.g., btrfs via
+
+        if (!err && attr->ia_valid & ATTR_MODE)
+                err = posix_acl_chmod(idmap, dentry, inode->i_mode);
+
+    Filesystems including btrfs don't implement i_op->set_acl() so
+    posix_acl_chmod() will report EOPNOTSUPP.
+
+    When posix_acl_chmod() is called, most filesystems will have
+    finished updating the inode.
+
+    Perversely, this has the consequence that this behavior may depend
+    on two kconfig options and mount options:
+
+    * CONFIG_POSIX_ACL={y,n}
+    * CONFIG_${FSTYPE}_POSIX_ACL={y,n}
+    * Opt_acl, Opt_noacl
+
+    Example: btrfs, ext4, xfs
+
+The only way to change the mode on a symlink currently involves abusing
+an O_PATH file descriptor in the following manner:
+
+        fd = openat(-1, "/path/to/link", O_CLOEXEC | O_PATH | O_NOFOLLOW);
+
+        char path[PATH_MAX];
+        snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+        chmod(path, 0000);
+
+But for most major filesystems with POSIX ACL support such as btrfs,
+ext4, ceph, tmpfs, xfs and others this will fail with EOPNOTSUPP with
+the mode still updated due to the aforementioned posix_acl_chmod()
+nonsense.
+
+So, given that for all major filesystems this would fail with EOPNOTSUPP
+and that both glibc (cf. [1]) and musl (cf. [2]) outright block mode
+changes on symlinks we should just try and block mode changes on
+symlinks directly in the vfs and have a clean break with this nonsense.
+
+If this causes any regressions, we do the next best thing and fix up all
+filesystems that do return EOPNOTSUPP with the mode updated to not call
+posix_acl_chmod() on symlinks.
+
+But as usual, let's try the clean cut solution first. It's a simple
+patch that can be easily reverted. Not marking this for backport as I'll
+do that manually if we're reasonably sure that this works and there are
+no strong objections.
+
+We could block this in chmod_common() but it's more appropriate to do it
+in notify_change() as it will also mean that we catch filesystems that
+change symlink permissions explicitly or accidentally.
+
+Similar proposals were floated in the past as in [3] and [4] and again
+recently in [5]. There's also a couple of bugs about this inconsistency
+as in [6] and [7].
+
+Link: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=99527a3727e44cb8661ee1f743068f108ec93979;hb=HEAD [1]
+Link: https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c [2]
+Link: https://lore.kernel.org/all/20200911065733.GA31579@infradead.org [3]
+Link: https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00518.html [4]
+Link: https://lore.kernel.org/lkml/87lefmbppo.fsf@oldenburg.str.redhat.com [5]
+Link: https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html [6]
+Link: https://sourceware.org/bugzilla/show_bug.cgi?id=14578#c17 [7]
+Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org # please backport to all LTSes but not before v6.6-rc2 is tagged
+Suggested-by: Christoph Hellwig <hch@lst.de>
+Suggested-by: Florian Weimer <fweimer@redhat.com>
+Message-Id: <20230712-vfs-chmod-symlinks-v2-1-08cfb92b61dd@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/attr.c |   20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -309,9 +309,25 @@ int notify_change(struct dentry * dentry
+       }
+ 
+       if ((ia_valid & ATTR_MODE)) {
+-              umode_t amode = attr->ia_mode;
++              /*
++               * Don't allow changing the mode of symlinks:
++               *
++               * (1) The vfs doesn't take the mode of symlinks into account
++               *     during permission checking.
++               * (2) This has never worked correctly. Most major filesystems
++               *     did return EOPNOTSUPP due to interactions with POSIX ACLs
++               *     but did still updated the mode of the symlink.
++               *     This inconsistency led system call wrapper providers such
++               *     as libc to block changing the mode of symlinks with
++               *     EOPNOTSUPP already.
++               * (3) To even do this in the first place one would have to use
++               *     specific file descriptors and quite some effort.
++               */
++              if (S_ISLNK(inode->i_mode))
++                      return -EOPNOTSUPP;
++
+               /* Flag setting protected by i_mutex */
+-              if (is_sxid(amode))
++              if (is_sxid(attr->ia_mode))
+                       inode->i_flags &= ~S_NOSEC;
+       }
+ 
diff --git a/queue-5.10/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch b/queue-5.10/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch

new file mode 100644 (file)

index 0000000..cd8afef
--- /dev/null
+++ b/queue-5.10/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch
@@ -0,0 +1,188 @@
+From e110f8911ddb93e6f55da14ccbbe705397b30d0b Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 29 Aug 2023 11:34:52 +0100
+Subject: btrfs: fix lockdep splat and potential deadlock after failure running delayed items
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit e110f8911ddb93e6f55da14ccbbe705397b30d0b upstream.
+
+When running delayed items we are holding a delayed node's mutex and then
+we will attempt to modify a subvolume btree to insert/update/delete the
+delayed items. However, if we have an error during the insertions, for example,
+btrfs_insert_delayed_items() may return with a path that has locked extent
+buffers (a leaf at the very least), and then we attempt to release the
+delayed node at __btrfs_run_delayed_items(), which requires taking the
+delayed node's mutex, causing an ABBA type of deadlock. This was reported
+by syzbot and the lockdep splat is the following:
+
+  WARNING: possible circular locking dependency detected
+  6.5.0-rc7-syzkaller-00024-g93f5de5f648d #0 Not tainted
+  ------------------------------------------------------
+  syz-executor.2/13257 is trying to acquire lock:
+  ffff88801835c0c0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node+0x9a/0xaa0 fs/btrfs/delayed-inode.c:256
+
+  but task is already holding lock:
+  ffff88802a5ab8e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_lock+0x3c/0x2a0 fs/btrfs/locking.c:198
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #1 (btrfs-tree-00){++++}-{3:3}:
+         __lock_release kernel/locking/lockdep.c:5475 [inline]
+         lock_release+0x36f/0x9d0 kernel/locking/lockdep.c:5781
+         up_write+0x79/0x580 kernel/locking/rwsem.c:1625
+         btrfs_tree_unlock_rw fs/btrfs/locking.h:189 [inline]
+         btrfs_unlock_up_safe+0x179/0x3b0 fs/btrfs/locking.c:239
+         search_leaf fs/btrfs/ctree.c:1986 [inline]
+         btrfs_search_slot+0x2511/0x2f80 fs/btrfs/ctree.c:2230
+         btrfs_insert_empty_items+0x9c/0x180 fs/btrfs/ctree.c:4376
+         btrfs_insert_delayed_item fs/btrfs/delayed-inode.c:746 [inline]
+         btrfs_insert_delayed_items fs/btrfs/delayed-inode.c:824 [inline]
+         __btrfs_commit_inode_delayed_items+0xd24/0x2410 fs/btrfs/delayed-inode.c:1111
+         __btrfs_run_delayed_items+0x1db/0x430 fs/btrfs/delayed-inode.c:1153
+         flush_space+0x269/0xe70 fs/btrfs/space-info.c:723
+         btrfs_async_reclaim_metadata_space+0x106/0x350 fs/btrfs/space-info.c:1078
+         process_one_work+0x92c/0x12c0 kernel/workqueue.c:2600
+         worker_thread+0xa63/0x1210 kernel/workqueue.c:2751
+         kthread+0x2b8/0x350 kernel/kthread.c:389
+         ret_from_fork+0x2e/0x60 arch/x86/kernel/process.c:145
+         ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+  -> #0 (&delayed_node->mutex){+.+.}-{3:3}:
+         check_prev_add kernel/locking/lockdep.c:3142 [inline]
+         check_prevs_add kernel/locking/lockdep.c:3261 [inline]
+         validate_chain kernel/locking/lockdep.c:3876 [inline]
+         __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144
+         lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761
+         __mutex_lock_common+0x1d8/0x2530 kernel/locking/mutex.c:603
+         __mutex_lock kernel/locking/mutex.c:747 [inline]
+         mutex_lock_nested+0x1b/0x20 kernel/locking/mutex.c:799
+         __btrfs_release_delayed_node+0x9a/0xaa0 fs/btrfs/delayed-inode.c:256
+         btrfs_release_delayed_node fs/btrfs/delayed-inode.c:281 [inline]
+         __btrfs_run_delayed_items+0x2b5/0x430 fs/btrfs/delayed-inode.c:1156
+         btrfs_commit_transaction+0x859/0x2ff0 fs/btrfs/transaction.c:2276
+         btrfs_sync_file+0xf56/0x1330 fs/btrfs/file.c:1988
+         vfs_fsync_range fs/sync.c:188 [inline]
+         vfs_fsync fs/sync.c:202 [inline]
+         do_fsync fs/sync.c:212 [inline]
+         __do_sys_fsync fs/sync.c:220 [inline]
+         __se_sys_fsync fs/sync.c:218 [inline]
+         __x64_sys_fsync+0x196/0x1e0 fs/sync.c:218
+         do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+         do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+         entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+  other info that might help us debug this:
+
+   Possible unsafe locking scenario:
+
+         CPU0                    CPU1
+         ----                    ----
+    lock(btrfs-tree-00);
+                                 lock(&delayed_node->mutex);
+                                 lock(btrfs-tree-00);
+    lock(&delayed_node->mutex);
+
+   *** DEADLOCK ***
+
+  3 locks held by syz-executor.2/13257:
+   #0: ffff88802c1ee370 (btrfs_trans_num_writers){++++}-{0:0}, at: spin_unlock include/linux/spinlock.h:391 [inline]
+   #0: ffff88802c1ee370 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0xb87/0xe00 fs/btrfs/transaction.c:287
+   #1: ffff88802c1ee398 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0xbb2/0xe00 fs/btrfs/transaction.c:288
+   #2: ffff88802a5ab8e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_lock+0x3c/0x2a0 fs/btrfs/locking.c:198
+
+  stack backtrace:
+  CPU: 0 PID: 13257 Comm: syz-executor.2 Not tainted 6.5.0-rc7-syzkaller-00024-g93f5de5f648d #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+  Call Trace:
+   <TASK>
+   __dump_stack lib/dump_stack.c:88 [inline]
+   dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
+   check_noncircular+0x375/0x4a0 kernel/locking/lockdep.c:2195
+   check_prev_add kernel/locking/lockdep.c:3142 [inline]
+   check_prevs_add kernel/locking/lockdep.c:3261 [inline]
+   validate_chain kernel/locking/lockdep.c:3876 [inline]
+   __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144
+   lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761
+   __mutex_lock_common+0x1d8/0x2530 kernel/locking/mutex.c:603
+   __mutex_lock kernel/locking/mutex.c:747 [inline]
+   mutex_lock_nested+0x1b/0x20 kernel/locking/mutex.c:799
+   __btrfs_release_delayed_node+0x9a/0xaa0 fs/btrfs/delayed-inode.c:256
+   btrfs_release_delayed_node fs/btrfs/delayed-inode.c:281 [inline]
+   __btrfs_run_delayed_items+0x2b5/0x430 fs/btrfs/delayed-inode.c:1156
+   btrfs_commit_transaction+0x859/0x2ff0 fs/btrfs/transaction.c:2276
+   btrfs_sync_file+0xf56/0x1330 fs/btrfs/file.c:1988
+   vfs_fsync_range fs/sync.c:188 [inline]
+   vfs_fsync fs/sync.c:202 [inline]
+   do_fsync fs/sync.c:212 [inline]
+   __do_sys_fsync fs/sync.c:220 [inline]
+   __se_sys_fsync fs/sync.c:218 [inline]
+   __x64_sys_fsync+0x196/0x1e0 fs/sync.c:218
+   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+   do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+   entry_SYSCALL_64_after_hwframe+0x63/0xcd
+  RIP: 0033:0x7f3ad047cae9
+  Code: 28 00 00 00 75 (...)
+  RSP: 002b:00007f3ad12510c8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a
+  RAX: ffffffffffffffda RBX: 00007f3ad059bf80 RCX: 00007f3ad047cae9
+  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005
+  RBP: 00007f3ad04c847a R08: 0000000000000000 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+  R13: 000000000000000b R14: 00007f3ad059bf80 R15: 00007ffe56af92f8
+   </TASK>
+  ------------[ cut here ]------------
+
+Fix this by releasing the path before releasing the delayed node in the
+error path at __btrfs_run_delayed_items().
+
+Reported-by: syzbot+a379155f07c134ea9879@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/000000000000abba27060403b5bd@google.com/
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/delayed-inode.c |   19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1173,20 +1173,33 @@ static int __btrfs_run_delayed_items(str
+               ret = __btrfs_commit_inode_delayed_items(trans, path,
+                                                        curr_node);
+               if (ret) {
+-                      btrfs_release_delayed_node(curr_node);
+-                      curr_node = NULL;
+                       btrfs_abort_transaction(trans, ret);
+                       break;
+               }
+ 
+               prev_node = curr_node;
+               curr_node = btrfs_next_delayed_node(curr_node);
++              /*
++               * See the comment below about releasing path before releasing
++               * node. If the commit of delayed items was successful the path
++               * should always be released, but in case of an error, it may
++               * point to locked extent buffers (a leaf at the very least).
++               */
++              ASSERT(path->nodes[0] == NULL);
+               btrfs_release_delayed_node(prev_node);
+       }
+ 
++      /*
++       * Release the path to avoid a potential deadlock and lockdep splat when
++       * releasing the delayed node, as that requires taking the delayed node's
++       * mutex. If another task starts running delayed items before we take
++       * the mutex, it will first lock the mutex and then it may try to lock
++       * the same btree path (leaf).
++       */
++      btrfs_free_path(path);
++
+       if (curr_node)
+               btrfs_release_delayed_node(curr_node);
+-      btrfs_free_path(path);
+       trans->block_rsv = block_rsv;
+ 
+       return ret;
diff --git a/queue-5.10/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch b/queue-5.10/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch

new file mode 100644 (file)

index 0000000..48d675a
--- /dev/null
+++ b/queue-5.10/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch
@@ -0,0 +1,169 @@
+From ee34a82e890a7babb5585daf1a6dd7d4d1cf142a Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sat, 26 Aug 2023 11:28:20 +0100
+Subject: btrfs: release path before inode lookup during the ino lookup ioctl
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit ee34a82e890a7babb5585daf1a6dd7d4d1cf142a upstream.
+
+During the ino lookup ioctl we can end up calling btrfs_iget() to get an
+inode reference while we are holding a path on a root's btree. If btrfs_iget()
+needs to lookup the inode from the root's btree, because it's not
+currently loaded in memory, then it will need to lock another or the
+same path in the same root btree. This may result in a deadlock and
+trigger the following lockdep splat:
+
+  WARNING: possible circular locking dependency detected
+  6.5.0-rc7-syzkaller-00004-gf7757129e3de #0 Not tainted
+  ------------------------------------------------------
+  syz-executor277/5012 is trying to acquire lock:
+  ffff88802df41710 (btrfs-tree-01){++++}-{3:3}, at: __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136
+
+  but task is already holding lock:
+  ffff88802df418e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136
+
+  which lock already depends on the new lock.
+
+  the existing dependency chain (in reverse order) is:
+
+  -> #1 (btrfs-tree-00){++++}-{3:3}:
+         down_read_nested+0x49/0x2f0 kernel/locking/rwsem.c:1645
+         __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136
+         btrfs_search_slot+0x13a4/0x2f80 fs/btrfs/ctree.c:2302
+         btrfs_init_root_free_objectid+0x148/0x320 fs/btrfs/disk-io.c:4955
+         btrfs_init_fs_root fs/btrfs/disk-io.c:1128 [inline]
+         btrfs_get_root_ref+0x5ae/0xae0 fs/btrfs/disk-io.c:1338
+         btrfs_get_fs_root fs/btrfs/disk-io.c:1390 [inline]
+         open_ctree+0x29c8/0x3030 fs/btrfs/disk-io.c:3494
+         btrfs_fill_super+0x1c7/0x2f0 fs/btrfs/super.c:1154
+         btrfs_mount_root+0x7e0/0x910 fs/btrfs/super.c:1519
+         legacy_get_tree+0xef/0x190 fs/fs_context.c:611
+         vfs_get_tree+0x8c/0x270 fs/super.c:1519
+         fc_mount fs/namespace.c:1112 [inline]
+         vfs_kern_mount+0xbc/0x150 fs/namespace.c:1142
+         btrfs_mount+0x39f/0xb50 fs/btrfs/super.c:1579
+         legacy_get_tree+0xef/0x190 fs/fs_context.c:611
+         vfs_get_tree+0x8c/0x270 fs/super.c:1519
+         do_new_mount+0x28f/0xae0 fs/namespace.c:3335
+         do_mount fs/namespace.c:3675 [inline]
+         __do_sys_mount fs/namespace.c:3884 [inline]
+         __se_sys_mount+0x2d9/0x3c0 fs/namespace.c:3861
+         do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+         do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+         entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+  -> #0 (btrfs-tree-01){++++}-{3:3}:
+         check_prev_add kernel/locking/lockdep.c:3142 [inline]
+         check_prevs_add kernel/locking/lockdep.c:3261 [inline]
+         validate_chain kernel/locking/lockdep.c:3876 [inline]
+         __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144
+         lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761
+         down_read_nested+0x49/0x2f0 kernel/locking/rwsem.c:1645
+         __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136
+         btrfs_tree_read_lock fs/btrfs/locking.c:142 [inline]
+         btrfs_read_lock_root_node+0x292/0x3c0 fs/btrfs/locking.c:281
+         btrfs_search_slot_get_root fs/btrfs/ctree.c:1832 [inline]
+         btrfs_search_slot+0x4ff/0x2f80 fs/btrfs/ctree.c:2154
+         btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:412
+         btrfs_read_locked_inode fs/btrfs/inode.c:3892 [inline]
+         btrfs_iget_path+0x2d9/0x1520 fs/btrfs/inode.c:5716
+         btrfs_search_path_in_tree_user fs/btrfs/ioctl.c:1961 [inline]
+         btrfs_ioctl_ino_lookup_user+0x77a/0xf50 fs/btrfs/ioctl.c:2105
+         btrfs_ioctl+0xb0b/0xd40 fs/btrfs/ioctl.c:4683
+         vfs_ioctl fs/ioctl.c:51 [inline]
+         __do_sys_ioctl fs/ioctl.c:870 [inline]
+         __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:856
+         do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+         do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+         entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+  other info that might help us debug this:
+
+   Possible unsafe locking scenario:
+
+         CPU0                    CPU1
+         ----                    ----
+    rlock(btrfs-tree-00);
+                                 lock(btrfs-tree-01);
+                                 lock(btrfs-tree-00);
+    rlock(btrfs-tree-01);
+
+   *** DEADLOCK ***
+
+  1 lock held by syz-executor277/5012:
+   #0: ffff88802df418e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136
+
+  stack backtrace:
+  CPU: 1 PID: 5012 Comm: syz-executor277 Not tainted 6.5.0-rc7-syzkaller-00004-gf7757129e3de #0
+  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+  Call Trace:
+   <TASK>
+   __dump_stack lib/dump_stack.c:88 [inline]
+   dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
+   check_noncircular+0x375/0x4a0 kernel/locking/lockdep.c:2195
+   check_prev_add kernel/locking/lockdep.c:3142 [inline]
+   check_prevs_add kernel/locking/lockdep.c:3261 [inline]
+   validate_chain kernel/locking/lockdep.c:3876 [inline]
+   __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144
+   lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761
+   down_read_nested+0x49/0x2f0 kernel/locking/rwsem.c:1645
+   __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136
+   btrfs_tree_read_lock fs/btrfs/locking.c:142 [inline]
+   btrfs_read_lock_root_node+0x292/0x3c0 fs/btrfs/locking.c:281
+   btrfs_search_slot_get_root fs/btrfs/ctree.c:1832 [inline]
+   btrfs_search_slot+0x4ff/0x2f80 fs/btrfs/ctree.c:2154
+   btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:412
+   btrfs_read_locked_inode fs/btrfs/inode.c:3892 [inline]
+   btrfs_iget_path+0x2d9/0x1520 fs/btrfs/inode.c:5716
+   btrfs_search_path_in_tree_user fs/btrfs/ioctl.c:1961 [inline]
+   btrfs_ioctl_ino_lookup_user+0x77a/0xf50 fs/btrfs/ioctl.c:2105
+   btrfs_ioctl+0xb0b/0xd40 fs/btrfs/ioctl.c:4683
+   vfs_ioctl fs/ioctl.c:51 [inline]
+   __do_sys_ioctl fs/ioctl.c:870 [inline]
+   __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:856
+   do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+   do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+   entry_SYSCALL_64_after_hwframe+0x63/0xcd
+  RIP: 0033:0x7f0bec94ea39
+
+Fix this simply by releasing the path before calling btrfs_iget() as at
+that point we don't need the path anymore.
+
+Reported-by: syzbot+bf66ad948981797d2f1d@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/00000000000045fa140603c4a969@google.com/
+Fixes: 23d0b79dfaed ("btrfs: Add unprivileged version of ino_lookup ioctl")
+CC: stable@vger.kernel.org # 4.19+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -2550,6 +2550,13 @@ static int btrfs_search_path_in_tree_use
+                               goto out_put;
+                       }
+ 
++                      /*
++                       * We don't need the path anymore, so release it and
++                       * avoid deadlocks and lockdep warnings in case
++                       * btrfs_iget() needs to lookup the inode from its root
++                       * btree and lock the same leaf.
++                       */
++                      btrfs_release_path(path);
+                       temp_inode = btrfs_iget(sb, key2.objectid, root);
+                       if (IS_ERR(temp_inode)) {
+                               ret = PTR_ERR(temp_inode);
+@@ -2569,7 +2576,6 @@ static int btrfs_search_path_in_tree_use
+                               goto out_put;
+                       }
+ 
+-                      btrfs_release_path(path);
+                       key.objectid = key.offset;
+                       key.offset = (u64)-1;
+                       dirid = key.objectid;
diff --git a/queue-5.10/ext4-fix-rec_len-verify-error.patch b/queue-5.10/ext4-fix-rec_len-verify-error.patch

new file mode 100644 (file)

index 0000000..9e0e6a7
--- /dev/null
+++ b/queue-5.10/ext4-fix-rec_len-verify-error.patch
@@ -0,0 +1,122 @@
+From 7fda67e8c3ab6069f75888f67958a6d30454a9f6 Mon Sep 17 00:00:00 2001
+From: Shida Zhang <zhangshida@kylinos.cn>
+Date: Thu, 3 Aug 2023 14:09:38 +0800
+Subject: ext4: fix rec_len verify error
+
+From: Shida Zhang <zhangshida@kylinos.cn>
+
+commit 7fda67e8c3ab6069f75888f67958a6d30454a9f6 upstream.
+
+With the configuration PAGE_SIZE 64k and filesystem blocksize 64k,
+a problem occurred when more than 13 million files were directly created
+under a directory:
+
+EXT4-fs error (device xx): ext4_dx_csum_set:492: inode #xxxx: comm xxxxx: dir seems corrupt?  Run e2fsck -D.
+EXT4-fs error (device xx): ext4_dx_csum_verify:463: inode #xxxx: comm xxxxx: dir seems corrupt?  Run e2fsck -D.
+EXT4-fs error (device xx): dx_probe:856: inode #xxxx: block 8188: comm xxxxx: Directory index failed checksum
+
+When enough files are created, the fake_dirent->reclen will be 0xffff.
+It doesn't equal the blocksize 65536, i.e. 0x10000.
+
+But this is not the case when the blocksize equals 4k.
+When enough files are created, the fake_dirent->reclen will be 0x1000.
+It equals the blocksize 4k, i.e. 0x1000.
+
+The problem seems to be related to the limitation of the 16-bit field
+when the blocksize is set to 64k.
+To address this, helpers like ext4_rec_len_{from,to}_disk have already
+been introduced to complete the conversion between the encoded and the
+plain form of rec_len.
+
+So fix this one by using the helper, and all the others in this file too.
+
+Cc: stable@kernel.org
+Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes")
+Suggested-by: Andreas Dilger <adilger@dilger.ca>
+Suggested-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Shida Zhang <zhangshida@kylinos.cn>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Link: https://lore.kernel.org/r/20230803060938.1929759-1-zhangshida@kylinos.cn
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/namei.c |   26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -339,17 +339,17 @@ static struct ext4_dir_entry_tail *get_d
+                                                  struct buffer_head *bh)
+ {
+       struct ext4_dir_entry_tail *t;
++      int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
+ 
+ #ifdef PARANOID
+       struct ext4_dir_entry *d, *top;
+ 
+       d = (struct ext4_dir_entry *)bh->b_data;
+       top = (struct ext4_dir_entry *)(bh->b_data +
+-              (EXT4_BLOCK_SIZE(inode->i_sb) -
+-               sizeof(struct ext4_dir_entry_tail)));
+-      while (d < top && d->rec_len)
++              (blocksize - sizeof(struct ext4_dir_entry_tail)));
++      while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
+               d = (struct ext4_dir_entry *)(((void *)d) +
+-                  le16_to_cpu(d->rec_len));
++                  ext4_rec_len_from_disk(d->rec_len, blocksize));
+ 
+       if (d != top)
+               return NULL;
+@@ -360,7 +360,8 @@ static struct ext4_dir_entry_tail *get_d
+ #endif
+ 
+       if (t->det_reserved_zero1 ||
+-          le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
++          (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
++           sizeof(struct ext4_dir_entry_tail)) ||
+           t->det_reserved_zero2 ||
+           t->det_reserved_ft != EXT4_FT_DIR_CSUM)
+               return NULL;
+@@ -441,13 +442,14 @@ static struct dx_countlimit *get_dx_coun
+       struct ext4_dir_entry *dp;
+       struct dx_root_info *root;
+       int count_offset;
++      int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
++      unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
+ 
+-      if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
++      if (rlen == blocksize)
+               count_offset = 8;
+-      else if (le16_to_cpu(dirent->rec_len) == 12) {
++      else if (rlen == 12) {
+               dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
+-              if (le16_to_cpu(dp->rec_len) !=
+-                  EXT4_BLOCK_SIZE(inode->i_sb) - 12)
++              if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
+                       return NULL;
+               root = (struct dx_root_info *)(((void *)dp + 12));
+               if (root->reserved_zero ||
+@@ -1256,6 +1258,7 @@ static int dx_make_map(struct inode *dir
+       unsigned int buflen = bh->b_size;
+       char *base = bh->b_data;
+       struct dx_hash_info h = *hinfo;
++      int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);
+ 
+       if (ext4_has_metadata_csum(dir->i_sb))
+               buflen -= sizeof(struct ext4_dir_entry_tail);
+@@ -1269,11 +1272,12 @@ static int dx_make_map(struct inode *dir
+                       map_tail--;
+                       map_tail->hash = h.hash;
+                       map_tail->offs = ((char *) de - base)>>2;
+-                      map_tail->size = le16_to_cpu(de->rec_len);
++                      map_tail->size = ext4_rec_len_from_disk(de->rec_len,
++                                                              blocksize);
+                       count++;
+                       cond_resched();
+               }
+-              de = ext4_next_entry(de, dir->i_sb->s_blocksize);
++              de = ext4_next_entry(de, blocksize);
+       }
+       return count;
+ }
diff --git a/queue-5.10/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch b/queue-5.10/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch

new file mode 100644 (file)

index 0000000..572493e
--- /dev/null
+++ b/queue-5.10/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch
@@ -0,0 +1,44 @@
+From fee465150b458351b6d9b9f66084f3cc3022b88b Mon Sep 17 00:00:00 2001
+From: Tommy Huang <tommy_huang@aspeedtech.com>
+Date: Wed, 6 Sep 2023 08:49:10 +0800
+Subject: i2c: aspeed: Reset the i2c controller when timeout occurs
+
+From: Tommy Huang <tommy_huang@aspeedtech.com>
+
+commit fee465150b458351b6d9b9f66084f3cc3022b88b upstream.
+
+Reset the i2c controller when an i2c transfer timeout occurs.
+The remaining interrupts and device should be reset to avoid
+unpredictable controller behavior.
+
+Fixes: 2e57b7cebb98 ("i2c: aspeed: Add multi-master use case support")
+Cc: <stable@vger.kernel.org> # v5.1+
+Signed-off-by: Tommy Huang <tommy_huang@aspeedtech.com>
+Reviewed-by: Andi Shyti <andi.shyti@kernel.org>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-aspeed.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-aspeed.c
++++ b/drivers/i2c/busses/i2c-aspeed.c
+@@ -693,13 +693,16 @@ static int aspeed_i2c_master_xfer(struct
+ 
+       if (time_left == 0) {
+               /*
+-               * If timed out and bus is still busy in a multi master
+-               * environment, attempt recovery at here.
++               * In a multi-master setup, if a timeout occurs, attempt
++               * recovery. But if the bus is idle, we still need to reset the
++               * i2c controller to clear the remaining interrupts.
+                */
+               if (bus->multi_master &&
+                   (readl(bus->base + ASPEED_I2C_CMD_REG) &
+                    ASPEED_I2CD_BUS_BUSY_STS))
+                       aspeed_i2c_recover_bus(bus);
++              else
++                      aspeed_i2c_reset(bus);
+ 
+               /*
+                * If timed out and the state is still pending, drop the pending
diff --git a/queue-5.10/nfsd-fix-change_info-in-nfsv4-rename-replies.patch b/queue-5.10/nfsd-fix-change_info-in-nfsv4-rename-replies.patch

new file mode 100644 (file)

index 0000000..f2f21b8
--- /dev/null
+++ b/queue-5.10/nfsd-fix-change_info-in-nfsv4-rename-replies.patch
@@ -0,0 +1,36 @@
+From fdd2630a7398191e84822612e589062063bd4f3d Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Sat, 9 Sep 2023 07:12:30 -0400
+Subject: nfsd: fix change_info in NFSv4 RENAME replies
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit fdd2630a7398191e84822612e589062063bd4f3d upstream.
+
+nfsd sends the transposed directory change info in the RENAME reply. The
+source directory is in save_fh and the target is in current_fh.
+
+Reported-by: Zhi Li <yieli@redhat.com>
+Reported-by: Benjamin Coddington <bcodding@redhat.com>
+Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2218844
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4proc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -881,8 +881,8 @@ nfsd4_rename(struct svc_rqst *rqstp, str
+                            rename->rn_tname, rename->rn_tnamelen);
+       if (status)
+               return status;
+-      set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+-      set_change_info(&rename->rn_tinfo, &cstate->save_fh);
++      set_change_info(&rename->rn_sinfo, &cstate->save_fh);
++      set_change_info(&rename->rn_tinfo, &cstate->current_fh);
+       return nfs_ok;
+ }
+ 
diff --git a/queue-5.10/ovl-fix-incorrect-fdput-on-aio-completion.patch b/queue-5.10/ovl-fix-incorrect-fdput-on-aio-completion.patch

new file mode 100644 (file)

index 0000000..a05e055
--- /dev/null
+++ b/queue-5.10/ovl-fix-incorrect-fdput-on-aio-completion.patch
@@ -0,0 +1,69 @@
+From 724768a39374d35b70eaeae8dd87048a2ec7ae8e Mon Sep 17 00:00:00 2001
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Tue, 22 Aug 2023 20:50:59 +0300
+Subject: ovl: fix incorrect fdput() on aio completion
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+commit 724768a39374d35b70eaeae8dd87048a2ec7ae8e upstream.
+
+ovl_{read,write}_iter() always call fdput(real) to put one or zero
+refcounts of the real file, but for aio, whether it was submitted or not,
+ovl_aio_put() also calls fdput(), which is not balanced.  This is only a
+problem in the less common case when FDPUT_FPUT flag is set.
+
+To fix the problem use get_file() to take file refcount and use fput()
+instead of fdput() in ovl_aio_put().
+
+Fixes: 2406a307ac7d ("ovl: implement async IO routines")
+Cc: <stable@vger.kernel.org> # v5.6
+Reviewed-by: Miklos Szeredi <miklos@szeredi.hu>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/overlayfs/file.c |    9 +++------
+ 1 file changed, 3 insertions(+), 6 deletions(-)
+
+--- a/fs/overlayfs/file.c
++++ b/fs/overlayfs/file.c
+@@ -19,7 +19,6 @@ struct ovl_aio_req {
+       struct kiocb iocb;
+       refcount_t ref;
+       struct kiocb *orig_iocb;
+-      struct fd fd;
+ };
+ 
+ static struct kmem_cache *ovl_aio_request_cachep;
+@@ -261,7 +260,7 @@ static rwf_t ovl_iocb_to_rwf(int ifl)
+ static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
+ {
+       if (refcount_dec_and_test(&aio_req->ref)) {
+-              fdput(aio_req->fd);
++              fput(aio_req->iocb.ki_filp);
+               kmem_cache_free(ovl_aio_request_cachep, aio_req);
+       }
+ }
+@@ -327,10 +326,9 @@ static ssize_t ovl_read_iter(struct kioc
+               if (!aio_req)
+                       goto out;
+ 
+-              aio_req->fd = real;
+               real.flags = 0;
+               aio_req->orig_iocb = iocb;
+-              kiocb_clone(&aio_req->iocb, iocb, real.file);
++              kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
+               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+               refcount_set(&aio_req->ref, 2);
+               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+@@ -399,10 +397,9 @@ static ssize_t ovl_write_iter(struct kio
+               /* Pacify lockdep, same trick as done in aio_write() */
+               __sb_writers_release(file_inode(real.file)->i_sb,
+                                    SB_FREEZE_WRITE);
+-              aio_req->fd = real;
+               real.flags = 0;
+               aio_req->orig_iocb = iocb;
+-              kiocb_clone(&aio_req->iocb, iocb, real.file);
++              kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
+               aio_req->iocb.ki_flags = ifl;
+               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+               refcount_set(&aio_req->ref, 2);
diff --git a/queue-5.10/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch b/queue-5.10/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch

new file mode 100644 (file)

index 0000000..8740e6a
--- /dev/null
+++ b/queue-5.10/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch
@@ -0,0 +1,175 @@
+From 0b0747d507bffb827e40fc0f9fb5883fffc23477 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <junxiao.bi@oracle.com>
+Date: Mon, 28 Aug 2023 15:10:18 -0700
+Subject: scsi: megaraid_sas: Fix deadlock on firmware crashdump
+
+From: Junxiao Bi <junxiao.bi@oracle.com>
+
+commit 0b0747d507bffb827e40fc0f9fb5883fffc23477 upstream.
+
+The following processes run into a deadlock. CPU 41 was waiting for CPU 29
+to handle a CSD request while holding spinlock "crashdump_lock", but CPU 29
+was hung by that spinlock with IRQs disabled.
+
+  PID: 17360    TASK: ffff95c1090c5c40  CPU: 41  COMMAND: "mrdiagd"
+  !# 0 [ffffb80edbf37b58] __read_once_size at ffffffff9b871a40 include/linux/compiler.h:185:0
+  !# 1 [ffffb80edbf37b58] atomic_read at ffffffff9b871a40 arch/x86/include/asm/atomic.h:27:0
+  !# 2 [ffffb80edbf37b58] dump_stack at ffffffff9b871a40 lib/dump_stack.c:54:0
+   # 3 [ffffb80edbf37b78] csd_lock_wait_toolong at ffffffff9b131ad5 kernel/smp.c:364:0
+   # 4 [ffffb80edbf37b78] __csd_lock_wait at ffffffff9b131ad5 kernel/smp.c:384:0
+   # 5 [ffffb80edbf37bf8] csd_lock_wait at ffffffff9b13267a kernel/smp.c:394:0
+   # 6 [ffffb80edbf37bf8] smp_call_function_many at ffffffff9b13267a kernel/smp.c:843:0
+   # 7 [ffffb80edbf37c50] smp_call_function at ffffffff9b13279d kernel/smp.c:867:0
+   # 8 [ffffb80edbf37c50] on_each_cpu at ffffffff9b13279d kernel/smp.c:976:0
+   # 9 [ffffb80edbf37c78] flush_tlb_kernel_range at ffffffff9b085c4b arch/x86/mm/tlb.c:742:0
+   #10 [ffffb80edbf37cb8] __purge_vmap_area_lazy at ffffffff9b23a1e0 mm/vmalloc.c:701:0
+   #11 [ffffb80edbf37ce0] try_purge_vmap_area_lazy at ffffffff9b23a2cc mm/vmalloc.c:722:0
+   #12 [ffffb80edbf37ce0] free_vmap_area_noflush at ffffffff9b23a2cc mm/vmalloc.c:754:0
+   #13 [ffffb80edbf37cf8] free_unmap_vmap_area at ffffffff9b23bb3b mm/vmalloc.c:764:0
+   #14 [ffffb80edbf37cf8] remove_vm_area at ffffffff9b23bb3b mm/vmalloc.c:1509:0
+   #15 [ffffb80edbf37d18] __vunmap at ffffffff9b23bb8a mm/vmalloc.c:1537:0
+   #16 [ffffb80edbf37d40] vfree at ffffffff9b23bc85 mm/vmalloc.c:1612:0
+   #17 [ffffb80edbf37d58] megasas_free_host_crash_buffer [megaraid_sas] at ffffffffc020b7f2 drivers/scsi/megaraid/megaraid_sas_fusion.c:3932:0
+   #18 [ffffb80edbf37d80] fw_crash_state_store [megaraid_sas] at ffffffffc01f804d drivers/scsi/megaraid/megaraid_sas_base.c:3291:0
+   #19 [ffffb80edbf37dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0
+   #20 [ffffb80edbf37dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0
+   #21 [ffffb80edbf37de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0
+   #22 [ffffb80edbf37e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0
+   #23 [ffffb80edbf37ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0
+   #24 [ffffb80edbf37ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0
+   #25 [ffffb80edbf37ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0
+   #26 [ffffb80edbf37f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0
+   #27 [ffffb80edbf37f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0
+
+  PID: 17355    TASK: ffff95c1090c3d80  CPU: 29  COMMAND: "mrdiagd"
+  !# 0 [ffffb80f2d3c7d30] __read_once_size at ffffffff9b0f2ab0 include/linux/compiler.h:185:0
+  !# 1 [ffffb80f2d3c7d30] native_queued_spin_lock_slowpath at ffffffff9b0f2ab0 kernel/locking/qspinlock.c:368:0
+   # 2 [ffffb80f2d3c7d58] pv_queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/paravirt.h:674:0
+   # 3 [ffffb80f2d3c7d58] queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/qspinlock.h:53:0
+   # 4 [ffffb80f2d3c7d68] queued_spin_lock at ffffffff9b8961a6 include/asm-generic/qspinlock.h:90:0
+   # 5 [ffffb80f2d3c7d68] do_raw_spin_lock_flags at ffffffff9b8961a6 include/linux/spinlock.h:173:0
+   # 6 [ffffb80f2d3c7d68] __raw_spin_lock_irqsave at ffffffff9b8961a6 include/linux/spinlock_api_smp.h:122:0
+   # 7 [ffffb80f2d3c7d68] _raw_spin_lock_irqsave at ffffffff9b8961a6 kernel/locking/spinlock.c:160:0
+   # 8 [ffffb80f2d3c7d88] fw_crash_buffer_store [megaraid_sas] at ffffffffc01f8129 drivers/scsi/megaraid/megaraid_sas_base.c:3205:0
+   # 9 [ffffb80f2d3c7dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0
+   #10 [ffffb80f2d3c7dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0
+   #11 [ffffb80f2d3c7de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0
+   #12 [ffffb80f2d3c7e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0
+   #13 [ffffb80f2d3c7ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0
+   #14 [ffffb80f2d3c7ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0
+   #15 [ffffb80f2d3c7ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0
+   #16 [ffffb80f2d3c7f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0
+   #17 [ffffb80f2d3c7f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0
+
+The lock is used to synchronize different sysfs operations, it doesn't
+protect any resource that will be touched by an interrupt. Consequently
+it's not required to disable IRQs. Replace the spinlock with a mutex to fix
+the deadlock.
+
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Link: https://lore.kernel.org/r/20230828221018.19471-1-junxiao.bi@oracle.com
+Reviewed-by: Mike Christie <michael.christie@oracle.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/megaraid/megaraid_sas.h      |    2 +-
+ drivers/scsi/megaraid/megaraid_sas_base.c |   21 +++++++++------------
+ 2 files changed, 10 insertions(+), 13 deletions(-)
+
+--- a/drivers/scsi/megaraid/megaraid_sas.h
++++ b/drivers/scsi/megaraid/megaraid_sas.h
+@@ -2327,7 +2327,7 @@ struct megasas_instance {
+       u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */
+       bool use_seqnum_jbod_fp;   /* Added for PD sequence */
+       bool smp_affinity_enable;
+-      spinlock_t crashdump_lock;
++      struct mutex crashdump_lock;
+ 
+       struct megasas_register_set __iomem *reg_set;
+       u32 __iomem *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY];
+--- a/drivers/scsi/megaraid/megaraid_sas_base.c
++++ b/drivers/scsi/megaraid/megaraid_sas_base.c
+@@ -3221,14 +3221,13 @@ fw_crash_buffer_store(struct device *cde
+       struct megasas_instance *instance =
+               (struct megasas_instance *) shost->hostdata;
+       int val = 0;
+-      unsigned long flags;
+ 
+       if (kstrtoint(buf, 0, &val) != 0)
+               return -EINVAL;
+ 
+-      spin_lock_irqsave(&instance->crashdump_lock, flags);
++      mutex_lock(&instance->crashdump_lock);
+       instance->fw_crash_buffer_offset = val;
+-      spin_unlock_irqrestore(&instance->crashdump_lock, flags);
++      mutex_unlock(&instance->crashdump_lock);
+       return strlen(buf);
+ }
+ 
+@@ -3243,24 +3242,23 @@ fw_crash_buffer_show(struct device *cdev
+       unsigned long dmachunk = CRASH_DMA_BUF_SIZE;
+       unsigned long chunk_left_bytes;
+       unsigned long src_addr;
+-      unsigned long flags;
+       u32 buff_offset;
+ 
+-      spin_lock_irqsave(&instance->crashdump_lock, flags);
++      mutex_lock(&instance->crashdump_lock);
+       buff_offset = instance->fw_crash_buffer_offset;
+       if (!instance->crash_dump_buf ||
+               !((instance->fw_crash_state == AVAILABLE) ||
+               (instance->fw_crash_state == COPYING))) {
+               dev_err(&instance->pdev->dev,
+                       "Firmware crash dump is not available\n");
+-              spin_unlock_irqrestore(&instance->crashdump_lock, flags);
++              mutex_unlock(&instance->crashdump_lock);
+               return -EINVAL;
+       }
+ 
+       if (buff_offset > (instance->fw_crash_buffer_size * dmachunk)) {
+               dev_err(&instance->pdev->dev,
+                       "Firmware crash dump offset is out of range\n");
+-              spin_unlock_irqrestore(&instance->crashdump_lock, flags);
++              mutex_unlock(&instance->crashdump_lock);
+               return 0;
+       }
+ 
+@@ -3272,7 +3270,7 @@ fw_crash_buffer_show(struct device *cdev
+       src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] +
+               (buff_offset % dmachunk);
+       memcpy(buf, (void *)src_addr, size);
+-      spin_unlock_irqrestore(&instance->crashdump_lock, flags);
++      mutex_unlock(&instance->crashdump_lock);
+ 
+       return size;
+ }
+@@ -3297,7 +3295,6 @@ fw_crash_state_store(struct device *cdev
+       struct megasas_instance *instance =
+               (struct megasas_instance *) shost->hostdata;
+       int val = 0;
+-      unsigned long flags;
+ 
+       if (kstrtoint(buf, 0, &val) != 0)
+               return -EINVAL;
+@@ -3311,9 +3308,9 @@ fw_crash_state_store(struct device *cdev
+       instance->fw_crash_state = val;
+ 
+       if ((val == COPIED) || (val == COPY_ERROR)) {
+-              spin_lock_irqsave(&instance->crashdump_lock, flags);
++              mutex_lock(&instance->crashdump_lock);
+               megasas_free_host_crash_buffer(instance);
+-              spin_unlock_irqrestore(&instance->crashdump_lock, flags);
++              mutex_unlock(&instance->crashdump_lock);
+               if (val == COPY_ERROR)
+                       dev_info(&instance->pdev->dev, "application failed to "
+                               "copy Firmware crash dump\n");
+@@ -7325,7 +7322,7 @@ static inline void megasas_init_ctrl_par
+       init_waitqueue_head(&instance->int_cmd_wait_q);
+       init_waitqueue_head(&instance->abort_cmd_wait_q);
+ 
+-      spin_lock_init(&instance->crashdump_lock);
++      mutex_init(&instance->crashdump_lock);
+       spin_lock_init(&instance->mfi_pool_lock);
+       spin_lock_init(&instance->hba_lock);
+       spin_lock_init(&instance->stream_lock);
diff --git a/queue-5.10/scsi-pm8001-setup-irqs-on-resume.patch b/queue-5.10/scsi-pm8001-setup-irqs-on-resume.patch

new file mode 100644 (file)

index 0000000..e91d358
--- /dev/null
+++ b/queue-5.10/scsi-pm8001-setup-irqs-on-resume.patch
@@ -0,0 +1,117 @@
+From c91774818b041ed290df29fb1dc0725be9b12e83 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Tue, 12 Sep 2023 08:27:36 +0900
+Subject: scsi: pm8001: Setup IRQs on resume
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit c91774818b041ed290df29fb1dc0725be9b12e83 upstream.
+
+The function pm8001_pci_resume() only calls pm8001_request_irq() without
+calling pm8001_setup_irq(). This causes the IRQ allocation to fail, which
+leads to all drives being removed from the system.
+
+Fix this issue by integrating the code for pm8001_setup_irq() directly
+inside pm8001_request_irq() so that MSI-X setup is performed both during
+normal initialization and resume operations.
+
+Fixes: dbf9bfe61571 ("[SCSI] pm8001: add SAS/SATA HBA driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Link: https://lore.kernel.org/r/20230911232745.325149-2-dlemoal@kernel.org
+Acked-by: Jack Wang <jinpu.wang@ionos.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/pm8001/pm8001_init.c |   51 ++++++++++++--------------------------
+ 1 file changed, 17 insertions(+), 34 deletions(-)
+
+--- a/drivers/scsi/pm8001/pm8001_init.c
++++ b/drivers/scsi/pm8001/pm8001_init.c
+@@ -255,7 +255,6 @@ static irqreturn_t pm8001_interrupt_hand
+       return ret;
+ }
+ 
+-static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha);
+ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha);
+ 
+ /**
+@@ -275,13 +274,6 @@ static int pm8001_alloc(struct pm8001_hb
+       pm8001_dbg(pm8001_ha, INIT, "pm8001_alloc: PHY:%x\n",
+                  pm8001_ha->chip->n_phy);
+ 
+-      /* Setup Interrupt */
+-      rc = pm8001_setup_irq(pm8001_ha);
+-      if (rc) {
+-              pm8001_dbg(pm8001_ha, FAIL,
+-                         "pm8001_setup_irq failed [ret: %d]\n", rc);
+-              goto err_out;
+-      }
+       /* Request Interrupt */
+       rc = pm8001_request_irq(pm8001_ha);
+       if (rc)
+@@ -990,47 +982,38 @@ static u32 pm8001_request_msix(struct pm
+ }
+ #endif
+ 
+-static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha)
+-{
+-      struct pci_dev *pdev;
+-
+-      pdev = pm8001_ha->pdev;
+-
+-#ifdef PM8001_USE_MSIX
+-      if (pci_find_capability(pdev, PCI_CAP_ID_MSIX))
+-              return pm8001_setup_msix(pm8001_ha);
+-      pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n");
+-#endif
+-      return 0;
+-}
+-
+ /**
+  * pm8001_request_irq - register interrupt
+  * @pm8001_ha: our ha struct.
+  */
+ static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha)
+ {
+-      struct pci_dev *pdev;
++      struct pci_dev *pdev = pm8001_ha->pdev;
++#ifdef PM8001_USE_MSIX
+       int rc;
+ 
+-      pdev = pm8001_ha->pdev;
++      if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
++              rc = pm8001_setup_msix(pm8001_ha);
++              if (rc) {
++                      pm8001_dbg(pm8001_ha, FAIL,
++                                 "pm8001_setup_irq failed [ret: %d]\n", rc);
++                      return rc;
++              }
+ 
+-#ifdef PM8001_USE_MSIX
+-      if (pdev->msix_cap && pci_msi_enabled())
+-              return pm8001_request_msix(pm8001_ha);
+-      else {
+-              pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n");
+-              goto intx;
++              if (pdev->msix_cap && pci_msi_enabled())
++                      return pm8001_request_msix(pm8001_ha);
+       }
++
++      pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n");
+ #endif
+ 
+-intx:
+       /* initialize the INT-X interrupt */
+       pm8001_ha->irq_vector[0].irq_id = 0;
+       pm8001_ha->irq_vector[0].drv_inst = pm8001_ha;
+-      rc = request_irq(pdev->irq, pm8001_interrupt_handler_intx, IRQF_SHARED,
+-              pm8001_ha->name, SHOST_TO_SAS_HA(pm8001_ha->shost));
+-      return rc;
++
++      return request_irq(pdev->irq, pm8001_interrupt_handler_intx,
++                         IRQF_SHARED, pm8001_ha->name,
++                         SHOST_TO_SAS_HA(pm8001_ha->shost));
+ }
+ 
+ /**
diff --git a/queue-5.10/series b/queue-5.10/series

index 033dde7c23aca1a7f117e5fe7fbafe1705c66eca..0e51666e82147428a0291856c919436cda618075 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -63,3 +63,16 @@ scsi-lpfc-fix-the-null-vs-is_err-bug-for-debugfs_cre.patch
  x86-boot-compressed-reserve-more-memory-for-page-tab.patch
  samples-hw_breakpoint-fix-building-without-module-un.patch
  md-raid1-fix-error-iso-c90-forbids-mixed-declaration.patch
+attr-block-mode-changes-of-symlinks.patch
+ovl-fix-incorrect-fdput-on-aio-completion.patch
+btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch
+btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch
+tracing-have-current_trace-inc-the-trace-array-ref-count.patch
+tracing-have-option-files-inc-the-trace-array-ref-count.patch
+nfsd-fix-change_info-in-nfsv4-rename-replies.patch
+tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch
+i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch
+ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch
+scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch
+scsi-pm8001-setup-irqs-on-resume.patch
+ext4-fix-rec_len-verify-error.patch
diff --git a/queue-5.10/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch b/queue-5.10/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch

new file mode 100644 (file)

index 0000000..cd6190f
--- /dev/null
+++ b/queue-5.10/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch
@@ -0,0 +1,43 @@
+From 51aab5ffceb43e05119eb059048fd75765d2bc21 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Tue, 5 Sep 2023 14:26:08 -0400
+Subject: tracefs: Add missing lockdown check to tracefs_create_dir()
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 51aab5ffceb43e05119eb059048fd75765d2bc21 upstream.
+
+The function tracefs_create_dir() was missing a lockdown check and was
+called by the RV code. This resulted in inconsistent behavior, with this
+function returning success while other tracefs functions failed. This
+caused the inode to be freed by the wrong kmem_cache.
+
+Link: https://lkml.kernel.org/r/20230905182711.692687042@goodmis.org
+Link: https://lore.kernel.org/all/202309050916.58201dc6-oliver.sang@intel.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Ajay Kaher <akaher@vmware.com>
+Cc: Ching-lin Yu <chinglinyu@google.com>
+Fixes: bf8e602186ec4 ("tracing: Do not create tracefs files if tracefs lockdown is in effect")
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/tracefs/inode.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -554,6 +554,9 @@ static struct dentry *__create_dir(const
+  */
+ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
+ {
++      if (security_locked_down(LOCKDOWN_TRACEFS))
++              return NULL;
++
+       return __create_dir(name, parent, &simple_dir_inode_operations);
+ }
+ 
diff --git a/queue-5.10/tracing-have-current_trace-inc-the-trace-array-ref-count.patch b/queue-5.10/tracing-have-current_trace-inc-the-trace-array-ref-count.patch

new file mode 100644 (file)

index 0000000..99e715a
--- /dev/null
+++ b/queue-5.10/tracing-have-current_trace-inc-the-trace-array-ref-count.patch
@@ -0,0 +1,47 @@
+From 9b37febc578b2e1ad76a105aab11d00af5ec3d27 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Wed, 6 Sep 2023 22:47:14 -0400
+Subject: tracing: Have current_trace inc the trace array ref count
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 9b37febc578b2e1ad76a105aab11d00af5ec3d27 upstream.
+
+The current_trace updates the trace array tracer. For an instance, if the
+file is opened and the instance is deleted, reading or writing to the file
+will cause a use after free.
+
+Up the ref count of the trace array when current_trace is opened.
+
+Link: https://lkml.kernel.org/r/20230907024803.877687227@goodmis.org
+Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Zheng Yejian <zhengyejian1@huawei.com>
+Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()")
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -7324,10 +7324,11 @@ static const struct file_operations trac
+ #endif
+ 
+ static const struct file_operations set_tracer_fops = {
+-      .open           = tracing_open_generic,
++      .open           = tracing_open_generic_tr,
+       .read           = tracing_set_trace_read,
+       .write          = tracing_set_trace_write,
+       .llseek         = generic_file_llseek,
++      .release        = tracing_release_generic_tr,
+ };
+ 
+ static const struct file_operations tracing_pipe_fops = {
diff --git a/queue-5.10/tracing-have-option-files-inc-the-trace-array-ref-count.patch b/queue-5.10/tracing-have-option-files-inc-the-trace-array-ref-count.patch

new file mode 100644 (file)

index 0000000..1913702
--- /dev/null
+++ b/queue-5.10/tracing-have-option-files-inc-the-trace-array-ref-count.patch
@@ -0,0 +1,69 @@
+From 7e2cfbd2d3c86afcd5c26b5c4b1dd251f63c5838 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Wed, 6 Sep 2023 22:47:15 -0400
+Subject: tracing: Have option files inc the trace array ref count
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit 7e2cfbd2d3c86afcd5c26b5c4b1dd251f63c5838 upstream.
+
+The option files update the options for a given trace array. For an
+instance, if the file is opened and the instance is deleted, reading or
+writing to the file will cause a use after free.
+
+Up the ref count of the trace_array when an option file is opened.
+
+Link: https://lkml.kernel.org/r/20230907024804.086679464@goodmis.org
+Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Zheng Yejian <zhengyejian1@huawei.com>
+Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()")
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace.c |   23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -8367,12 +8367,33 @@ trace_options_write(struct file *filp, c
+       return cnt;
+ }
+ 
++static int tracing_open_options(struct inode *inode, struct file *filp)
++{
++      struct trace_option_dentry *topt = inode->i_private;
++      int ret;
++
++      ret = tracing_check_open_get_tr(topt->tr);
++      if (ret)
++              return ret;
++
++      filp->private_data = inode->i_private;
++      return 0;
++}
++
++static int tracing_release_options(struct inode *inode, struct file *file)
++{
++      struct trace_option_dentry *topt = file->private_data;
++
++      trace_array_put(topt->tr);
++      return 0;
++}
+ 
+ static const struct file_operations trace_options_fops = {
+-      .open = tracing_open_generic,
++      .open = tracing_open_options,
+       .read = trace_options_read,
+       .write = trace_options_write,
+       .llseek = generic_file_llseek,
++      .release = tracing_release_options,
+ };
+ 
+ /*
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 20 Sep 2023 10:51:01 +0000 (12:51 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 20 Sep 2023 10:51:01 +0000 (12:51 +0200)
queue-5.10/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/attr-block-mode-changes-of-symlinks.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/ext4-fix-rec_len-verify-error.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/nfsd-fix-change_info-in-nfsv4-rename-replies.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/ovl-fix-incorrect-fdput-on-aio-completion.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/scsi-pm8001-setup-irqs-on-resume.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/series		patch \| blob \| blame \| history
queue-5.10/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/tracing-have-current_trace-inc-the-trace-array-ref-count.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/tracing-have-option-files-inc-the-trace-array-ref-count.patch	[new file with mode: 0644]	patch \| blob