From: Greg Kroah-Hartman Date: Wed, 20 Sep 2023 10:43:42 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.10.196~22 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e3c852b07df090dfd621d91de2d9aac1c5d86ad4;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: ata-libahci-clear-pending-interrupt-status.patch ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch attr-block-mode-changes-of-symlinks.patch btrfs-check-for-btrfs_fs_error-in-pending-ordered-assert.patch btrfs-fix-a-compilation-error-if-debug-is-defined-in-btree_dirty_folio.patch btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch dm-don-t-attempt-to-queue-io-under-rcu-protection.patch drm-amd-display-fix-the-white-screen-issue-when-64gb-dram.patch ext4-fix-rec_len-verify-error.patch i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch io_uring-net-fix-iter-retargeting-for-selected-buf.patch md-put-the-right-device-in-md_seq_next.patch nfsd-fix-change_info-in-nfsv4-rename-replies.patch nvme-avoid-bogus-crto-values.patch ovl-fix-failed-copyup-of-fileattr-on-a-symlink.patch ovl-fix-incorrect-fdput-on-aio-completion.patch revert-drm-amd-disable-s-g-for-apus-when-64gb-or-more-host-memory.patch revert-sunrpc-fail-faster-on-bad-verifier.patch scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch scsi-pm8001-setup-irqs-on-resume.patch selinux-fix-handling-of-empty-opts-in-selinux_fs_context_submount.patch tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch tracing-have-current_trace-inc-the-trace-array-ref-count.patch tracing-have-event-inject-files-inc-the-trace-array-ref-count.patch tracing-have-option-files-inc-the-trace-array-ref-count.patch tracing-have-tracing_max_latency-inc-the-trace-array-ref-count.patch tracing-increase-trace-array-ref-count-on-enable-and-filter-files.patch --- diff --git a/queue-6.1/ata-libahci-clear-pending-interrupt-status.patch b/queue-6.1/ata-libahci-clear-pending-interrupt-status.patch new file mode 100644 index 00000000000..607d9c053df --- /dev/null +++ b/queue-6.1/ata-libahci-clear-pending-interrupt-status.patch @@ -0,0 +1,96 @@ +From 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 Mon Sep 17 00:00:00 2001 +From: Szuying Chen +Date: Thu, 7 Sep 2023 16:17:10 +0800 +Subject: ata: libahci: clear pending interrupt status + +From: Szuying Chen + +commit 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 upstream. + +When a CRC error occurs, the HBA asserts an interrupt to indicate an +interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then +does error recovery. If the adapter receives another SDB FIS +with an error (PxIS.TFES) from the device before the start of the EH +recovery process, the interrupt signaling the new SDB cannot be +serviced as PxIE was cleared already. This in turn results in the HBA +inability to issue any command during the error recovery process after +setting PxCMD.ST to 1 because PxIS.TFES is still set. + +According to AHCI 1.3.1 specifications section 6.2.2, fatal errors +notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will +cause the HBA to enter the ERR:Fatal state. In this state, the HBA +shall not issue any new commands. + +To avoid this situation, introduce the function +ahci_port_clear_pending_irq() to clear pending interrupts before +executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2 +specification. 
+ +Signed-off-by: Szuying Chen +Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset") +Cc: stable@vger.kernel.org +Reviewed-by: Niklas Cassel +Signed-off-by: Damien Le Moal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------ + 1 file changed, 23 insertions(+), 12 deletions(-) + +--- a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -1255,6 +1255,26 @@ static ssize_t ahci_activity_show(struct + return sprintf(buf, "%d\n", emp->blink_policy); + } + ++static void ahci_port_clear_pending_irq(struct ata_port *ap) ++{ ++ struct ahci_host_priv *hpriv = ap->host->private_data; ++ void __iomem *port_mmio = ahci_port_base(ap); ++ u32 tmp; ++ ++ /* clear SError */ ++ tmp = readl(port_mmio + PORT_SCR_ERR); ++ dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp); ++ writel(tmp, port_mmio + PORT_SCR_ERR); ++ ++ /* clear port IRQ */ ++ tmp = readl(port_mmio + PORT_IRQ_STAT); ++ dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp); ++ if (tmp) ++ writel(tmp, port_mmio + PORT_IRQ_STAT); ++ ++ writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT); ++} ++ + static void ahci_port_init(struct device *dev, struct ata_port *ap, + int port_no, void __iomem *mmio, + void __iomem *port_mmio) +@@ -1269,18 +1289,7 @@ static void ahci_port_init(struct device + if (rc) + dev_warn(dev, "%s (%d)\n", emsg, rc); + +- /* clear SError */ +- tmp = readl(port_mmio + PORT_SCR_ERR); +- dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); +- writel(tmp, port_mmio + PORT_SCR_ERR); +- +- /* clear port IRQ */ +- tmp = readl(port_mmio + PORT_IRQ_STAT); +- dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); +- if (tmp) +- writel(tmp, port_mmio + PORT_IRQ_STAT); +- +- writel(1 << port_no, mmio + HOST_IRQ_STAT); ++ ahci_port_clear_pending_irq(ap); + + /* mark esata ports */ + tmp = readl(port_mmio + PORT_CMD); +@@ -1601,6 +1610,8 @@ int ahci_do_hardreset(struct ata_link *l + tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + ++ ahci_port_clear_pending_irq(ap); ++ + rc = sata_link_hardreset(link, timing, deadline, online, + ahci_check_ready); + diff --git a/queue-6.1/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch b/queue-6.1/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch new file mode 100644 index 00000000000..609f88a4f35 --- /dev/null +++ b/queue-6.1/ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch @@ -0,0 +1,112 @@ +From 24e0e61db3cb86a66824531989f1df80e0939f26 Mon Sep 17 00:00:00 2001 +From: Niklas Cassel +Date: Mon, 4 Sep 2023 22:42:56 +0200 +Subject: ata: libata: disallow dev-initiated LPM transitions to unsupported states +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Niklas Cassel + +commit 24e0e61db3cb86a66824531989f1df80e0939f26 upstream. + +In AHCI 1.3.1, the register description for CAP.SSC: +"When cleared to ‘0’, software must not allow the HBA to initiate +transitions to the Slumber state via agressive link power management nor +the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port +must be programmed to disallow device initiated Slumber requests." + +In AHCI 1.3.1, the register description for CAP.PSC: +"When cleared to ‘0’, software must not allow the HBA to initiate +transitions to the Partial state via agressive link power management nor +the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port +must be programmed to disallow device initiated Partial requests." 
+ +Ensure that we always set the corresponding bits in PxSCTL.IPM, such that +a device is not allowed to initiate transitions to power states which are +unsupported by the HBA. + +DevSleep is always initiated by the HBA, however, for completeness, set the +corresponding bit in PxSCTL.IPM such that agressive link power management +cannot transition to DevSleep if DevSleep is not supported. + +sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp. +However, only libahci has the ability to read the CAP/CAP2 register to see +if these features are supported. Therefore, in order to not introduce any +regressions on ata_piix or libata-pmp, create flags that indicate that the +respective feature is NOT supported. This way, the behavior for ata_piix +and libata-pmp should remain unchanged. + +This change is based on a patch originally submitted by Runa Guo-oc. + +Signed-off-by: Niklas Cassel +Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/ahci.c | 9 +++++++++ + drivers/ata/libata-sata.c | 19 ++++++++++++++++--- + include/linux/libata.h | 4 ++++ + 3 files changed, 29 insertions(+), 3 deletions(-) + +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -1884,6 +1884,15 @@ static int ahci_init_one(struct pci_dev + else + dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n"); + ++ if (!(hpriv->cap & HOST_CAP_PART)) ++ host->flags |= ATA_HOST_NO_PART; ++ ++ if (!(hpriv->cap & HOST_CAP_SSC)) ++ host->flags |= ATA_HOST_NO_SSC; ++ ++ if (!(hpriv->cap2 & HOST_CAP2_SDS)) ++ host->flags |= ATA_HOST_NO_DEVSLP; ++ + if (pi.flags & ATA_FLAG_EM) + ahci_reset_em(host); + +--- a/drivers/ata/libata-sata.c ++++ b/drivers/ata/libata-sata.c +@@ -394,10 +394,23 @@ int sata_link_scr_lpm(struct ata_link *l + case ATA_LPM_MED_POWER_WITH_DIPM: + case ATA_LPM_MIN_POWER_WITH_PARTIAL: + case ATA_LPM_MIN_POWER: +- if (ata_link_nr_enabled(link) > 0) +- /* no restrictions on LPM transitions */ ++ if (ata_link_nr_enabled(link) > 0) { ++ /* assume no restrictions on LPM transitions */ + scontrol &= ~(0x7 << 8); +- else { ++ ++ /* ++ * If the controller does not support partial, slumber, ++ * or devsleep, then disallow these transitions. ++ */ ++ if (link->ap->host->flags & ATA_HOST_NO_PART) ++ scontrol |= (0x1 << 8); ++ ++ if (link->ap->host->flags & ATA_HOST_NO_SSC) ++ scontrol |= (0x2 << 8); ++ ++ if (link->ap->host->flags & ATA_HOST_NO_DEVSLP) ++ scontrol |= (0x4 << 8); ++ } else { + /* empty port, power off */ + scontrol &= ~0xf; + scontrol |= (0x1 << 2); +--- a/include/linux/libata.h ++++ b/include/linux/libata.h +@@ -216,6 +216,10 @@ enum { + ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ + ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. 
*/ + ++ ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ ++ ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ ++ ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ ++ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ + + /* various lengths of time */ diff --git a/queue-6.1/attr-block-mode-changes-of-symlinks.patch b/queue-6.1/attr-block-mode-changes-of-symlinks.patch new file mode 100644 index 00000000000..9d1e0843f9d --- /dev/null +++ b/queue-6.1/attr-block-mode-changes-of-symlinks.patch @@ -0,0 +1,140 @@ +From 5d1f903f75a80daa4dfb3d84e114ec8ecbf29956 Mon Sep 17 00:00:00 2001 +From: Christian Brauner +Date: Wed, 12 Jul 2023 20:58:49 +0200 +Subject: attr: block mode changes of symlinks + +From: Christian Brauner + +commit 5d1f903f75a80daa4dfb3d84e114ec8ecbf29956 upstream. + +Changing the mode of symlinks is meaningless as the vfs doesn't take the +mode of a symlink into account during path lookup permission checking. + +However, the vfs doesn't block mode changes on symlinks. This however, +has lead to an untenable mess roughly classifiable into the following +two categories: + +(1) Filesystems that don't implement a i_op->setattr() for symlinks. + + Such filesystems may or may not know that without i_op->setattr() + defined, notify_change() falls back to simple_setattr() causing the + inode's mode in the inode cache to be changed. + + That's a generic issue as this will affect all non-size changing + inode attributes including ownership changes. + + Example: afs + +(2) Filesystems that fail with EOPNOTSUPP but change the mode of the + symlink nonetheless. + + Some filesystems will happily update the mode of a symlink but still + return EOPNOTSUPP. This is the biggest source of confusion for + userspace. + + The EOPNOTSUPP in this case comes from POSIX ACLs. Specifically it + comes from filesystems that call posix_acl_chmod(), e.g., btrfs via + + if (!err && attr->ia_valid & ATTR_MODE) + err = posix_acl_chmod(idmap, dentry, inode->i_mode); + + Filesystems including btrfs don't implement i_op->set_acl() so + posix_acl_chmod() will report EOPNOTSUPP. + + When posix_acl_chmod() is called, most filesystems will have + finished updating the inode. + + Perversely, this has the consequences that this behavior may depend + on two kconfig options and mount options: + + * CONFIG_POSIX_ACL={y,n} + * CONFIG_${FSTYPE}_POSIX_ACL={y,n} + * Opt_acl, Opt_noacl + + Example: btrfs, ext4, xfs + +The only way to change the mode on a symlink currently involves abusing +an O_PATH file descriptor in the following manner: + + fd = openat(-1, "/path/to/link", O_CLOEXEC | O_PATH | O_NOFOLLOW); + + char path[PATH_MAX]; + snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); + chmod(path, 0000); + +But for most major filesystems with POSIX ACL support such as btrfs, +ext4, ceph, tmpfs, xfs and others this will fail with EOPNOTSUPP with +the mode still updated due to the aforementioned posix_acl_chmod() +nonsense. + +So, given that for all major filesystems this would fail with EOPNOTSUPP +and that both glibc (cf. [1]) and musl (cf. [2]) outright block mode +changes on symlinks we should just try and block mode changes on +symlinks directly in the vfs and have a clean break with this nonsense. + +If this causes any regressions, we do the next best thing and fix up all +filesystems that do return EOPNOTSUPP with the mode updated to not call +posix_acl_chmod() on symlinks. + +But as usual, let's try the clean cut solution first. 
It's a simple +patch that can be easily reverted. Not marking this for backport as I'll +do that manually if we're reasonably sure that this works and there are +no strong objections. + +We could block this in chmod_common() but it's more appropriate to do it +notify_change() as it will also mean that we catch filesystems that +change symlink permissions explicitly or accidently. + +Similar proposals were floated in the past as in [3] and [4] and again +recently in [5]. There's also a couple of bugs about this inconsistency +as in [6] and [7]. + +Link: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/fchmodat.c;h=99527a3727e44cb8661ee1f743068f108ec93979;hb=HEAD [1] +Link: https://git.musl-libc.org/cgit/musl/tree/src/stat/fchmodat.c [2] +Link: https://lore.kernel.org/all/20200911065733.GA31579@infradead.org [3] +Link: https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00518.html [4] +Link: https://lore.kernel.org/lkml/87lefmbppo.fsf@oldenburg.str.redhat.com [5] +Link: https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html [6] +Link: https://sourceware.org/bugzilla/show_bug.cgi?id=14578#c17 [7] +Reviewed-by: Aleksa Sarai +Reviewed-by: Christoph Hellwig +Cc: stable@vger.kernel.org # please backport to all LTSes but not before v6.6-rc2 is tagged +Suggested-by: Christoph Hellwig +Suggested-by: Florian Weimer +Message-Id: <20230712-vfs-chmod-symlinks-v2-1-08cfb92b61dd@kernel.org> +Signed-off-by: Christian Brauner +Signed-off-by: Greg Kroah-Hartman +--- + fs/attr.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/fs/attr.c ++++ b/fs/attr.c +@@ -394,9 +394,25 @@ int notify_change(struct user_namespace + return error; + + if ((ia_valid & ATTR_MODE)) { +- umode_t amode = attr->ia_mode; ++ /* ++ * Don't allow changing the mode of symlinks: ++ * ++ * (1) The vfs doesn't take the mode of symlinks into account ++ * during permission checking. ++ * (2) This has never worked correctly. Most major filesystems ++ * did return EOPNOTSUPP due to interactions with POSIX ACLs ++ * but did still updated the mode of the symlink. ++ * This inconsistency led system call wrapper providers such ++ * as libc to block changing the mode of symlinks with ++ * EOPNOTSUPP already. ++ * (3) To even do this in the first place one would have to use ++ * specific file descriptors and quite some effort. ++ */ ++ if (S_ISLNK(inode->i_mode)) ++ return -EOPNOTSUPP; ++ + /* Flag setting protected by i_mutex */ +- if (is_sxid(amode)) ++ if (is_sxid(attr->ia_mode)) + inode->i_flags &= ~S_NOSEC; + } + diff --git a/queue-6.1/btrfs-check-for-btrfs_fs_error-in-pending-ordered-assert.patch b/queue-6.1/btrfs-check-for-btrfs_fs_error-in-pending-ordered-assert.patch new file mode 100644 index 00000000000..f43c5a7d552 --- /dev/null +++ b/queue-6.1/btrfs-check-for-btrfs_fs_error-in-pending-ordered-assert.patch @@ -0,0 +1,40 @@ +From 4ca8e03cf2bfaeef7c85939fa1ea0c749cd116ab Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Thu, 24 Aug 2023 16:59:04 -0400 +Subject: btrfs: check for BTRFS_FS_ERROR in pending ordered assert + +From: Josef Bacik + +commit 4ca8e03cf2bfaeef7c85939fa1ea0c749cd116ab upstream. + +If we do fast tree logging we increment a counter on the current +transaction for every ordered extent we need to wait for. This means we +expect the transaction to still be there when we clear pending on the +ordered extent. 
However if we happen to abort the transaction and clean
+it up, there could be no running transaction, and thus we'll trip the
+"ASSERT(trans)" check. This is obviously incorrect, and the code
+properly deals with the case that the transaction doesn't exist. Fix
+this ASSERT() to only fire if there's no trans and we don't have
+BTRFS_FS_ERROR() set on the file system.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Filipe Manana
+Signed-off-by: Josef Bacik
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/btrfs/ordered-data.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -580,7 +580,7 @@ void btrfs_remove_ordered_extent(struct
+ refcount_inc(&trans->use_count);
+ spin_unlock(&fs_info->trans_lock);
+
+- ASSERT(trans);
++ ASSERT(trans || BTRFS_FS_ERROR(fs_info));
+ if (trans) {
+ if (atomic_dec_and_test(&trans->pending_ordered))
+ wake_up(&trans->pending_wait);
diff --git a/queue-6.1/btrfs-fix-a-compilation-error-if-debug-is-defined-in-btree_dirty_folio.patch b/queue-6.1/btrfs-fix-a-compilation-error-if-debug-is-defined-in-btree_dirty_folio.patch
new file mode 100644
index 00000000000..b1b393365a9
--- /dev/null
+++ b/queue-6.1/btrfs-fix-a-compilation-error-if-debug-is-defined-in-btree_dirty_folio.patch
@@ -0,0 +1,88 @@
+From 5e0e879926c1ce7e1f5e0dfaacaf2d105f7d8a05 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo
+Date: Tue, 22 Aug 2023 13:50:51 +0800
+Subject: btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio
+
+From: Qu Wenruo
+
+commit 5e0e879926c1ce7e1f5e0dfaacaf2d105f7d8a05 upstream.
+
+[BUG]
+After commit 72a69cd03082 ("btrfs: subpage: pack all subpage bitmaps
+into a larger bitmap"), the DEBUG section of btree_dirty_folio() would
+no longer compile.
+
+[CAUSE]
+If DEBUG is defined, we would do extra checks for btree_dirty_folio(),
+mostly to make sure the range we marked dirty has an extent buffer and
+that extent buffer is dirty.
+
+For subpage, we need to iterate through all the extent buffers covered
+by that page range, and make sure they all match the criteria.
+
+However commit 72a69cd03082 ("btrfs: subpage: pack all subpage bitmaps
+into a larger bitmap") changes how we store the bitmap: we pack all the
+16-bit bitmaps into a larger bitmap, which would save some space.
+
+This means we no longer have btrfs_subpage::dirty_bitmap, instead the
+dirty bitmap now starts at btrfs_subpage_info::dirty_offset, and has a
+length of btrfs_subpage_info::bitmap_nr_bits.
+
+[FIX]
+Although I'm not sure if it still makes sense to maintain such code, at
+least let it compile.
+
+This patch would let us test the bits one by one through the bitmaps.
+ +CC: stable@vger.kernel.org # 6.1+ +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/disk-io.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -859,6 +859,7 @@ static bool btree_dirty_folio(struct add + struct folio *folio) + { + struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb); ++ struct btrfs_subpage_info *spi = fs_info->subpage_info; + struct btrfs_subpage *subpage; + struct extent_buffer *eb; + int cur_bit = 0; +@@ -872,18 +873,19 @@ static bool btree_dirty_folio(struct add + btrfs_assert_tree_write_locked(eb); + return filemap_dirty_folio(mapping, folio); + } ++ ++ ASSERT(spi); + subpage = folio_get_private(folio); + +- ASSERT(subpage->dirty_bitmap); +- while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) { ++ for (cur_bit = spi->dirty_offset; ++ cur_bit < spi->dirty_offset + spi->bitmap_nr_bits; ++ cur_bit++) { + unsigned long flags; + u64 cur; +- u16 tmp = (1 << cur_bit); + + spin_lock_irqsave(&subpage->lock, flags); +- if (!(tmp & subpage->dirty_bitmap)) { ++ if (!test_bit(cur_bit, subpage->bitmaps)) { + spin_unlock_irqrestore(&subpage->lock, flags); +- cur_bit++; + continue; + } + spin_unlock_irqrestore(&subpage->lock, flags); +@@ -896,7 +898,7 @@ static bool btree_dirty_folio(struct add + btrfs_assert_tree_write_locked(eb); + free_extent_buffer(eb); + +- cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits); ++ cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1; + } + return filemap_dirty_folio(mapping, folio); + } diff --git a/queue-6.1/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch b/queue-6.1/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch new file mode 100644 index 00000000000..33950670440 --- /dev/null +++ b/queue-6.1/btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch @@ -0,0 +1,188 @@ +From e110f8911ddb93e6f55da14ccbbe705397b30d0b Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 29 Aug 2023 11:34:52 +0100 +Subject: btrfs: fix lockdep splat and potential deadlock after failure running delayed items + +From: Filipe Manana + +commit e110f8911ddb93e6f55da14ccbbe705397b30d0b upstream. + +When running delayed items we are holding a delayed node's mutex and then +we will attempt to modify a subvolume btree to insert/update/delete the +delayed items. However if have an error during the insertions for example, +btrfs_insert_delayed_items() may return with a path that has locked extent +buffers (a leaf at the very least), and then we attempt to release the +delayed node at __btrfs_run_delayed_items(), which requires taking the +delayed node's mutex, causing an ABBA type of deadlock. This was reported +by syzbot and the lockdep splat is the following: + + WARNING: possible circular locking dependency detected + 6.5.0-rc7-syzkaller-00024-g93f5de5f648d #0 Not tainted + ------------------------------------------------------ + syz-executor.2/13257 is trying to acquire lock: + ffff88801835c0c0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node+0x9a/0xaa0 fs/btrfs/delayed-inode.c:256 + + but task is already holding lock: + ffff88802a5ab8e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_lock+0x3c/0x2a0 fs/btrfs/locking.c:198 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + + -> #1 (btrfs-tree-00){++++}-{3:3}: + __lock_release kernel/locking/lockdep.c:5475 [inline] + lock_release+0x36f/0x9d0 kernel/locking/lockdep.c:5781 + up_write+0x79/0x580 kernel/locking/rwsem.c:1625 + btrfs_tree_unlock_rw fs/btrfs/locking.h:189 [inline] + btrfs_unlock_up_safe+0x179/0x3b0 fs/btrfs/locking.c:239 + search_leaf fs/btrfs/ctree.c:1986 [inline] + btrfs_search_slot+0x2511/0x2f80 fs/btrfs/ctree.c:2230 + btrfs_insert_empty_items+0x9c/0x180 fs/btrfs/ctree.c:4376 + btrfs_insert_delayed_item fs/btrfs/delayed-inode.c:746 [inline] + btrfs_insert_delayed_items fs/btrfs/delayed-inode.c:824 [inline] + __btrfs_commit_inode_delayed_items+0xd24/0x2410 fs/btrfs/delayed-inode.c:1111 + __btrfs_run_delayed_items+0x1db/0x430 fs/btrfs/delayed-inode.c:1153 + flush_space+0x269/0xe70 fs/btrfs/space-info.c:723 + btrfs_async_reclaim_metadata_space+0x106/0x350 fs/btrfs/space-info.c:1078 + process_one_work+0x92c/0x12c0 kernel/workqueue.c:2600 + worker_thread+0xa63/0x1210 kernel/workqueue.c:2751 + kthread+0x2b8/0x350 kernel/kthread.c:389 + ret_from_fork+0x2e/0x60 arch/x86/kernel/process.c:145 + ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 + + -> #0 (&delayed_node->mutex){+.+.}-{3:3}: + check_prev_add kernel/locking/lockdep.c:3142 [inline] + check_prevs_add kernel/locking/lockdep.c:3261 [inline] + validate_chain kernel/locking/lockdep.c:3876 [inline] + __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144 + lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761 + __mutex_lock_common+0x1d8/0x2530 kernel/locking/mutex.c:603 + __mutex_lock kernel/locking/mutex.c:747 [inline] + mutex_lock_nested+0x1b/0x20 kernel/locking/mutex.c:799 + __btrfs_release_delayed_node+0x9a/0xaa0 fs/btrfs/delayed-inode.c:256 + btrfs_release_delayed_node fs/btrfs/delayed-inode.c:281 [inline] + __btrfs_run_delayed_items+0x2b5/0x430 fs/btrfs/delayed-inode.c:1156 + btrfs_commit_transaction+0x859/0x2ff0 fs/btrfs/transaction.c:2276 + btrfs_sync_file+0xf56/0x1330 fs/btrfs/file.c:1988 + vfs_fsync_range fs/sync.c:188 [inline] + vfs_fsync fs/sync.c:202 [inline] + do_fsync fs/sync.c:212 [inline] + __do_sys_fsync fs/sync.c:220 [inline] + __se_sys_fsync fs/sync.c:218 [inline] + __x64_sys_fsync+0x196/0x1e0 fs/sync.c:218 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + + other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(btrfs-tree-00); + lock(&delayed_node->mutex); + lock(btrfs-tree-00); + lock(&delayed_node->mutex); + + *** DEADLOCK *** + + 3 locks held by syz-executor.2/13257: + #0: ffff88802c1ee370 (btrfs_trans_num_writers){++++}-{0:0}, at: spin_unlock include/linux/spinlock.h:391 [inline] + #0: ffff88802c1ee370 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0xb87/0xe00 fs/btrfs/transaction.c:287 + #1: ffff88802c1ee398 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0xbb2/0xe00 fs/btrfs/transaction.c:288 + #2: ffff88802a5ab8e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_lock+0x3c/0x2a0 fs/btrfs/locking.c:198 + + stack backtrace: + CPU: 0 PID: 13257 Comm: syz-executor.2 Not tainted 6.5.0-rc7-syzkaller-00024-g93f5de5f648d #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 + Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 + check_noncircular+0x375/0x4a0 
kernel/locking/lockdep.c:2195 + check_prev_add kernel/locking/lockdep.c:3142 [inline] + check_prevs_add kernel/locking/lockdep.c:3261 [inline] + validate_chain kernel/locking/lockdep.c:3876 [inline] + __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144 + lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761 + __mutex_lock_common+0x1d8/0x2530 kernel/locking/mutex.c:603 + __mutex_lock kernel/locking/mutex.c:747 [inline] + mutex_lock_nested+0x1b/0x20 kernel/locking/mutex.c:799 + __btrfs_release_delayed_node+0x9a/0xaa0 fs/btrfs/delayed-inode.c:256 + btrfs_release_delayed_node fs/btrfs/delayed-inode.c:281 [inline] + __btrfs_run_delayed_items+0x2b5/0x430 fs/btrfs/delayed-inode.c:1156 + btrfs_commit_transaction+0x859/0x2ff0 fs/btrfs/transaction.c:2276 + btrfs_sync_file+0xf56/0x1330 fs/btrfs/file.c:1988 + vfs_fsync_range fs/sync.c:188 [inline] + vfs_fsync fs/sync.c:202 [inline] + do_fsync fs/sync.c:212 [inline] + __do_sys_fsync fs/sync.c:220 [inline] + __se_sys_fsync fs/sync.c:218 [inline] + __x64_sys_fsync+0x196/0x1e0 fs/sync.c:218 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + RIP: 0033:0x7f3ad047cae9 + Code: 28 00 00 00 75 (...) + RSP: 002b:00007f3ad12510c8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a + RAX: ffffffffffffffda RBX: 00007f3ad059bf80 RCX: 00007f3ad047cae9 + RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005 + RBP: 00007f3ad04c847a R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + R13: 000000000000000b R14: 00007f3ad059bf80 R15: 00007ffe56af92f8 + + ------------[ cut here ]------------ + +Fix this by releasing the path before releasing the delayed node in the +error path at __btrfs_run_delayed_items(). + +Reported-by: syzbot+a379155f07c134ea9879@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/000000000000abba27060403b5bd@google.com/ +CC: stable@vger.kernel.org # 4.14+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/delayed-inode.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/delayed-inode.c ++++ b/fs/btrfs/delayed-inode.c +@@ -1148,20 +1148,33 @@ static int __btrfs_run_delayed_items(str + ret = __btrfs_commit_inode_delayed_items(trans, path, + curr_node); + if (ret) { +- btrfs_release_delayed_node(curr_node); +- curr_node = NULL; + btrfs_abort_transaction(trans, ret); + break; + } + + prev_node = curr_node; + curr_node = btrfs_next_delayed_node(curr_node); ++ /* ++ * See the comment below about releasing path before releasing ++ * node. If the commit of delayed items was successful the path ++ * should always be released, but in case of an error, it may ++ * point to locked extent buffers (a leaf at the very least). ++ */ ++ ASSERT(path->nodes[0] == NULL); + btrfs_release_delayed_node(prev_node); + } + ++ /* ++ * Release the path to avoid a potential deadlock and lockdep splat when ++ * releasing the delayed node, as that requires taking the delayed node's ++ * mutex. If another task starts running delayed items before we take ++ * the mutex, it will first lock the mutex and then it may try to lock ++ * the same btree path (leaf). 
++ */ ++ btrfs_free_path(path); ++ + if (curr_node) + btrfs_release_delayed_node(curr_node); +- btrfs_free_path(path); + trans->block_rsv = block_rsv; + + return ret; diff --git a/queue-6.1/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch b/queue-6.1/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch new file mode 100644 index 00000000000..1ac578b7f85 --- /dev/null +++ b/queue-6.1/btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch @@ -0,0 +1,169 @@ +From ee34a82e890a7babb5585daf1a6dd7d4d1cf142a Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Sat, 26 Aug 2023 11:28:20 +0100 +Subject: btrfs: release path before inode lookup during the ino lookup ioctl + +From: Filipe Manana + +commit ee34a82e890a7babb5585daf1a6dd7d4d1cf142a upstream. + +During the ino lookup ioctl we can end up calling btrfs_iget() to get an +inode reference while we are holding on a root's btree. If btrfs_iget() +needs to lookup the inode from the root's btree, because it's not +currently loaded in memory, then it will need to lock another or the +same path in the same root btree. This may result in a deadlock and +trigger the following lockdep splat: + + WARNING: possible circular locking dependency detected + 6.5.0-rc7-syzkaller-00004-gf7757129e3de #0 Not tainted + ------------------------------------------------------ + syz-executor277/5012 is trying to acquire lock: + ffff88802df41710 (btrfs-tree-01){++++}-{3:3}, at: __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136 + + but task is already holding lock: + ffff88802df418e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136 + + which lock already depends on the new lock. + + the existing dependency chain (in reverse order) is: + + -> #1 (btrfs-tree-00){++++}-{3:3}: + down_read_nested+0x49/0x2f0 kernel/locking/rwsem.c:1645 + __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136 + btrfs_search_slot+0x13a4/0x2f80 fs/btrfs/ctree.c:2302 + btrfs_init_root_free_objectid+0x148/0x320 fs/btrfs/disk-io.c:4955 + btrfs_init_fs_root fs/btrfs/disk-io.c:1128 [inline] + btrfs_get_root_ref+0x5ae/0xae0 fs/btrfs/disk-io.c:1338 + btrfs_get_fs_root fs/btrfs/disk-io.c:1390 [inline] + open_ctree+0x29c8/0x3030 fs/btrfs/disk-io.c:3494 + btrfs_fill_super+0x1c7/0x2f0 fs/btrfs/super.c:1154 + btrfs_mount_root+0x7e0/0x910 fs/btrfs/super.c:1519 + legacy_get_tree+0xef/0x190 fs/fs_context.c:611 + vfs_get_tree+0x8c/0x270 fs/super.c:1519 + fc_mount fs/namespace.c:1112 [inline] + vfs_kern_mount+0xbc/0x150 fs/namespace.c:1142 + btrfs_mount+0x39f/0xb50 fs/btrfs/super.c:1579 + legacy_get_tree+0xef/0x190 fs/fs_context.c:611 + vfs_get_tree+0x8c/0x270 fs/super.c:1519 + do_new_mount+0x28f/0xae0 fs/namespace.c:3335 + do_mount fs/namespace.c:3675 [inline] + __do_sys_mount fs/namespace.c:3884 [inline] + __se_sys_mount+0x2d9/0x3c0 fs/namespace.c:3861 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + + -> #0 (btrfs-tree-01){++++}-{3:3}: + check_prev_add kernel/locking/lockdep.c:3142 [inline] + check_prevs_add kernel/locking/lockdep.c:3261 [inline] + validate_chain kernel/locking/lockdep.c:3876 [inline] + __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144 + lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761 + down_read_nested+0x49/0x2f0 kernel/locking/rwsem.c:1645 + __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136 + btrfs_tree_read_lock fs/btrfs/locking.c:142 [inline] + 
btrfs_read_lock_root_node+0x292/0x3c0 fs/btrfs/locking.c:281 + btrfs_search_slot_get_root fs/btrfs/ctree.c:1832 [inline] + btrfs_search_slot+0x4ff/0x2f80 fs/btrfs/ctree.c:2154 + btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:412 + btrfs_read_locked_inode fs/btrfs/inode.c:3892 [inline] + btrfs_iget_path+0x2d9/0x1520 fs/btrfs/inode.c:5716 + btrfs_search_path_in_tree_user fs/btrfs/ioctl.c:1961 [inline] + btrfs_ioctl_ino_lookup_user+0x77a/0xf50 fs/btrfs/ioctl.c:2105 + btrfs_ioctl+0xb0b/0xd40 fs/btrfs/ioctl.c:4683 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:870 [inline] + __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:856 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + + other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + rlock(btrfs-tree-00); + lock(btrfs-tree-01); + lock(btrfs-tree-00); + rlock(btrfs-tree-01); + + *** DEADLOCK *** + + 1 lock held by syz-executor277/5012: + #0: ffff88802df418e8 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136 + + stack backtrace: + CPU: 1 PID: 5012 Comm: syz-executor277 Not tainted 6.5.0-rc7-syzkaller-00004-gf7757129e3de #0 + Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 + Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 + check_noncircular+0x375/0x4a0 kernel/locking/lockdep.c:2195 + check_prev_add kernel/locking/lockdep.c:3142 [inline] + check_prevs_add kernel/locking/lockdep.c:3261 [inline] + validate_chain kernel/locking/lockdep.c:3876 [inline] + __lock_acquire+0x39ff/0x7f70 kernel/locking/lockdep.c:5144 + lock_acquire+0x1e3/0x520 kernel/locking/lockdep.c:5761 + down_read_nested+0x49/0x2f0 kernel/locking/rwsem.c:1645 + __btrfs_tree_read_lock+0x2f/0x220 fs/btrfs/locking.c:136 + btrfs_tree_read_lock fs/btrfs/locking.c:142 [inline] + btrfs_read_lock_root_node+0x292/0x3c0 fs/btrfs/locking.c:281 + btrfs_search_slot_get_root fs/btrfs/ctree.c:1832 [inline] + btrfs_search_slot+0x4ff/0x2f80 fs/btrfs/ctree.c:2154 + btrfs_lookup_inode+0xdc/0x480 fs/btrfs/inode-item.c:412 + btrfs_read_locked_inode fs/btrfs/inode.c:3892 [inline] + btrfs_iget_path+0x2d9/0x1520 fs/btrfs/inode.c:5716 + btrfs_search_path_in_tree_user fs/btrfs/ioctl.c:1961 [inline] + btrfs_ioctl_ino_lookup_user+0x77a/0xf50 fs/btrfs/ioctl.c:2105 + btrfs_ioctl+0xb0b/0xd40 fs/btrfs/ioctl.c:4683 + vfs_ioctl fs/ioctl.c:51 [inline] + __do_sys_ioctl fs/ioctl.c:870 [inline] + __se_sys_ioctl+0xf8/0x170 fs/ioctl.c:856 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + RIP: 0033:0x7f0bec94ea39 + +Fix this simply by releasing the path before calling btrfs_iget() as at +point we don't need the path anymore. 
+ +Reported-by: syzbot+bf66ad948981797d2f1d@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/00000000000045fa140603c4a969@google.com/ +Fixes: 23d0b79dfaed ("btrfs: Add unprivileged version of ino_lookup ioctl") +CC: stable@vger.kernel.org # 4.19+ +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2850,6 +2850,13 @@ static int btrfs_search_path_in_tree_use + goto out_put; + } + ++ /* ++ * We don't need the path anymore, so release it and ++ * avoid deadlocks and lockdep warnings in case ++ * btrfs_iget() needs to lookup the inode from its root ++ * btree and lock the same leaf. ++ */ ++ btrfs_release_path(path); + temp_inode = btrfs_iget(sb, key2.objectid, root); + if (IS_ERR(temp_inode)) { + ret = PTR_ERR(temp_inode); +@@ -2870,7 +2877,6 @@ static int btrfs_search_path_in_tree_use + goto out_put; + } + +- btrfs_release_path(path); + key.objectid = key.offset; + key.offset = (u64)-1; + dirid = key.objectid; diff --git a/queue-6.1/dm-don-t-attempt-to-queue-io-under-rcu-protection.patch b/queue-6.1/dm-don-t-attempt-to-queue-io-under-rcu-protection.patch new file mode 100644 index 00000000000..f6f97b1007e --- /dev/null +++ b/queue-6.1/dm-don-t-attempt-to-queue-io-under-rcu-protection.patch @@ -0,0 +1,180 @@ +From a9ce385344f916cd1c36a33905e564f5581beae9 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 15 Sep 2023 13:14:23 -0600 +Subject: dm: don't attempt to queue IO under RCU protection + +From: Jens Axboe + +commit a9ce385344f916cd1c36a33905e564f5581beae9 upstream. + +dm looks up the table for IO based on the request type, with an +assumption that if the request is marked REQ_NOWAIT, it's fine to +attempt to submit that IO while under RCU read lock protection. This +is not OK, as REQ_NOWAIT just means that we should not be sleeping +waiting on other IO, it does not mean that we can't potentially +schedule. + +A simple test case demonstrates this quite nicely: + +int main(int argc, char *argv[]) +{ + struct iovec iov; + int fd; + + fd = open("/dev/dm-0", O_RDONLY | O_DIRECT); + posix_memalign(&iov.iov_base, 4096, 4096); + iov.iov_len = 4096; + preadv2(fd, &iov, 1, 0, RWF_NOWAIT); + return 0; +} + +which will instantly spew: + +BUG: sleeping function called from invalid context at include/linux/sched/mm.h:306 +in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 5580, name: dm-nowait +preempt_count: 0, expected: 0 +RCU nest depth: 1, expected: 0 +INFO: lockdep is turned off. +CPU: 7 PID: 5580 Comm: dm-nowait Not tainted 6.6.0-rc1-g39956d2dcd81 #132 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 +Call Trace: + + dump_stack_lvl+0x11d/0x1b0 + __might_resched+0x3c3/0x5e0 + ? preempt_count_sub+0x150/0x150 + mempool_alloc+0x1e2/0x390 + ? mempool_resize+0x7d0/0x7d0 + ? lock_sync+0x190/0x190 + ? lock_release+0x4b7/0x670 + ? internal_get_user_pages_fast+0x868/0x2d40 + bio_alloc_bioset+0x417/0x8c0 + ? bvec_alloc+0x200/0x200 + ? internal_get_user_pages_fast+0xb8c/0x2d40 + bio_alloc_clone+0x53/0x100 + dm_submit_bio+0x27f/0x1a20 + ? lock_release+0x4b7/0x670 + ? blk_try_enter_queue+0x1a0/0x4d0 + ? dm_dax_direct_access+0x260/0x260 + ? rcu_is_watching+0x12/0xb0 + ? blk_try_enter_queue+0x1cc/0x4d0 + __submit_bio+0x239/0x310 + ? __bio_queue_enter+0x700/0x700 + ? kvm_clock_get_cycles+0x40/0x60 + ? 
ktime_get+0x285/0x470 + submit_bio_noacct_nocheck+0x4d9/0xb80 + ? should_fail_request+0x80/0x80 + ? preempt_count_sub+0x150/0x150 + ? lock_release+0x4b7/0x670 + ? __bio_add_page+0x143/0x2d0 + ? iov_iter_revert+0x27/0x360 + submit_bio_noacct+0x53e/0x1b30 + submit_bio_wait+0x10a/0x230 + ? submit_bio_wait_endio+0x40/0x40 + __blkdev_direct_IO_simple+0x4f8/0x780 + ? blkdev_bio_end_io+0x4c0/0x4c0 + ? stack_trace_save+0x90/0xc0 + ? __bio_clone+0x3c0/0x3c0 + ? lock_release+0x4b7/0x670 + ? lock_sync+0x190/0x190 + ? atime_needs_update+0x3bf/0x7e0 + ? timestamp_truncate+0x21b/0x2d0 + ? inode_owner_or_capable+0x240/0x240 + blkdev_direct_IO.part.0+0x84a/0x1810 + ? rcu_is_watching+0x12/0xb0 + ? lock_release+0x4b7/0x670 + ? blkdev_read_iter+0x40d/0x530 + ? reacquire_held_locks+0x4e0/0x4e0 + ? __blkdev_direct_IO_simple+0x780/0x780 + ? rcu_is_watching+0x12/0xb0 + ? __mark_inode_dirty+0x297/0xd50 + ? preempt_count_add+0x72/0x140 + blkdev_read_iter+0x2a4/0x530 + do_iter_readv_writev+0x2f2/0x3c0 + ? generic_copy_file_range+0x1d0/0x1d0 + ? fsnotify_perm.part.0+0x25d/0x630 + ? security_file_permission+0xd8/0x100 + do_iter_read+0x31b/0x880 + ? import_iovec+0x10b/0x140 + vfs_readv+0x12d/0x1a0 + ? vfs_iter_read+0xb0/0xb0 + ? rcu_is_watching+0x12/0xb0 + ? rcu_is_watching+0x12/0xb0 + ? lock_release+0x4b7/0x670 + do_preadv+0x1b3/0x260 + ? do_readv+0x370/0x370 + __x64_sys_preadv2+0xef/0x150 + do_syscall_64+0x39/0xb0 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7f5af41ad806 +Code: 41 54 41 89 fc 55 44 89 c5 53 48 89 cb 48 83 ec 18 80 3d e4 dd 0d 00 00 74 7a 45 89 c1 49 89 ca 45 31 c0 b8 47 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 be 00 00 00 48 85 c0 79 4a 48 8b 0d da 55 +RSP: 002b:00007ffd3145c7f0 EFLAGS: 00000246 ORIG_RAX: 0000000000000147 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5af41ad806 +RDX: 0000000000000001 RSI: 00007ffd3145c850 RDI: 0000000000000003 +RBP: 0000000000000008 R08: 0000000000000000 R09: 0000000000000008 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003 +R13: 00007ffd3145c850 R14: 000055f5f0431dd8 R15: 0000000000000001 + + +where in fact it is dm itself that attempts to allocate a bio clone with +GFP_NOIO under the rcu read lock, regardless of the request type. + +Fix this by getting rid of the special casing for REQ_NOWAIT, and just +use the normal SRCU protected table lookup. Get rid of the bio based +table locking helpers at the same time, as they are now unused. 
+ +Cc: stable@vger.kernel.org +Fixes: 563a225c9fd2 ("dm: introduce dm_{get,put}_live_table_bio called from dm_submit_bio") +Signed-off-by: Jens Axboe +Signed-off-by: Mike Snitzer +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/dm.c | 23 ++--------------------- + 1 file changed, 2 insertions(+), 21 deletions(-) + +--- a/drivers/md/dm.c ++++ b/drivers/md/dm.c +@@ -707,24 +707,6 @@ static void dm_put_live_table_fast(struc + rcu_read_unlock(); + } + +-static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md, +- int *srcu_idx, blk_opf_t bio_opf) +-{ +- if (bio_opf & REQ_NOWAIT) +- return dm_get_live_table_fast(md); +- else +- return dm_get_live_table(md, srcu_idx); +-} +- +-static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx, +- blk_opf_t bio_opf) +-{ +- if (bio_opf & REQ_NOWAIT) +- dm_put_live_table_fast(md); +- else +- dm_put_live_table(md, srcu_idx); +-} +- + static char *_dm_claim_ptr = "I belong to device-mapper"; + + /* +@@ -1805,9 +1787,8 @@ static void dm_submit_bio(struct bio *bi + struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; + int srcu_idx; + struct dm_table *map; +- blk_opf_t bio_opf = bio->bi_opf; + +- map = dm_get_live_table_bio(md, &srcu_idx, bio_opf); ++ map = dm_get_live_table(md, &srcu_idx); + + /* If suspended, or map not yet available, queue this IO for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) || +@@ -1823,7 +1804,7 @@ static void dm_submit_bio(struct bio *bi + + dm_split_and_process_bio(md, map, bio); + out: +- dm_put_live_table_bio(md, srcu_idx, bio_opf); ++ dm_put_live_table(md, srcu_idx); + } + + static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob, diff --git a/queue-6.1/drm-amd-display-fix-the-white-screen-issue-when-64gb-dram.patch b/queue-6.1/drm-amd-display-fix-the-white-screen-issue-when-64gb-dram.patch new file mode 100644 index 00000000000..5d96f3a1dc7 --- /dev/null +++ b/queue-6.1/drm-amd-display-fix-the-white-screen-issue-when-64gb-dram.patch @@ -0,0 +1,52 @@ +From ef064187a9709393a981a56cce1e31880fd97107 Mon Sep 17 00:00:00 2001 +From: Yifan Zhang +Date: Fri, 8 Sep 2023 16:46:39 +0800 +Subject: drm/amd/display: fix the white screen issue when >= 64GB DRAM + +From: Yifan Zhang + +commit ef064187a9709393a981a56cce1e31880fd97107 upstream. + +Dropping bit 31:4 of page table base is wrong, it makes page table +base points to wrong address if phys addr is beyond 64GB; dropping +page_table_start/end bit 31:4 is unnecessary since dcn20_vmid_setup +will do that. Also, while we are at it, cleanup the assignments using +upper_32_bits()/lower_32_bits() and AMDGPU_GPU_PAGE_SHIFT. 
+ +Cc: stable@vger.kernel.org +Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2354 +Fixes: 81d0bcf99009 ("drm/amdgpu: make display pinning more flexible (v2)") +Acked-by: Harry Wentland +Reviewed-by: Alex Deucher +Signed-off-by: Yifan Zhang +Co-developed-by: Hamza Mahfooz +Signed-off-by: Hamza Mahfooz +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -1265,11 +1265,15 @@ static void mmhub_read_system_context(st + + pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + +- page_table_start.high_part = (u32)(adev->gmc.gart_start >> 44) & 0xF; +- page_table_start.low_part = (u32)(adev->gmc.gart_start >> 12); +- page_table_end.high_part = (u32)(adev->gmc.gart_end >> 44) & 0xF; +- page_table_end.low_part = (u32)(adev->gmc.gart_end >> 12); +- page_table_base.high_part = upper_32_bits(pt_base) & 0xF; ++ page_table_start.high_part = upper_32_bits(adev->gmc.gart_start >> ++ AMDGPU_GPU_PAGE_SHIFT); ++ page_table_start.low_part = lower_32_bits(adev->gmc.gart_start >> ++ AMDGPU_GPU_PAGE_SHIFT); ++ page_table_end.high_part = upper_32_bits(adev->gmc.gart_end >> ++ AMDGPU_GPU_PAGE_SHIFT); ++ page_table_end.low_part = lower_32_bits(adev->gmc.gart_end >> ++ AMDGPU_GPU_PAGE_SHIFT); ++ page_table_base.high_part = upper_32_bits(pt_base); + page_table_base.low_part = lower_32_bits(pt_base); + + pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18; diff --git a/queue-6.1/ext4-fix-rec_len-verify-error.patch b/queue-6.1/ext4-fix-rec_len-verify-error.patch new file mode 100644 index 00000000000..e159c904d8d --- /dev/null +++ b/queue-6.1/ext4-fix-rec_len-verify-error.patch @@ -0,0 +1,122 @@ +From 7fda67e8c3ab6069f75888f67958a6d30454a9f6 Mon Sep 17 00:00:00 2001 +From: Shida Zhang +Date: Thu, 3 Aug 2023 14:09:38 +0800 +Subject: ext4: fix rec_len verify error + +From: Shida Zhang + +commit 7fda67e8c3ab6069f75888f67958a6d30454a9f6 upstream. + +With the configuration PAGE_SIZE 64k and filesystem blocksize 64k, +a problem occurred when more than 13 million files were directly created +under a directory: + +EXT4-fs error (device xx): ext4_dx_csum_set:492: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D. +EXT4-fs error (device xx): ext4_dx_csum_verify:463: inode #xxxx: comm xxxxx: dir seems corrupt? Run e2fsck -D. +EXT4-fs error (device xx): dx_probe:856: inode #xxxx: block 8188: comm xxxxx: Directory index failed checksum + +When enough files are created, the fake_dirent->reclen will be 0xffff. +it doesn't equal to the blocksize 65536, i.e. 0x10000. + +But it is not the same condition when blocksize equals to 4k. +when enough files are created, the fake_dirent->reclen will be 0x1000. +it equals to the blocksize 4k, i.e. 0x1000. + +The problem seems to be related to the limitation of the 16-bit field +when the blocksize is set to 64k. +To address this, helpers like ext4_rec_len_{from,to}_disk has already +been introduced to complete the conversion between the encoded and the +plain form of rec_len. + +So fix this one by using the helper, and all the other in this file too. + +Cc: stable@kernel.org +Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes") +Suggested-by: Andreas Dilger +Suggested-by: Darrick J. Wong +Signed-off-by: Shida Zhang +Reviewed-by: Andreas Dilger +Reviewed-by: Darrick J. 
Wong +Link: https://lore.kernel.org/r/20230803060938.1929759-1-zhangshida@kylinos.cn +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -343,17 +343,17 @@ static struct ext4_dir_entry_tail *get_d + struct buffer_head *bh) + { + struct ext4_dir_entry_tail *t; ++ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); + + #ifdef PARANOID + struct ext4_dir_entry *d, *top; + + d = (struct ext4_dir_entry *)bh->b_data; + top = (struct ext4_dir_entry *)(bh->b_data + +- (EXT4_BLOCK_SIZE(inode->i_sb) - +- sizeof(struct ext4_dir_entry_tail))); +- while (d < top && d->rec_len) ++ (blocksize - sizeof(struct ext4_dir_entry_tail))); ++ while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize)) + d = (struct ext4_dir_entry *)(((void *)d) + +- le16_to_cpu(d->rec_len)); ++ ext4_rec_len_from_disk(d->rec_len, blocksize)); + + if (d != top) + return NULL; +@@ -364,7 +364,8 @@ static struct ext4_dir_entry_tail *get_d + #endif + + if (t->det_reserved_zero1 || +- le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) || ++ (ext4_rec_len_from_disk(t->det_rec_len, blocksize) != ++ sizeof(struct ext4_dir_entry_tail)) || + t->det_reserved_zero2 || + t->det_reserved_ft != EXT4_FT_DIR_CSUM) + return NULL; +@@ -445,13 +446,14 @@ static struct dx_countlimit *get_dx_coun + struct ext4_dir_entry *dp; + struct dx_root_info *root; + int count_offset; ++ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb); ++ unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize); + +- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb)) ++ if (rlen == blocksize) + count_offset = 8; +- else if (le16_to_cpu(dirent->rec_len) == 12) { ++ else if (rlen == 12) { + dp = (struct ext4_dir_entry *)(((void *)dirent) + 12); +- if (le16_to_cpu(dp->rec_len) != +- EXT4_BLOCK_SIZE(inode->i_sb) - 12) ++ if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12) + return NULL; + root = (struct dx_root_info *)(((void *)dp + 12)); + if (root->reserved_zero || +@@ -1315,6 +1317,7 @@ static int dx_make_map(struct inode *dir + unsigned int buflen = bh->b_size; + char *base = bh->b_data; + struct dx_hash_info h = *hinfo; ++ int blocksize = EXT4_BLOCK_SIZE(dir->i_sb); + + if (ext4_has_metadata_csum(dir->i_sb)) + buflen -= sizeof(struct ext4_dir_entry_tail); +@@ -1335,11 +1338,12 @@ static int dx_make_map(struct inode *dir + map_tail--; + map_tail->hash = h.hash; + map_tail->offs = ((char *) de - base)>>2; +- map_tail->size = le16_to_cpu(de->rec_len); ++ map_tail->size = ext4_rec_len_from_disk(de->rec_len, ++ blocksize); + count++; + cond_resched(); + } +- de = ext4_next_entry(de, dir->i_sb->s_blocksize); ++ de = ext4_next_entry(de, blocksize); + } + return count; + } diff --git a/queue-6.1/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch b/queue-6.1/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch new file mode 100644 index 00000000000..45251739d1c --- /dev/null +++ b/queue-6.1/i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch @@ -0,0 +1,44 @@ +From fee465150b458351b6d9b9f66084f3cc3022b88b Mon Sep 17 00:00:00 2001 +From: Tommy Huang +Date: Wed, 6 Sep 2023 08:49:10 +0800 +Subject: i2c: aspeed: Reset the i2c controller when timeout occurs + +From: Tommy Huang + +commit fee465150b458351b6d9b9f66084f3cc3022b88b upstream. + +Reset the i2c controller when an i2c transfer timeout occurs. 
+The remaining interrupts and device should be reset to avoid
+unpredictable controller behavior.
+
+Fixes: 2e57b7cebb98 ("i2c: aspeed: Add multi-master use case support")
+Cc: # v5.1+
+Signed-off-by: Tommy Huang
+Reviewed-by: Andi Shyti
+Signed-off-by: Wolfram Sang
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/i2c/busses/i2c-aspeed.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-aspeed.c
++++ b/drivers/i2c/busses/i2c-aspeed.c
+@@ -698,13 +698,16 @@ static int aspeed_i2c_master_xfer(struct
+
+ if (time_left == 0) {
+ /*
+- * If timed out and bus is still busy in a multi master
+- * environment, attempt recovery at here.
++ * In a multi-master setup, if a timeout occurs, attempt
++ * recovery. But if the bus is idle, we still need to reset the
++ * i2c controller to clear the remaining interrupts.
+ */
+ if (bus->multi_master &&
+ (readl(bus->base + ASPEED_I2C_CMD_REG) &
+ ASPEED_I2CD_BUS_BUSY_STS))
+ aspeed_i2c_recover_bus(bus);
++ else
++ aspeed_i2c_reset(bus);
+
+ /*
+ * If timed out and the state is still pending, drop the pending
diff --git a/queue-6.1/io_uring-net-fix-iter-retargeting-for-selected-buf.patch b/queue-6.1/io_uring-net-fix-iter-retargeting-for-selected-buf.patch
new file mode 100644
index 00000000000..aff4a62d65f
--- /dev/null
+++ b/queue-6.1/io_uring-net-fix-iter-retargeting-for-selected-buf.patch
@@ -0,0 +1,46 @@
+From c21a8027ad8a68c340d0d58bf1cc61dcb0bc4d2f Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Thu, 14 Sep 2023 16:51:09 +0100
+Subject: io_uring/net: fix iter retargeting for selected buf
+
+From: Pavel Begunkov
+
+commit c21a8027ad8a68c340d0d58bf1cc61dcb0bc4d2f upstream.
+
+When using the selected buffer feature, io_uring delays data iter setup
+until later. If io_setup_async_msg() is called before that, it might see
+an iterator that is not yet correctly set up. Pre-init nr_segs and judge
+from its state whether we are repointing.
+ +Cc: stable@vger.kernel.org +Reported-by: syzbot+a4c6e5ef999b68b26ed1@syzkaller.appspotmail.com +Fixes: 0455d4ccec548 ("io_uring: add POLL_FIRST support for send/sendmsg and recv/recvmsg") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/0000000000002770be06053c7757@google.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/net.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/io_uring/net.c ++++ b/io_uring/net.c +@@ -170,6 +170,10 @@ static int io_setup_async_msg(struct io_ + memcpy(async_msg, kmsg, sizeof(*kmsg)); + if (async_msg->msg.msg_name) + async_msg->msg.msg_name = &async_msg->addr; ++ ++ if ((req->flags & REQ_F_BUFFER_SELECT) && !async_msg->msg.msg_iter.nr_segs) ++ return -EAGAIN; ++ + /* if were using fast_iov, set it to the new one */ + if (!kmsg->free_iov) { + size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; +@@ -529,6 +533,7 @@ static int io_recvmsg_copy_hdr(struct io + struct io_async_msghdr *iomsg) + { + iomsg->msg.msg_name = &iomsg->addr; ++ iomsg->msg.msg_iter.nr_segs = 0; + + #ifdef CONFIG_COMPAT + if (req->ctx->compat) diff --git a/queue-6.1/md-put-the-right-device-in-md_seq_next.patch b/queue-6.1/md-put-the-right-device-in-md_seq_next.patch new file mode 100644 index 00000000000..2bf2c0578d7 --- /dev/null +++ b/queue-6.1/md-put-the-right-device-in-md_seq_next.patch @@ -0,0 +1,41 @@ +From c8870379a21fbd9ad14ca36204ccfbe9d25def43 Mon Sep 17 00:00:00 2001 +From: Mariusz Tkaczyk +Date: Thu, 14 Sep 2023 17:24:16 +0200 +Subject: md: Put the right device in md_seq_next + +From: Mariusz Tkaczyk + +commit c8870379a21fbd9ad14ca36204ccfbe9d25def43 upstream. + +If there are multiple arrays in system and one mddevice is marked +with MD_DELETED and md_seq_next() is called in the middle of removal +then it _get()s proper device but it may _put() deleted one. As a result, +active counter may never be zeroed for mddevice and it cannot +be removed. + +Put the device which has been _get with previous md_seq_next() call. + +Cc: stable@vger.kernel.org +Fixes: 12a6caf27324 ("md: only delete entries from all_mddevs when the disk is freed") +Reported-by: AceLan Kao +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217798 +Cc: Yu Kuai +Signed-off-by: Mariusz Tkaczyk +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20230914152416.10819-1-mariusz.tkaczyk@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/md/md.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -8228,7 +8228,7 @@ static void *md_seq_next(struct seq_file + spin_unlock(&all_mddevs_lock); + + if (to_put) +- mddev_put(mddev); ++ mddev_put(to_put); + return next_mddev; + + } diff --git a/queue-6.1/nfsd-fix-change_info-in-nfsv4-rename-replies.patch b/queue-6.1/nfsd-fix-change_info-in-nfsv4-rename-replies.patch new file mode 100644 index 00000000000..2d61fc7f1ae --- /dev/null +++ b/queue-6.1/nfsd-fix-change_info-in-nfsv4-rename-replies.patch @@ -0,0 +1,36 @@ +From fdd2630a7398191e84822612e589062063bd4f3d Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Sat, 9 Sep 2023 07:12:30 -0400 +Subject: nfsd: fix change_info in NFSv4 RENAME replies + +From: Jeff Layton + +commit fdd2630a7398191e84822612e589062063bd4f3d upstream. + +nfsd sends the transposed directory change info in the RENAME reply. The +source directory is in save_fh and the target is in current_fh. 
+
+Reported-by: Zhi Li
+Reported-by: Benjamin Coddington
+Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2218844
+Signed-off-by: Jeff Layton
+Cc:
+Signed-off-by: Chuck Lever
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/nfsd/nfs4proc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1029,8 +1029,8 @@ nfsd4_rename(struct svc_rqst *rqstp, str
+ rename->rn_tname, rename->rn_tnamelen);
+ if (status)
+ return status;
+- set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+- set_change_info(&rename->rn_tinfo, &cstate->save_fh);
++ set_change_info(&rename->rn_sinfo, &cstate->save_fh);
++ set_change_info(&rename->rn_tinfo, &cstate->current_fh);
+ return nfs_ok;
+ }
+
diff --git a/queue-6.1/nvme-avoid-bogus-crto-values.patch b/queue-6.1/nvme-avoid-bogus-crto-values.patch
new file mode 100644
index 00000000000..ae178de5600
--- /dev/null
+++ b/queue-6.1/nvme-avoid-bogus-crto-values.patch
@@ -0,0 +1,104 @@
+From 6cc834ba62998c65c42d0c63499bdd35067151ec Mon Sep 17 00:00:00 2001
+From: Keith Busch
+Date: Tue, 12 Sep 2023 14:38:58 -0700
+Subject: nvme: avoid bogus CRTO values
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Keith Busch
+
+commit 6cc834ba62998c65c42d0c63499bdd35067151ec upstream.
+
+Some devices are reporting controller ready mode support, but return 0
+for CRTO. These devices require a much higher time to ready than that,
+so they are failing to initialize after the driver started preferring
+that value over CAP.TO.
+
+The spec requires that CAP.TO match the appropriate CRTO value, or be
+set to 0xff if CRTO is larger than that. This means that CAP.TO can be
+used to validate if CRTO is reliable, and provides an appropriate
+fallback for setting the timeout value if not. Use whichever is larger.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217863
+Reported-by: Cláudio Sampaio
+Reported-by: Felix Yan
+Tested-by: Felix Yan
+Based-on-a-patch-by: Felix Yan
+Cc: stable@vger.kernel.org
+Signed-off-by: Keith Busch
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/nvme/host/core.c | 54 ++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 35 insertions(+), 19 deletions(-)
+
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -2368,25 +2368,8 @@ int nvme_enable_ctrl(struct nvme_ctrl *c
+ else
+ ctrl->ctrl_config = NVME_CC_CSS_NVM;
+
+- if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
+- u32 crto;
+-
+- ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
+- if (ret) {
+- dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
+- ret);
+- return ret;
+- }
+-
+- if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
+- ctrl->ctrl_config |= NVME_CC_CRIME;
+- timeout = NVME_CRTO_CRIMT(crto);
+- } else {
+- timeout = NVME_CRTO_CRWMT(crto);
+- }
+- } else {
+- timeout = NVME_CAP_TIMEOUT(ctrl->cap);
+- }
++ if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
++ ctrl->ctrl_config |= NVME_CC_CRIME;
+
+ ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+ ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
+@@ -2400,6 +2383,39 @@ int nvme_enable_ctrl(struct nvme_ctrl *c
+ if (ret)
+ return ret;
+
++ /* CAP value may change after initial CC write */
++ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
++ if (ret)
++ return ret;
++
++ timeout = NVME_CAP_TIMEOUT(ctrl->cap);
++ if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
++ u32 crto, ready_timeout;
++
++ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
++ if (ret) {
++ dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
++ ret);
++ return ret;
++ }
++
++ /*
++ * CRTO should always be greater or equal to CAP.TO, but some
++ * devices are known to get this wrong. Use the larger of the
++ * two values.
++ */
++ if (ctrl->ctrl_config & NVME_CC_CRIME)
++ ready_timeout = NVME_CRTO_CRIMT(crto);
++ else
++ ready_timeout = NVME_CRTO_CRWMT(crto);
++
++ if (ready_timeout < timeout)
++ dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n",
++ crto, ctrl->cap);
++ else
++ timeout = ready_timeout;
++ }
++
+ ctrl->ctrl_config |= NVME_CC_ENABLE;
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
diff --git a/queue-6.1/ovl-fix-failed-copyup-of-fileattr-on-a-symlink.patch b/queue-6.1/ovl-fix-failed-copyup-of-fileattr-on-a-symlink.patch
new file mode 100644
index 00000000000..15e778fa329
--- /dev/null
+++ b/queue-6.1/ovl-fix-failed-copyup-of-fileattr-on-a-symlink.patch
@@ -0,0 +1,57 @@
+From ab048302026d7701e7fbd718917e0dbcff0c4223 Mon Sep 17 00:00:00 2001
+From: Amir Goldstein
+Date: Mon, 4 Sep 2023 14:17:56 +0300
+Subject: ovl: fix failed copyup of fileattr on a symlink
+
+From: Amir Goldstein
+
+commit ab048302026d7701e7fbd718917e0dbcff0c4223 upstream.
+
+Some local filesystems support setting persistent fileattr flags
+(e.g. FS_NOATIME_FL) on directories and regular files via ioctl.
+Some of those persistent fileattr flags are reflected to vfs as
+in-memory inode flags (e.g. S_NOATIME).
+
+Overlayfs uses the in-memory inode flags (e.g. S_NOATIME) on a lower file
+as an indication that the lower file may have persistent inode fileattr
+flags (e.g. FS_NOATIME_FL) that need to be copied to the upper file.
+
+However, in some cases, the S_NOATIME in-memory flag could be a false
+indication for persistent FS_NOATIME_FL fileattr. For example, with NFS
+and FUSE lower fs, as was the case in the two bug reports, the S_NOATIME
+flag is set unconditionally for all inodes.
+
+Users cannot set persistent fileattr flags on symlinks and special files,
+but in some local fs, such as ext4/btrfs/tmpfs, the FS_NOATIME_FL fileattr
+flag is inherited by symlinks and special files from the parent directory.
+
+In both cases described above, when a lower symlink has the S_NOATIME flag,
+overlayfs will try to copy the symlink's fileattrs and fail with error
+ENOXIO, because it could not open the symlink for the ioctl security hook.
+
+To solve this failure, do not attempt to copyup fileattrs for anything
+other than directories and regular files.
+
+Reported-by: Ruiwen Zhao
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217850
+Fixes: 72db82115d2b ("ovl: copy up sync/noatime fileattr flags")
+Cc: # v5.15
+Reviewed-by: Miklos Szeredi
+Signed-off-by: Amir Goldstein
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/overlayfs/copy_up.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/overlayfs/copy_up.c
++++ b/fs/overlayfs/copy_up.c
+@@ -580,7 +580,8 @@ static int ovl_copy_up_metadata(struct o
+ if (err)
+ return err;
+
+- if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) {
++ if (inode->i_flags & OVL_COPY_I_FLAGS_MASK &&
++ (S_ISREG(c->stat.mode) || S_ISDIR(c->stat.mode))) {
+ /*
+ * Copy the fileattr inode flags that are the source of already
+ * copied i_flags
diff --git a/queue-6.1/ovl-fix-incorrect-fdput-on-aio-completion.patch b/queue-6.1/ovl-fix-incorrect-fdput-on-aio-completion.patch
new file mode 100644
index 00000000000..4df4500f73f
--- /dev/null
+++ b/queue-6.1/ovl-fix-incorrect-fdput-on-aio-completion.patch
@@ -0,0 +1,69 @@
+From 724768a39374d35b70eaeae8dd87048a2ec7ae8e Mon Sep 17 00:00:00 2001
+From: Amir Goldstein
+Date: Tue, 22 Aug 2023 20:50:59 +0300
+Subject: ovl: fix incorrect fdput() on aio completion
+
+From: Amir Goldstein
+
+commit 724768a39374d35b70eaeae8dd87048a2ec7ae8e upstream.
+
+ovl_{read,write}_iter() always call fdput(real) to put one or zero
+refcounts of the real file, but for aio, whether it was submitted or not,
+ovl_aio_put() also calls fdput(), which is not balanced. This is only a
+problem in the less common case when FDPUT_FPUT flag is set.
+
+To fix the problem use get_file() to take file refcount and use fput()
+instead of fdput() in ovl_aio_put().
+ +Fixes: 2406a307ac7d ("ovl: implement async IO routines") +Cc: # v5.6 +Reviewed-by: Miklos Szeredi +Signed-off-by: Amir Goldstein +Signed-off-by: Greg Kroah-Hartman +--- + fs/overlayfs/file.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +--- a/fs/overlayfs/file.c ++++ b/fs/overlayfs/file.c +@@ -19,7 +19,6 @@ struct ovl_aio_req { + struct kiocb iocb; + refcount_t ref; + struct kiocb *orig_iocb; +- struct fd fd; + }; + + static struct kmem_cache *ovl_aio_request_cachep; +@@ -260,7 +259,7 @@ static rwf_t ovl_iocb_to_rwf(int ifl) + static inline void ovl_aio_put(struct ovl_aio_req *aio_req) + { + if (refcount_dec_and_test(&aio_req->ref)) { +- fdput(aio_req->fd); ++ fput(aio_req->iocb.ki_filp); + kmem_cache_free(ovl_aio_request_cachep, aio_req); + } + } +@@ -325,10 +324,9 @@ static ssize_t ovl_read_iter(struct kioc + if (!aio_req) + goto out; + +- aio_req->fd = real; + real.flags = 0; + aio_req->orig_iocb = iocb; +- kiocb_clone(&aio_req->iocb, iocb, real.file); ++ kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); + ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); +@@ -396,10 +394,9 @@ static ssize_t ovl_write_iter(struct kio + /* Pacify lockdep, same trick as done in aio_write() */ + __sb_writers_release(file_inode(real.file)->i_sb, + SB_FREEZE_WRITE); +- aio_req->fd = real; + real.flags = 0; + aio_req->orig_iocb = iocb; +- kiocb_clone(&aio_req->iocb, iocb, real.file); ++ kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); + aio_req->iocb.ki_flags = ifl; + aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); diff --git a/queue-6.1/revert-drm-amd-disable-s-g-for-apus-when-64gb-or-more-host-memory.patch b/queue-6.1/revert-drm-amd-disable-s-g-for-apus-when-64gb-or-more-host-memory.patch new file mode 100644 index 00000000000..d672ee44c81 --- /dev/null +++ b/queue-6.1/revert-drm-amd-disable-s-g-for-apus-when-64gb-or-more-host-memory.patch @@ -0,0 +1,86 @@ +From 169ed4ece8373f02f10642eae5240e3d1ef5c038 Mon Sep 17 00:00:00 2001 +From: Hamza Mahfooz +Date: Fri, 8 Sep 2023 10:36:44 -0400 +Subject: Revert "drm/amd: Disable S/G for APUs when 64GB or more host memory" + +From: Hamza Mahfooz + +commit 169ed4ece8373f02f10642eae5240e3d1ef5c038 upstream. + +This reverts commit 70e64c4d522b732e31c6475a3be2349de337d321. + +Since, we now have an actual fix for this issue, we can get rid of this +workaround as it can cause pin failures if enough VRAM isn't carved out +by the BIOS. 
+ +Cc: stable@vger.kernel.org # 6.1+ +Acked-by: Harry Wentland +Reviewed-by: Alex Deucher +Signed-off-by: Hamza Mahfooz +Signed-off-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 ---------------------- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++-- + 3 files changed, 3 insertions(+), 29 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1266,7 +1266,6 @@ int amdgpu_device_gpu_recover(struct amd + void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); + int amdgpu_device_pci_reset(struct amdgpu_device *adev); + bool amdgpu_device_need_post(struct amdgpu_device *adev); +-bool amdgpu_sg_display_supported(struct amdgpu_device *adev); + bool amdgpu_device_pcie_dynamic_switching_supported(void); + bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); + bool amdgpu_device_aspm_support_quirk(void); +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -1337,32 +1337,6 @@ bool amdgpu_device_need_post(struct amdg + } + + /* +- * On APUs with >= 64GB white flickering has been observed w/ SG enabled. +- * Disable S/G on such systems until we have a proper fix. +- * https://gitlab.freedesktop.org/drm/amd/-/issues/2354 +- * https://gitlab.freedesktop.org/drm/amd/-/issues/2735 +- */ +-bool amdgpu_sg_display_supported(struct amdgpu_device *adev) +-{ +- switch (amdgpu_sg_display) { +- case -1: +- break; +- case 0: +- return false; +- case 1: +- return true; +- default: +- return false; +- } +- if ((totalram_pages() << (PAGE_SHIFT - 10)) + +- (adev->gmc.real_vram_size / 1024) >= 64000000) { +- DRM_WARN("Disabling S/G due to >=64GB RAM\n"); +- return false; +- } +- return true; +-} +- +-/* + * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic + * speed switching. Until we have confirmation from Intel that a specific host + * supports it, it's safer that we keep it disabled for all. +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -1634,8 +1634,9 @@ static int amdgpu_dm_init(struct amdgpu_ + } + break; + } +- if (init_data.flags.gpu_vm_support) +- init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev); ++ if (init_data.flags.gpu_vm_support && ++ (amdgpu_sg_display == 0)) ++ init_data.flags.gpu_vm_support = false; + + if (init_data.flags.gpu_vm_support) + adev->mode_info.gpu_vm_support = true; diff --git a/queue-6.1/revert-sunrpc-fail-faster-on-bad-verifier.patch b/queue-6.1/revert-sunrpc-fail-faster-on-bad-verifier.patch new file mode 100644 index 00000000000..698c858a505 --- /dev/null +++ b/queue-6.1/revert-sunrpc-fail-faster-on-bad-verifier.patch @@ -0,0 +1,48 @@ +From e86fcf0820d914389b46658a5a7e8969c3af2d53 Mon Sep 17 00:00:00 2001 +From: Trond Myklebust +Date: Tue, 5 Sep 2023 21:03:28 -0400 +Subject: Revert "SUNRPC: Fail faster on bad verifier" + +From: Trond Myklebust + +commit e86fcf0820d914389b46658a5a7e8969c3af2d53 upstream. + +This reverts commit 0701214cd6e66585a999b132eb72ae0489beb724. + +The premise of this commit was incorrect. There are exactly 2 cases +where rpcauth_checkverf() will return an error: + +1) If there was an XDR decode problem (i.e. garbage data). +2) If gss_validate() had a problem verifying the RPCSEC_GSS MIC. 
+ +In the second case, there are again 2 subcases: + +a) The GSS context expires, in which case gss_validate() will force a + new context negotiation on retry by invalidating the cred. +b) The sequence number check failed because an RPC call timed out, and + the client retransmitted the request using a new sequence number, + as required by RFC2203. + +In neither subcase is this a fatal error. + +Reported-by: Russell Cattelan +Fixes: 0701214cd6e6 ("SUNRPC: Fail faster on bad verifier") +Cc: stable@vger.kernel.org +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Greg Kroah-Hartman +--- + net/sunrpc/clnt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -2710,7 +2710,7 @@ out_unparsable: + + out_verifier: + trace_rpc_bad_verifier(task); +- goto out_err; ++ goto out_garbage; + + out_msg_denied: + error = -EACCES; diff --git a/queue-6.1/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch b/queue-6.1/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch new file mode 100644 index 00000000000..e913af03ee6 --- /dev/null +++ b/queue-6.1/scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch @@ -0,0 +1,175 @@ +From 0b0747d507bffb827e40fc0f9fb5883fffc23477 Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Mon, 28 Aug 2023 15:10:18 -0700 +Subject: scsi: megaraid_sas: Fix deadlock on firmware crashdump + +From: Junxiao Bi + +commit 0b0747d507bffb827e40fc0f9fb5883fffc23477 upstream. + +The following processes run into a deadlock. CPU 41 was waiting for CPU 29 +to handle a CSD request while holding spinlock "crashdump_lock", but CPU 29 +was hung by that spinlock with IRQs disabled. + + PID: 17360 TASK: ffff95c1090c5c40 CPU: 41 COMMAND: "mrdiagd" + !# 0 [ffffb80edbf37b58] __read_once_size at ffffffff9b871a40 include/linux/compiler.h:185:0 + !# 1 [ffffb80edbf37b58] atomic_read at ffffffff9b871a40 arch/x86/include/asm/atomic.h:27:0 + !# 2 [ffffb80edbf37b58] dump_stack at ffffffff9b871a40 lib/dump_stack.c:54:0 + # 3 [ffffb80edbf37b78] csd_lock_wait_toolong at ffffffff9b131ad5 kernel/smp.c:364:0 + # 4 [ffffb80edbf37b78] __csd_lock_wait at ffffffff9b131ad5 kernel/smp.c:384:0 + # 5 [ffffb80edbf37bf8] csd_lock_wait at ffffffff9b13267a kernel/smp.c:394:0 + # 6 [ffffb80edbf37bf8] smp_call_function_many at ffffffff9b13267a kernel/smp.c:843:0 + # 7 [ffffb80edbf37c50] smp_call_function at ffffffff9b13279d kernel/smp.c:867:0 + # 8 [ffffb80edbf37c50] on_each_cpu at ffffffff9b13279d kernel/smp.c:976:0 + # 9 [ffffb80edbf37c78] flush_tlb_kernel_range at ffffffff9b085c4b arch/x86/mm/tlb.c:742:0 + #10 [ffffb80edbf37cb8] __purge_vmap_area_lazy at ffffffff9b23a1e0 mm/vmalloc.c:701:0 + #11 [ffffb80edbf37ce0] try_purge_vmap_area_lazy at ffffffff9b23a2cc mm/vmalloc.c:722:0 + #12 [ffffb80edbf37ce0] free_vmap_area_noflush at ffffffff9b23a2cc mm/vmalloc.c:754:0 + #13 [ffffb80edbf37cf8] free_unmap_vmap_area at ffffffff9b23bb3b mm/vmalloc.c:764:0 + #14 [ffffb80edbf37cf8] remove_vm_area at ffffffff9b23bb3b mm/vmalloc.c:1509:0 + #15 [ffffb80edbf37d18] __vunmap at ffffffff9b23bb8a mm/vmalloc.c:1537:0 + #16 [ffffb80edbf37d40] vfree at ffffffff9b23bc85 mm/vmalloc.c:1612:0 + #17 [ffffb80edbf37d58] megasas_free_host_crash_buffer [megaraid_sas] at ffffffffc020b7f2 drivers/scsi/megaraid/megaraid_sas_fusion.c:3932:0 + #18 [ffffb80edbf37d80] fw_crash_state_store [megaraid_sas] at ffffffffc01f804d drivers/scsi/megaraid/megaraid_sas_base.c:3291:0 + #19 [ffffb80edbf37dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 + #20 
[ffffb80edbf37dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 + #21 [ffffb80edbf37de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 + #22 [ffffb80edbf37e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 + #23 [ffffb80edbf37ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 + #24 [ffffb80edbf37ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 + #25 [ffffb80edbf37ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 + #26 [ffffb80edbf37f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 + #27 [ffffb80edbf37f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 + + PID: 17355 TASK: ffff95c1090c3d80 CPU: 29 COMMAND: "mrdiagd" + !# 0 [ffffb80f2d3c7d30] __read_once_size at ffffffff9b0f2ab0 include/linux/compiler.h:185:0 + !# 1 [ffffb80f2d3c7d30] native_queued_spin_lock_slowpath at ffffffff9b0f2ab0 kernel/locking/qspinlock.c:368:0 + # 2 [ffffb80f2d3c7d58] pv_queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/paravirt.h:674:0 + # 3 [ffffb80f2d3c7d58] queued_spin_lock_slowpath at ffffffff9b0f244b arch/x86/include/asm/qspinlock.h:53:0 + # 4 [ffffb80f2d3c7d68] queued_spin_lock at ffffffff9b8961a6 include/asm-generic/qspinlock.h:90:0 + # 5 [ffffb80f2d3c7d68] do_raw_spin_lock_flags at ffffffff9b8961a6 include/linux/spinlock.h:173:0 + # 6 [ffffb80f2d3c7d68] __raw_spin_lock_irqsave at ffffffff9b8961a6 include/linux/spinlock_api_smp.h:122:0 + # 7 [ffffb80f2d3c7d68] _raw_spin_lock_irqsave at ffffffff9b8961a6 kernel/locking/spinlock.c:160:0 + # 8 [ffffb80f2d3c7d88] fw_crash_buffer_store [megaraid_sas] at ffffffffc01f8129 drivers/scsi/megaraid/megaraid_sas_base.c:3205:0 + # 9 [ffffb80f2d3c7dc0] dev_attr_store at ffffffff9b56dd7b drivers/base/core.c:758:0 + #10 [ffffb80f2d3c7dd0] sysfs_kf_write at ffffffff9b326acf fs/sysfs/file.c:144:0 + #11 [ffffb80f2d3c7de0] kernfs_fop_write at ffffffff9b325fd4 fs/kernfs/file.c:316:0 + #12 [ffffb80f2d3c7e20] __vfs_write at ffffffff9b29418a fs/read_write.c:480:0 + #13 [ffffb80f2d3c7ea8] vfs_write at ffffffff9b294462 fs/read_write.c:544:0 + #14 [ffffb80f2d3c7ee8] SYSC_write at ffffffff9b2946ec fs/read_write.c:590:0 + #15 [ffffb80f2d3c7ee8] SyS_write at ffffffff9b2946ec fs/read_write.c:582:0 + #16 [ffffb80f2d3c7f30] do_syscall_64 at ffffffff9b003ca9 arch/x86/entry/common.c:298:0 + #17 [ffffb80f2d3c7f58] entry_SYSCALL_64 at ffffffff9ba001b1 arch/x86/entry/entry_64.S:238:0 + +The lock is used to synchronize different sysfs operations, it doesn't +protect any resource that will be touched by an interrupt. Consequently +it's not required to disable IRQs. Replace the spinlock with a mutex to fix +the deadlock. + +Signed-off-by: Junxiao Bi +Link: https://lore.kernel.org/r/20230828221018.19471-1-junxiao.bi@oracle.com +Reviewed-by: Mike Christie +Cc: stable@vger.kernel.org +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/megaraid/megaraid_sas.h | 2 +- + drivers/scsi/megaraid/megaraid_sas_base.c | 21 +++++++++------------ + 2 files changed, 10 insertions(+), 13 deletions(-) + +--- a/drivers/scsi/megaraid/megaraid_sas.h ++++ b/drivers/scsi/megaraid/megaraid_sas.h +@@ -2332,7 +2332,7 @@ struct megasas_instance { + u32 support_morethan256jbod; /* FW support for more than 256 PD/JBOD */ + bool use_seqnum_jbod_fp; /* Added for PD sequence */ + bool smp_affinity_enable; +- spinlock_t crashdump_lock; ++ struct mutex crashdump_lock; + + struct megasas_register_set __iomem *reg_set; + u32 __iomem *reply_post_host_index_addr[MR_MAX_MSIX_REG_ARRAY]; +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -3272,14 +3272,13 @@ fw_crash_buffer_store(struct device *cde + struct megasas_instance *instance = + (struct megasas_instance *) shost->hostdata; + int val = 0; +- unsigned long flags; + + if (kstrtoint(buf, 0, &val) != 0) + return -EINVAL; + +- spin_lock_irqsave(&instance->crashdump_lock, flags); ++ mutex_lock(&instance->crashdump_lock); + instance->fw_crash_buffer_offset = val; +- spin_unlock_irqrestore(&instance->crashdump_lock, flags); ++ mutex_unlock(&instance->crashdump_lock); + return strlen(buf); + } + +@@ -3294,24 +3293,23 @@ fw_crash_buffer_show(struct device *cdev + unsigned long dmachunk = CRASH_DMA_BUF_SIZE; + unsigned long chunk_left_bytes; + unsigned long src_addr; +- unsigned long flags; + u32 buff_offset; + +- spin_lock_irqsave(&instance->crashdump_lock, flags); ++ mutex_lock(&instance->crashdump_lock); + buff_offset = instance->fw_crash_buffer_offset; + if (!instance->crash_dump_buf || + !((instance->fw_crash_state == AVAILABLE) || + (instance->fw_crash_state == COPYING))) { + dev_err(&instance->pdev->dev, + "Firmware crash dump is not available\n"); +- spin_unlock_irqrestore(&instance->crashdump_lock, flags); ++ mutex_unlock(&instance->crashdump_lock); + return -EINVAL; + } + + if (buff_offset > (instance->fw_crash_buffer_size * dmachunk)) { + dev_err(&instance->pdev->dev, + "Firmware crash dump offset is out of range\n"); +- spin_unlock_irqrestore(&instance->crashdump_lock, flags); ++ mutex_unlock(&instance->crashdump_lock); + return 0; + } + +@@ -3323,7 +3321,7 @@ fw_crash_buffer_show(struct device *cdev + src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] + + (buff_offset % dmachunk); + memcpy(buf, (void *)src_addr, size); +- spin_unlock_irqrestore(&instance->crashdump_lock, flags); ++ mutex_unlock(&instance->crashdump_lock); + + return size; + } +@@ -3348,7 +3346,6 @@ fw_crash_state_store(struct device *cdev + struct megasas_instance *instance = + (struct megasas_instance *) shost->hostdata; + int val = 0; +- unsigned long flags; + + if (kstrtoint(buf, 0, &val) != 0) + return -EINVAL; +@@ -3362,9 +3359,9 @@ fw_crash_state_store(struct device *cdev + instance->fw_crash_state = val; + + if ((val == COPIED) || (val == COPY_ERROR)) { +- spin_lock_irqsave(&instance->crashdump_lock, flags); ++ mutex_lock(&instance->crashdump_lock); + megasas_free_host_crash_buffer(instance); +- spin_unlock_irqrestore(&instance->crashdump_lock, flags); ++ mutex_unlock(&instance->crashdump_lock); + if (val == COPY_ERROR) + dev_info(&instance->pdev->dev, "application failed to " + "copy Firmware crash dump\n"); +@@ -7423,7 +7420,7 @@ static inline void megasas_init_ctrl_par + init_waitqueue_head(&instance->int_cmd_wait_q); + init_waitqueue_head(&instance->abort_cmd_wait_q); + +- 
spin_lock_init(&instance->crashdump_lock); ++ mutex_init(&instance->crashdump_lock); + spin_lock_init(&instance->mfi_pool_lock); + spin_lock_init(&instance->hba_lock); + spin_lock_init(&instance->stream_lock); diff --git a/queue-6.1/scsi-pm8001-setup-irqs-on-resume.patch b/queue-6.1/scsi-pm8001-setup-irqs-on-resume.patch new file mode 100644 index 00000000000..63f870da48a --- /dev/null +++ b/queue-6.1/scsi-pm8001-setup-irqs-on-resume.patch @@ -0,0 +1,117 @@ +From c91774818b041ed290df29fb1dc0725be9b12e83 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Tue, 12 Sep 2023 08:27:36 +0900 +Subject: scsi: pm8001: Setup IRQs on resume + +From: Damien Le Moal + +commit c91774818b041ed290df29fb1dc0725be9b12e83 upstream. + +The function pm8001_pci_resume() only calls pm8001_request_irq() without +calling pm8001_setup_irq(). This causes the IRQ allocation to fail, which +leads all drives being removed from the system. + +Fix this issue by integrating the code for pm8001_setup_irq() directly +inside pm8001_request_irq() so that MSI-X setup is performed both during +normal initialization and resume operations. + +Fixes: dbf9bfe61571 ("[SCSI] pm8001: add SAS/SATA HBA driver") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Link: https://lore.kernel.org/r/20230911232745.325149-2-dlemoal@kernel.org +Acked-by: Jack Wang +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/pm8001/pm8001_init.c | 51 ++++++++++++-------------------------- + 1 file changed, 17 insertions(+), 34 deletions(-) + +--- a/drivers/scsi/pm8001/pm8001_init.c ++++ b/drivers/scsi/pm8001/pm8001_init.c +@@ -274,7 +274,6 @@ static irqreturn_t pm8001_interrupt_hand + return ret; + } + +-static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha); + static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha); + + /** +@@ -295,13 +294,6 @@ static int pm8001_alloc(struct pm8001_hb + pm8001_dbg(pm8001_ha, INIT, "pm8001_alloc: PHY:%x\n", + pm8001_ha->chip->n_phy); + +- /* Setup Interrupt */ +- rc = pm8001_setup_irq(pm8001_ha); +- if (rc) { +- pm8001_dbg(pm8001_ha, FAIL, +- "pm8001_setup_irq failed [ret: %d]\n", rc); +- goto err_out; +- } + /* Request Interrupt */ + rc = pm8001_request_irq(pm8001_ha); + if (rc) +@@ -1021,47 +1013,38 @@ static u32 pm8001_request_msix(struct pm + } + #endif + +-static u32 pm8001_setup_irq(struct pm8001_hba_info *pm8001_ha) +-{ +- struct pci_dev *pdev; +- +- pdev = pm8001_ha->pdev; +- +-#ifdef PM8001_USE_MSIX +- if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) +- return pm8001_setup_msix(pm8001_ha); +- pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); +-#endif +- return 0; +-} +- + /** + * pm8001_request_irq - register interrupt + * @pm8001_ha: our ha struct. 
+ */ + static u32 pm8001_request_irq(struct pm8001_hba_info *pm8001_ha) + { +- struct pci_dev *pdev; ++ struct pci_dev *pdev = pm8001_ha->pdev; ++#ifdef PM8001_USE_MSIX + int rc; + +- pdev = pm8001_ha->pdev; ++ if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) { ++ rc = pm8001_setup_msix(pm8001_ha); ++ if (rc) { ++ pm8001_dbg(pm8001_ha, FAIL, ++ "pm8001_setup_irq failed [ret: %d]\n", rc); ++ return rc; ++ } + +-#ifdef PM8001_USE_MSIX +- if (pdev->msix_cap && pci_msi_enabled()) +- return pm8001_request_msix(pm8001_ha); +- else { +- pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); +- goto intx; ++ if (pdev->msix_cap && pci_msi_enabled()) ++ return pm8001_request_msix(pm8001_ha); + } ++ ++ pm8001_dbg(pm8001_ha, INIT, "MSIX not supported!!!\n"); + #endif + +-intx: + /* initialize the INT-X interrupt */ + pm8001_ha->irq_vector[0].irq_id = 0; + pm8001_ha->irq_vector[0].drv_inst = pm8001_ha; +- rc = request_irq(pdev->irq, pm8001_interrupt_handler_intx, IRQF_SHARED, +- pm8001_ha->name, SHOST_TO_SAS_HA(pm8001_ha->shost)); +- return rc; ++ ++ return request_irq(pdev->irq, pm8001_interrupt_handler_intx, ++ IRQF_SHARED, pm8001_ha->name, ++ SHOST_TO_SAS_HA(pm8001_ha->shost)); + } + + /** diff --git a/queue-6.1/selinux-fix-handling-of-empty-opts-in-selinux_fs_context_submount.patch b/queue-6.1/selinux-fix-handling-of-empty-opts-in-selinux_fs_context_submount.patch new file mode 100644 index 00000000000..af26bcb300d --- /dev/null +++ b/queue-6.1/selinux-fix-handling-of-empty-opts-in-selinux_fs_context_submount.patch @@ -0,0 +1,58 @@ +From ccf1dab96be4caed7c5235b1cfdb606ac161b996 Mon Sep 17 00:00:00 2001 +From: Ondrej Mosnacek +Date: Mon, 11 Sep 2023 16:23:58 +0200 +Subject: selinux: fix handling of empty opts in selinux_fs_context_submount() + +From: Ondrej Mosnacek + +commit ccf1dab96be4caed7c5235b1cfdb606ac161b996 upstream. + +selinux_set_mnt_opts() relies on the fact that the mount options pointer +is always NULL when all options are unset (specifically in its +!selinux_initialized() branch. However, the new +selinux_fs_context_submount() hook breaks this rule by allocating a new +structure even if no options are set. That causes any submount created +before a SELinux policy is loaded to be rejected in +selinux_set_mnt_opts(). + +Fix this by making selinux_fs_context_submount() leave fc->security +set to NULL when there are no options to be copied from the reference +superblock. + +Cc: +Reported-by: Adam Williamson +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2236345 +Fixes: d80a8f1b58c2 ("vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing") +Signed-off-by: Ondrej Mosnacek +Reviewed-by: Jeff Layton +Signed-off-by: Paul Moore +Signed-off-by: Greg Kroah-Hartman +--- + security/selinux/hooks.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -2769,14 +2769,20 @@ static int selinux_umount(struct vfsmoun + static int selinux_fs_context_submount(struct fs_context *fc, + struct super_block *reference) + { +- const struct superblock_security_struct *sbsec; ++ const struct superblock_security_struct *sbsec = selinux_superblock(reference); + struct selinux_mnt_opts *opts; + ++ /* ++ * Ensure that fc->security remains NULL when no options are set ++ * as expected by selinux_set_mnt_opts(). 
++ */ ++ if (!(sbsec->flags & (FSCONTEXT_MNT|CONTEXT_MNT|DEFCONTEXT_MNT))) ++ return 0; ++ + opts = kzalloc(sizeof(*opts), GFP_KERNEL); + if (!opts) + return -ENOMEM; + +- sbsec = selinux_superblock(reference); + if (sbsec->flags & FSCONTEXT_MNT) + opts->fscontext_sid = sbsec->sid; + if (sbsec->flags & CONTEXT_MNT) diff --git a/queue-6.1/series b/queue-6.1/series index 6895a87ef1a..e7a5f5eb630 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -106,3 +106,31 @@ x86-boot-compressed-reserve-more-memory-for-page-tab.patch x86-purgatory-remove-lto-flags.patch samples-hw_breakpoint-fix-building-without-module-un.patch md-raid1-fix-error-iso-c90-forbids-mixed-declaration.patch +revert-sunrpc-fail-faster-on-bad-verifier.patch +attr-block-mode-changes-of-symlinks.patch +ovl-fix-failed-copyup-of-fileattr-on-a-symlink.patch +ovl-fix-incorrect-fdput-on-aio-completion.patch +io_uring-net-fix-iter-retargeting-for-selected-buf.patch +nvme-avoid-bogus-crto-values.patch +md-put-the-right-device-in-md_seq_next.patch +revert-drm-amd-disable-s-g-for-apus-when-64gb-or-more-host-memory.patch +dm-don-t-attempt-to-queue-io-under-rcu-protection.patch +btrfs-fix-lockdep-splat-and-potential-deadlock-after-failure-running-delayed-items.patch +btrfs-fix-a-compilation-error-if-debug-is-defined-in-btree_dirty_folio.patch +btrfs-release-path-before-inode-lookup-during-the-ino-lookup-ioctl.patch +btrfs-check-for-btrfs_fs_error-in-pending-ordered-assert.patch +tracing-have-tracing_max_latency-inc-the-trace-array-ref-count.patch +tracing-have-event-inject-files-inc-the-trace-array-ref-count.patch +tracing-increase-trace-array-ref-count-on-enable-and-filter-files.patch +tracing-have-current_trace-inc-the-trace-array-ref-count.patch +tracing-have-option-files-inc-the-trace-array-ref-count.patch +selinux-fix-handling-of-empty-opts-in-selinux_fs_context_submount.patch +nfsd-fix-change_info-in-nfsv4-rename-replies.patch +tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch +i2c-aspeed-reset-the-i2c-controller-when-timeout-occurs.patch +ata-libata-disallow-dev-initiated-lpm-transitions-to-unsupported-states.patch +ata-libahci-clear-pending-interrupt-status.patch +scsi-megaraid_sas-fix-deadlock-on-firmware-crashdump.patch +scsi-pm8001-setup-irqs-on-resume.patch +ext4-fix-rec_len-verify-error.patch +drm-amd-display-fix-the-white-screen-issue-when-64gb-dram.patch diff --git a/queue-6.1/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch b/queue-6.1/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch new file mode 100644 index 00000000000..9d9bef443aa --- /dev/null +++ b/queue-6.1/tracefs-add-missing-lockdown-check-to-tracefs_create_dir.patch @@ -0,0 +1,43 @@ +From 51aab5ffceb43e05119eb059048fd75765d2bc21 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Tue, 5 Sep 2023 14:26:08 -0400 +Subject: tracefs: Add missing lockdown check to tracefs_create_dir() + +From: Steven Rostedt (Google) + +commit 51aab5ffceb43e05119eb059048fd75765d2bc21 upstream. + +The function tracefs_create_dir() was missing a lockdown check and was +called by the RV code. This gave an inconsistent behavior of this function +returning success while other tracefs functions failed. This caused the +inode being freed by the wrong kmem_cache. 
+ +Link: https://lkml.kernel.org/r/20230905182711.692687042@goodmis.org +Link: https://lore.kernel.org/all/202309050916.58201dc6-oliver.sang@intel.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Ajay Kaher +Cc: Ching-lin Yu +Fixes: bf8e602186ec4 ("tracing: Do not create tracefs files if tracefs lockdown is in effect") +Reported-by: kernel test robot +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + fs/tracefs/inode.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -556,6 +556,9 @@ static struct dentry *__create_dir(const + */ + struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) + { ++ if (security_locked_down(LOCKDOWN_TRACEFS)) ++ return NULL; ++ + return __create_dir(name, parent, &simple_dir_inode_operations); + } + diff --git a/queue-6.1/tracing-have-current_trace-inc-the-trace-array-ref-count.patch b/queue-6.1/tracing-have-current_trace-inc-the-trace-array-ref-count.patch new file mode 100644 index 00000000000..9e6edf1d037 --- /dev/null +++ b/queue-6.1/tracing-have-current_trace-inc-the-trace-array-ref-count.patch @@ -0,0 +1,47 @@ +From 9b37febc578b2e1ad76a105aab11d00af5ec3d27 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Wed, 6 Sep 2023 22:47:14 -0400 +Subject: tracing: Have current_trace inc the trace array ref count + +From: Steven Rostedt (Google) + +commit 9b37febc578b2e1ad76a105aab11d00af5ec3d27 upstream. + +The current_trace updates the trace array tracer. For an instance, if the +file is opened and the instance is deleted, reading or writing to the file +will cause a use after free. + +Up the ref count of the trace array when current_trace is opened. + +Link: https://lkml.kernel.org/r/20230907024803.877687227@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Zheng Yejian +Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7708,10 +7708,11 @@ static const struct file_operations trac + #endif + + static const struct file_operations set_tracer_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_generic_tr, + .read = tracing_set_trace_read, + .write = tracing_set_trace_write, + .llseek = generic_file_llseek, ++ .release = tracing_release_generic_tr, + }; + + static const struct file_operations tracing_pipe_fops = { diff --git a/queue-6.1/tracing-have-event-inject-files-inc-the-trace-array-ref-count.patch b/queue-6.1/tracing-have-event-inject-files-inc-the-trace-array-ref-count.patch new file mode 100644 index 00000000000..b8245f31767 --- /dev/null +++ b/queue-6.1/tracing-have-event-inject-files-inc-the-trace-array-ref-count.patch @@ -0,0 +1,44 @@ +From e5c624f027ac74f97e97c8f36c69228ac9f1102d Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Wed, 6 Sep 2023 22:47:16 -0400 +Subject: tracing: Have event inject files inc the trace array ref count + +From: Steven Rostedt (Google) + +commit e5c624f027ac74f97e97c8f36c69228ac9f1102d upstream. + +The event inject files add events for a specific trace array. 
For an +instance, if the file is opened and the instance is deleted, reading or +writing to the file will cause a use after free. + +Up the ref count of the trace_array when a event inject file is opened. + +Link: https://lkml.kernel.org/r/20230907024804.292337868@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Zheng Yejian +Fixes: 6c3edaf9fd6a ("tracing: Introduce trace event injection") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace_events_inject.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace_events_inject.c ++++ b/kernel/trace/trace_events_inject.c +@@ -328,7 +328,8 @@ event_inject_read(struct file *file, cha + } + + const struct file_operations event_inject_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_file_tr, + .read = event_inject_read, + .write = event_inject_write, ++ .release = tracing_release_file_tr, + }; diff --git a/queue-6.1/tracing-have-option-files-inc-the-trace-array-ref-count.patch b/queue-6.1/tracing-have-option-files-inc-the-trace-array-ref-count.patch new file mode 100644 index 00000000000..a48b7a43015 --- /dev/null +++ b/queue-6.1/tracing-have-option-files-inc-the-trace-array-ref-count.patch @@ -0,0 +1,69 @@ +From 7e2cfbd2d3c86afcd5c26b5c4b1dd251f63c5838 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Wed, 6 Sep 2023 22:47:15 -0400 +Subject: tracing: Have option files inc the trace array ref count + +From: Steven Rostedt (Google) + +commit 7e2cfbd2d3c86afcd5c26b5c4b1dd251f63c5838 upstream. + +The option files update the options for a given trace array. For an +instance, if the file is opened and the instance is deleted, reading or +writing to the file will cause a use after free. + +Up the ref count of the trace_array when an option file is opened. 
+ +Link: https://lkml.kernel.org/r/20230907024804.086679464@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Zheng Yejian +Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -8905,12 +8905,33 @@ trace_options_write(struct file *filp, c + return cnt; + } + ++static int tracing_open_options(struct inode *inode, struct file *filp) ++{ ++ struct trace_option_dentry *topt = inode->i_private; ++ int ret; ++ ++ ret = tracing_check_open_get_tr(topt->tr); ++ if (ret) ++ return ret; ++ ++ filp->private_data = inode->i_private; ++ return 0; ++} ++ ++static int tracing_release_options(struct inode *inode, struct file *file) ++{ ++ struct trace_option_dentry *topt = file->private_data; ++ ++ trace_array_put(topt->tr); ++ return 0; ++} + + static const struct file_operations trace_options_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_options, + .read = trace_options_read, + .write = trace_options_write, + .llseek = generic_file_llseek, ++ .release = tracing_release_options, + }; + + /* diff --git a/queue-6.1/tracing-have-tracing_max_latency-inc-the-trace-array-ref-count.patch b/queue-6.1/tracing-have-tracing_max_latency-inc-the-trace-array-ref-count.patch new file mode 100644 index 00000000000..300868d7723 --- /dev/null +++ b/queue-6.1/tracing-have-tracing_max_latency-inc-the-trace-array-ref-count.patch @@ -0,0 +1,86 @@ +From 7d660c9b2bc95107f90a9f4c4759be85309a6550 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Wed, 6 Sep 2023 22:47:13 -0400 +Subject: tracing: Have tracing_max_latency inc the trace array ref count + +From: Steven Rostedt (Google) + +commit 7d660c9b2bc95107f90a9f4c4759be85309a6550 upstream. + +The tracing_max_latency file points to the trace_array max_latency field. +For an instance, if the file is opened and the instance is deleted, +reading or writing to the file will cause a use after free. + +Up the ref count of the trace_array when tracing_max_latency is opened. 
+ +Link: https://lkml.kernel.org/r/20230907024803.666889383@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Zheng Yejian +Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -1729,7 +1729,7 @@ static void trace_create_maxlat_file(str + init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); + tr->d_max_latency = trace_create_file("tracing_max_latency", + TRACE_MODE_WRITE, +- d_tracer, &tr->max_latency, ++ d_tracer, tr, + &tracing_max_lat_fops); + } + +@@ -1762,7 +1762,7 @@ void latency_fsnotify(struct trace_array + + #define trace_create_maxlat_file(tr, d_tracer) \ + trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ +- d_tracer, &tr->max_latency, &tracing_max_lat_fops) ++ d_tracer, tr, &tracing_max_lat_fops) + + #endif + +@@ -6604,14 +6604,18 @@ static ssize_t + tracing_max_lat_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos); ++ struct trace_array *tr = filp->private_data; ++ ++ return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); + } + + static ssize_t + tracing_max_lat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos); ++ struct trace_array *tr = filp->private_data; ++ ++ return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); + } + + #endif +@@ -7668,10 +7672,11 @@ static const struct file_operations trac + + #ifdef CONFIG_TRACER_MAX_TRACE + static const struct file_operations tracing_max_lat_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_generic_tr, + .read = tracing_max_lat_read, + .write = tracing_max_lat_write, + .llseek = generic_file_llseek, ++ .release = tracing_release_generic_tr, + }; + #endif + diff --git a/queue-6.1/tracing-increase-trace-array-ref-count-on-enable-and-filter-files.patch b/queue-6.1/tracing-increase-trace-array-ref-count-on-enable-and-filter-files.patch new file mode 100644 index 00000000000..bd677e3d0f9 --- /dev/null +++ b/queue-6.1/tracing-increase-trace-array-ref-count-on-enable-and-filter-files.patch @@ -0,0 +1,106 @@ +From f5ca233e2e66dc1c249bf07eefa37e34a6c9346a Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Google)" +Date: Wed, 6 Sep 2023 22:47:12 -0400 +Subject: tracing: Increase trace array ref count on enable and filter files + +From: Steven Rostedt (Google) + +commit f5ca233e2e66dc1c249bf07eefa37e34a6c9346a upstream. + +When the trace event enable and filter files are opened, increment the +trace array ref counter, otherwise they can be accessed when the trace +array is being deleted. The ref counter keeps the trace array from being +deleted while those files are opened. 
+ +Link: https://lkml.kernel.org/r/20230907024803.456187066@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Reported-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/trace.c | 27 +++++++++++++++++++++++++++ + kernel/trace/trace.h | 2 ++ + kernel/trace/trace_events.c | 6 ++++-- + 3 files changed, 33 insertions(+), 2 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4899,6 +4899,33 @@ int tracing_open_generic_tr(struct inode + return 0; + } + ++/* ++ * The private pointer of the inode is the trace_event_file. ++ * Update the tr ref count associated to it. ++ */ ++int tracing_open_file_tr(struct inode *inode, struct file *filp) ++{ ++ struct trace_event_file *file = inode->i_private; ++ int ret; ++ ++ ret = tracing_check_open_get_tr(file->tr); ++ if (ret) ++ return ret; ++ ++ filp->private_data = inode->i_private; ++ ++ return 0; ++} ++ ++int tracing_release_file_tr(struct inode *inode, struct file *filp) ++{ ++ struct trace_event_file *file = inode->i_private; ++ ++ trace_array_put(file->tr); ++ ++ return 0; ++} ++ + static int tracing_mark_open(struct inode *inode, struct file *filp) + { + stream_open(inode, filp); +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -590,6 +590,8 @@ void tracing_reset_all_online_cpus(void) + void tracing_reset_all_online_cpus_unlocked(void); + int tracing_open_generic(struct inode *inode, struct file *filp); + int tracing_open_generic_tr(struct inode *inode, struct file *filp); ++int tracing_open_file_tr(struct inode *inode, struct file *filp); ++int tracing_release_file_tr(struct inode *inode, struct file *filp); + bool tracing_is_disabled(void); + bool tracer_tracing_is_on(struct trace_array *tr); + void tracer_tracing_on(struct trace_array *tr); +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -2101,9 +2101,10 @@ static const struct file_operations ftra + }; + + static const struct file_operations ftrace_enable_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_file_tr, + .read = event_enable_read, + .write = event_enable_write, ++ .release = tracing_release_file_tr, + .llseek = default_llseek, + }; + +@@ -2120,9 +2121,10 @@ static const struct file_operations ftra + }; + + static const struct file_operations ftrace_event_filter_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_file_tr, + .read = event_filter_read, + .write = event_filter_write, ++ .release = tracing_release_file_tr, + .llseek = default_llseek, + }; +