From: Sasha Levin <sashal@kernel.org>
Date: Sat, 23 Sep 2023 12:16:13 +0000 (-0400)
Subject: Fixes for 5.15
X-Git-Tag: v6.5.6~113
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5359babfe81c4a89fb31f1ea9d6cdbacea1f4fc0;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 5.15

Signed-off-by: Sasha Levin <sashal@kernel.org>
---

diff --git a/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch b/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch
new file mode 100644
index 00000000000..20e98f29305
--- /dev/null
+++ b/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch
@@ -0,0 +1,155 @@
+From 2a41b882175a8811688ca7c43b2cfed663f7a974 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Dec 2021 08:20:47 +0100
+Subject: ata: ahci: Drop pointless VPRINTK() calls and convert the remaining
+ ones
+
+From: Hannes Reinecke <hare@suse.de>
+
+[ Upstream commit 93c7711494f47f9c829321e2a8711671b02f6e4c ]
+
+Drop pointless VPRINTK() calls for entering and exiting interrupt
+routines and convert the remaining calls to dev_dbg().
+
+Signed-off-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Stable-dep-of: 737dd811a3db ("ata: libahci: clear pending interrupt status")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/ahci.c       |  4 +---
+ drivers/ata/ahci_xgene.c |  4 ----
+ drivers/ata/libahci.c    | 18 ++++--------------
+ 3 files changed, 5 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
+index acc028414ee94..719fe2e2b36c2 100644
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -707,7 +707,7 @@ static void ahci_pci_init_controller(struct ata_host *host)
+ 
+ 		/* clear port IRQ */
+ 		tmp = readl(port_mmio + PORT_IRQ_STAT);
+-		VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
++		dev_dbg(&pdev->dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+ 		if (tmp)
+ 			writel(tmp, port_mmio + PORT_IRQ_STAT);
+ 	}
+@@ -1499,7 +1499,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+ 	u32 irq_stat, irq_masked;
+ 	unsigned int handled = 1;
+ 
+-	VPRINTK("ENTER\n");
+ 	hpriv = host->private_data;
+ 	mmio = hpriv->mmio;
+ 	irq_stat = readl(mmio + HOST_IRQ_STAT);
+@@ -1516,7 +1515,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+ 		irq_stat = readl(mmio + HOST_IRQ_STAT);
+ 		spin_unlock(&host->lock);
+ 	} while (irq_stat);
+-	VPRINTK("EXIT\n");
+ 
+ 	return IRQ_RETVAL(handled);
+ }
+diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
+index 292099410cf68..c1f61d255bc31 100644
+--- a/drivers/ata/ahci_xgene.c
++++ b/drivers/ata/ahci_xgene.c
+@@ -588,8 +588,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+ 	void __iomem *mmio;
+ 	u32 irq_stat, irq_masked;
+ 
+-	VPRINTK("ENTER\n");
+-
+ 	hpriv = host->private_data;
+ 	mmio = hpriv->mmio;
+ 
+@@ -612,8 +610,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+ 
+ 	spin_unlock(&host->lock);
+ 
+-	VPRINTK("EXIT\n");
+-
+ 	return IRQ_RETVAL(rc);
+ }
+ 
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index 192115a45dd78..b591a05768744 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -1216,12 +1216,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
+ 
+ 	/* clear SError */
+ 	tmp = readl(port_mmio + PORT_SCR_ERR);
+-	VPRINTK("PORT_SCR_ERR 0x%x\n", tmp);
++	dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp);
+ 	writel(tmp, port_mmio + PORT_SCR_ERR);
+ 
+ 	/* clear port IRQ */
+ 	tmp = readl(port_mmio + PORT_IRQ_STAT);
+-	VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
++	dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+ 	if (tmp)
+ 		writel(tmp, port_mmio + PORT_IRQ_STAT);
+ 
+@@ -1252,10 +1252,10 @@ void ahci_init_controller(struct ata_host *host)
+ 	}
+ 
+ 	tmp = readl(mmio + HOST_CTL);
+-	VPRINTK("HOST_CTL 0x%x\n", tmp);
++	dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp);
+ 	writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL);
+ 	tmp = readl(mmio + HOST_CTL);
+-	VPRINTK("HOST_CTL 0x%x\n", tmp);
++	dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp);
+ }
+ EXPORT_SYMBOL_GPL(ahci_init_controller);
+ 
+@@ -1906,8 +1906,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
+ 	void __iomem *port_mmio = ahci_port_base(ap);
+ 	u32 status;
+ 
+-	VPRINTK("ENTER\n");
+-
+ 	status = readl(port_mmio + PORT_IRQ_STAT);
+ 	writel(status, port_mmio + PORT_IRQ_STAT);
+ 
+@@ -1915,8 +1913,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
+ 	ahci_handle_port_interrupt(ap, port_mmio, status);
+ 	spin_unlock(ap->lock);
+ 
+-	VPRINTK("EXIT\n");
+-
+ 	return IRQ_HANDLED;
+ }
+ 
+@@ -1933,9 +1929,7 @@ u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
+ 		ap = host->ports[i];
+ 		if (ap) {
+ 			ahci_port_intr(ap);
+-			VPRINTK("port %u\n", i);
+ 		} else {
+-			VPRINTK("port %u (no irq)\n", i);
+ 			if (ata_ratelimit())
+ 				dev_warn(host->dev,
+ 					 "interrupt on disabled port %u\n", i);
+@@ -1956,8 +1950,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
+ 	void __iomem *mmio;
+ 	u32 irq_stat, irq_masked;
+ 
+-	VPRINTK("ENTER\n");
+-
+ 	hpriv = host->private_data;
+ 	mmio = hpriv->mmio;
+ 
+@@ -1985,8 +1977,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
+ 
+ 	spin_unlock(&host->lock);
+ 
+-	VPRINTK("EXIT\n");
+-
+ 	return IRQ_RETVAL(rc);
+ }
+ 
+-- 
+2.40.1
+
diff --git a/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch b/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch
new file mode 100644
index 00000000000..351a6840612
--- /dev/null
+++ b/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch
@@ -0,0 +1,101 @@
+From de70500d95c937f534fb005a2378ca680e99f871 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Sep 2023 16:17:10 +0800
+Subject: ata: libahci: clear pending interrupt status
+
+From: Szuying Chen <chensiying21@gmail.com>
+
+[ Upstream commit 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 ]
+
+When a CRC error occurs, the HBA asserts an interrupt to indicate an
+interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then
+does error recovery. If the adapter receives another SDB FIS
+with an error (PxIS.TFES) from the device before the start of the EH
+recovery process, the interrupt signaling the new SDB cannot be
+serviced as PxIE was cleared already. This in turn results in the HBA's
+inability to issue any command during the error recovery process after
+setting PxCMD.ST to 1 because PxIS.TFES is still set.
+
+According to AHCI 1.3.1 specifications section 6.2.2, fatal errors
+notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will
+cause the HBA to enter the ERR:Fatal state. In this state, the HBA
+shall not issue any new commands.
+
+To avoid this situation, introduce the function
+ahci_port_clear_pending_irq() to clear pending interrupts before
+executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2
+specification.
+
+Signed-off-by: Szuying Chen <Chloe_Chen@asmedia.com.tw>
+Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset")
+Cc: stable@vger.kernel.org
+Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------
+ 1 file changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index b591a05768744..e22d45fb8ebdc 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -1200,6 +1200,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf)
+ 	return sprintf(buf, "%d\n", emp->blink_policy);
+ }
+ 
++static void ahci_port_clear_pending_irq(struct ata_port *ap)
++{
++	struct ahci_host_priv *hpriv = ap->host->private_data;
++	void __iomem *port_mmio = ahci_port_base(ap);
++	u32 tmp;
++
++	/* clear SError */
++	tmp = readl(port_mmio + PORT_SCR_ERR);
++	dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp);
++	writel(tmp, port_mmio + PORT_SCR_ERR);
++
++	/* clear port IRQ */
++	tmp = readl(port_mmio + PORT_IRQ_STAT);
++	dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp);
++	if (tmp)
++		writel(tmp, port_mmio + PORT_IRQ_STAT);
++
++	writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT);
++}
++
+ static void ahci_port_init(struct device *dev, struct ata_port *ap,
+ 			   int port_no, void __iomem *mmio,
+ 			   void __iomem *port_mmio)
+@@ -1214,18 +1234,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
+ 	if (rc)
+ 		dev_warn(dev, "%s (%d)\n", emsg, rc);
+ 
+-	/* clear SError */
+-	tmp = readl(port_mmio + PORT_SCR_ERR);
+-	dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp);
+-	writel(tmp, port_mmio + PORT_SCR_ERR);
+-
+-	/* clear port IRQ */
+-	tmp = readl(port_mmio + PORT_IRQ_STAT);
+-	dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+-	if (tmp)
+-		writel(tmp, port_mmio + PORT_IRQ_STAT);
+-
+-	writel(1 << port_no, mmio + HOST_IRQ_STAT);
++	ahci_port_clear_pending_irq(ap);
+ 
+ 	/* mark esata ports */
+ 	tmp = readl(port_mmio + PORT_CMD);
+@@ -1555,6 +1564,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
+ 	tf.status = ATA_BUSY;
+ 	ata_tf_to_fis(&tf, 0, 0, d2h_fis);
+ 
++	ahci_port_clear_pending_irq(ap);
++
+ 	rc = sata_link_hardreset(link, timing, deadline, online,
+ 				 ahci_check_ready);
+ 
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch b/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch
new file mode 100644
index 00000000000..117602b8a0f
--- /dev/null
+++ b/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch
@@ -0,0 +1,64 @@
+From 00c71696b428f0e1be9d4dd14165a57e03b848d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Nov 2021 15:51:21 +0100
+Subject: ext4: change s_last_trim_minblks type to unsigned long
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit 2327fb2e23416cfb2795ccca2f77d4d65925be99 ]
+
+There is no good reason for the s_last_trim_minblks to be atomic. There is
+no data integrity needed and there is no real danger in setting and
+reading it in a racy manner. Change it to be unsigned long, the same type
+as s_clusters_per_group which is the maximum that's allowed.
+
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Suggested-by: Andreas Dilger <adilger@dilger.ca>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20211103145122.17338-1-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h    | 2 +-
+ fs/ext4/mballoc.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 976cb4b3ff660..e1a5ec7362ad6 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1656,7 +1656,7 @@ struct ext4_sb_info {
+ 	struct task_struct *s_mmp_tsk;
+ 
+ 	/* record the last minlen when FITRIM is called. */
+-	atomic_t s_last_trim_minblks;
++	unsigned long s_last_trim_minblks;
+ 
+ 	/* Reference to checksum algorithm driver via cryptoapi */
+ 	struct crypto_shash *s_chksum_driver;
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 6eb445bbb2be8..782a13aca4e4a 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6516,7 +6516,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ 	ext4_lock_group(sb, group);
+ 
+ 	if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+-	    minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
++	    minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
+ 		ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+ 		if (ret >= 0 && set_trimmed)
+ 			EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+@@ -6632,7 +6632,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ 	}
+ 
+ 	if (!ret)
+-		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
++		EXT4_SB(sb)->s_last_trim_minblks = minlen;
+ 
+ out:
+ 	range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch b/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch
new file mode 100644
index 00000000000..11af3df12ba
--- /dev/null
+++ b/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch
@@ -0,0 +1,76 @@
+From ebb24b8150ab0e2720fe014fec55dd8cd2d98e64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Sep 2023 17:04:55 +0200
+Subject: ext4: do not let fstrim block system suspend
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 5229a658f6453362fbb9da6bf96872ef25a7097e ]
+
+Len Brown has reported that system suspend sometimes fails due to
+inability to freeze a task working in ext4_trim_fs() for one minute.
+Trimming a large filesystem on a disk that slowly processes discard
+requests can indeed take a long time. Since discard is just an advisory
+call, it is perfectly fine to interrupt it at any time and return the
+number of blocks discarded until that moment. Do that when we detect the
+task is being frozen.
+
+Cc: stable@kernel.org
+Reported-by: Len Brown <lenb@kernel.org>
+Suggested-by: Dave Chinner <david@fromorbit.com>
+References: https://bugzilla.kernel.org/show_bug.cgi?id=216322
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230913150504.9054-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 8503b9aa34daf..e5b81d8be2324 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -16,6 +16,7 @@
+ #include <linux/slab.h>
+ #include <linux/nospec.h>
+ #include <linux/backing-dev.h>
++#include <linux/freezer.h>
+ #include <trace/events/ext4.h>
+ 
+ /*
+@@ -6443,6 +6444,11 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
+ 					EXT4_CLUSTER_BITS(sb);
+ }
+ 
++static bool ext4_trim_interrupted(void)
++{
++	return fatal_signal_pending(current) || freezing(current);
++}
++
+ static int ext4_try_to_trim_range(struct super_block *sb,
+ 		struct ext4_buddy *e4b, ext4_grpblk_t start,
+ 		ext4_grpblk_t max, ext4_grpblk_t minblocks)
+@@ -6476,8 +6482,8 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ 		free_count += next - start;
+ 		start = next + 1;
+ 
+-		if (fatal_signal_pending(current))
+-			return -ERESTARTSYS;
++		if (ext4_trim_interrupted())
++			return count;
+ 
+ 		if (need_resched()) {
+ 			ext4_unlock_group(sb, e4b->bd_group);
+@@ -6599,6 +6605,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ 	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+ 
+ 	for (group = first_group; group <= last_group; group++) {
++		if (ext4_trim_interrupted())
++			break;
+ 		grp = ext4_get_group_info(sb, group);
+ 		if (!grp)
+ 			continue;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch b/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch
new file mode 100644
index 00000000000..3bd3cf22dab
--- /dev/null
+++ b/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch
@@ -0,0 +1,170 @@
+From 882f4c9c7ea777927a1e0775763193b0fcb68522 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Sep 2023 17:04:54 +0200
+Subject: ext4: move setting of trimmed bit into ext4_try_to_trim_range()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 45e4ab320c9b5fa67b1fc3b6a9b381cfcc0c8488 ]
+
+Currently we set the group's trimmed bit in ext4_trim_all_free() based
+on the return value of ext4_try_to_trim_range(). However, when we want
+to abort trimming because of a suspend attempt, we want to return success
+from ext4_try_to_trim_range() but not set the trimmed bit. Instead of
+implementing awkward propagation of this information, just move setting
+of trimmed bit into ext4_try_to_trim_range() when the whole group is
+trimmed.
+
+Cc: stable@kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230913150504.9054-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 46 +++++++++++++++++++++++++---------------------
+ 1 file changed, 25 insertions(+), 21 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index d68ff5df6f668..8503b9aa34daf 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6433,6 +6433,16 @@ __acquires(bitlock)
+ 	return ret;
+ }
+ 
++static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
++					   ext4_group_t grp)
++{
++	if (grp < ext4_get_groups_count(sb))
++		return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
++	return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
++		ext4_group_first_block_no(sb, grp) - 1) >>
++					EXT4_CLUSTER_BITS(sb);
++}
++
+ static int ext4_try_to_trim_range(struct super_block *sb,
+ 		struct ext4_buddy *e4b, ext4_grpblk_t start,
+ 		ext4_grpblk_t max, ext4_grpblk_t minblocks)
+@@ -6440,9 +6450,12 @@ __acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
+ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ {
+ 	ext4_grpblk_t next, count, free_count;
++	bool set_trimmed = false;
+ 	void *bitmap;
+ 
+ 	bitmap = e4b->bd_bitmap;
++	if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
++		set_trimmed = true;
+ 	start = max(e4b->bd_info->bb_first_free, start);
+ 	count = 0;
+ 	free_count = 0;
+@@ -6457,16 +6470,14 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ 			int ret = ext4_trim_extent(sb, start, next - start, e4b);
+ 
+ 			if (ret && ret != -EOPNOTSUPP)
+-				break;
++				return count;
+ 			count += next - start;
+ 		}
+ 		free_count += next - start;
+ 		start = next + 1;
+ 
+-		if (fatal_signal_pending(current)) {
+-			count = -ERESTARTSYS;
+-			break;
+-		}
++		if (fatal_signal_pending(current))
++			return -ERESTARTSYS;
+ 
+ 		if (need_resched()) {
+ 			ext4_unlock_group(sb, e4b->bd_group);
+@@ -6478,6 +6489,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ 			break;
+ 	}
+ 
++	if (set_trimmed)
++		EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info);
++
+ 	return count;
+ }
+ 
+@@ -6488,7 +6502,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+  * @start:		first group block to examine
+  * @max:		last group block to examine
+  * @minblocks:		minimum extent block count
+- * @set_trimmed:	set the trimmed flag if at least one block is trimmed
+  *
+  * ext4_trim_all_free walks through group's block bitmap searching for free
+  * extents. When the free extent is found, mark it as used in group buddy
+@@ -6498,7 +6511,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ static ext4_grpblk_t
+ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ 		   ext4_grpblk_t start, ext4_grpblk_t max,
+-		   ext4_grpblk_t minblocks, bool set_trimmed)
++		   ext4_grpblk_t minblocks)
+ {
+ 	struct ext4_buddy e4b;
+ 	int ret;
+@@ -6515,13 +6528,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+ 	ext4_lock_group(sb, group);
+ 
+ 	if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+-	    minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
++	    minblocks < EXT4_SB(sb)->s_last_trim_minblks)
+ 		ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+-		if (ret >= 0 && set_trimmed)
+-			EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+-	} else {
++	else
+ 		ret = 0;
+-	}
+ 
+ 	ext4_unlock_group(sb, group);
+ 	ext4_mb_unload_buddy(&e4b);
+@@ -6554,7 +6564,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ 	ext4_fsblk_t first_data_blk =
+ 			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+ 	ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
+-	bool whole_group, eof = false;
+ 	int ret = 0;
+ 
+ 	start = range->start >> sb->s_blocksize_bits;
+@@ -6573,10 +6582,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ 		if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
+ 			goto out;
+ 	}
+-	if (end >= max_blks - 1) {
++	if (end >= max_blks - 1)
+ 		end = max_blks - 1;
+-		eof = true;
+-	}
+ 	if (end <= first_data_blk)
+ 		goto out;
+ 	if (start < first_data_blk)
+@@ -6590,7 +6597,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ 
+ 	/* end now represents the last cluster to discard in this group */
+ 	end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+-	whole_group = true;
+ 
+ 	for (group = first_group; group <= last_group; group++) {
+ 		grp = ext4_get_group_info(sb, group);
+@@ -6609,13 +6615,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ 		 * change it for the last group, note that last_cluster is
+ 		 * already computed earlier by ext4_get_group_no_and_offset()
+ 		 */
+-		if (group == last_group) {
++		if (group == last_group)
+ 			end = last_cluster;
+-			whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+-		}
+ 		if (grp->bb_free >= minlen) {
+ 			cnt = ext4_trim_all_free(sb, group, first_cluster,
+-						 end, minlen, whole_group);
++						 end, minlen);
+ 			if (cnt < 0) {
+ 				ret = cnt;
+ 				break;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch b/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch
new file mode 100644
index 00000000000..791c3507eee
--- /dev/null
+++ b/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch
@@ -0,0 +1,49 @@
+From b45d178235c822d1b3ea03ff86370868b519392c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 22:32:00 +0800
+Subject: ext4: replace the traditional ternary conditional operator with with
+ max()/min()
+
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+
+[ Upstream commit de8bf0e5ee7482585450357c6d4eddec8efc5cb7 ]
+
+Replace the traditional ternary conditional operator with max()/min()
+
+Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20230801143204.2284343-7-shikemeng@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 782a13aca4e4a..d68ff5df6f668 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6443,8 +6443,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ 	void *bitmap;
+ 
+ 	bitmap = e4b->bd_bitmap;
+-	start = (e4b->bd_info->bb_first_free > start) ?
+-		e4b->bd_info->bb_first_free : start;
++	start = max(e4b->bd_info->bb_first_free, start);
+ 	count = 0;
+ 	free_count = 0;
+ 
+@@ -6661,8 +6660,7 @@ ext4_mballoc_query_range(
+ 
+ 	ext4_lock_group(sb, group);
+ 
+-	start = (e4b.bd_info->bb_first_free > start) ?
+-		e4b.bd_info->bb_first_free : start;
++	start = max(e4b.bd_info->bb_first_free, start);
+ 	if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
+ 		end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+ 
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch b/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch
new file mode 100644
index 00000000000..fcf05abb316
--- /dev/null
+++ b/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch
@@ -0,0 +1,55 @@
+From 729efa407d2267c2ab0778f291cff3f71260da96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Aug 2021 14:08:53 +0200
+Subject: ext4: scope ret locally in ext4_try_to_trim_range()
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+[ Upstream commit afcc4e32f606dbfb47aa7309172c89174b86e74c ]
+
+As commit 6920b3913235 ("ext4: add new helper interface
+ext4_try_to_trim_range()") moves some code into the separate function
+ext4_try_to_trim_range(), the use of the variable ret within that
+function is more limited and can be adjusted as well.
+
+Scope the use of the variable ret locally and drop dead assignments.
+
+No functional change.
+
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Link: https://lore.kernel.org/r/20210820120853.23134-1-lukas.bulwahn@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 7e7153c673c0d..6eb445bbb2be8 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6441,7 +6441,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ {
+ 	ext4_grpblk_t next, count, free_count;
+ 	void *bitmap;
+-	int ret = 0;
+ 
+ 	bitmap = e4b->bd_bitmap;
+ 	start = (e4b->bd_info->bb_first_free > start) ?
+@@ -6456,10 +6455,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ 		next = mb_find_next_bit(bitmap, max + 1, start);
+ 
+ 		if ((next - start) >= minblocks) {
+-			ret = ext4_trim_extent(sb, start, next - start, e4b);
++			int ret = ext4_trim_extent(sb, start, next - start, e4b);
++
+ 			if (ret && ret != -EOPNOTSUPP)
+ 				break;
+-			ret = 0;
+ 			count += next - start;
+ 		}
+ 		free_count += next - start;
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch b/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch
new file mode 100644
index 00000000000..2d33173fb75
--- /dev/null
+++ b/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch
@@ -0,0 +1,55 @@
+From d79d0562fe7befd6610837c6ccbbd5cb39bc1e5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:34:41 -0400
+Subject: NFS: More fixes for nfs_direct_write_reschedule_io()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit b11243f720ee5f9376861099019c8542969b6318 ]
+
+Ensure that all requests are put back onto the commit list so that they
+can be rescheduled.
+
+Fixes: 4daaeba93822 ("NFS: Fix nfs_direct_write_reschedule_io()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/direct.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 5d86ffa72ceab..bbe2a5cc49f68 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -786,16 +786,21 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error)
+ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+ {
+ 	struct nfs_direct_req *dreq = hdr->dreq;
++	struct nfs_page *req;
++	struct nfs_commit_info cinfo;
+ 
++	nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ 	spin_lock(&dreq->lock);
+-	if (dreq->error == 0) {
++	if (dreq->error == 0)
+ 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+-		/* fake unstable write to let common nfs resend pages */
+-		hdr->verf.committed = NFS_UNSTABLE;
+-		hdr->good_bytes = hdr->args.offset + hdr->args.count -
+-			hdr->io_start;
+-	}
++	set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ 	spin_unlock(&dreq->lock);
++	while (!list_empty(&hdr->pages)) {
++		req = nfs_list_entry(hdr->pages.next);
++		nfs_list_remove_request(req);
++		nfs_unlock_request(req);
++		nfs_mark_request_commit(req, NULL, &cinfo, 0);
++	}
+ }
+ 
+ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch b/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch
new file mode 100644
index 00000000000..edb2d117205
--- /dev/null
+++ b/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch
@@ -0,0 +1,36 @@
+From 01ea2e4b380297a95e5c66df6655c2343745230e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:43:58 -0400
+Subject: NFS/pNFS: Report EINVAL errors from connect() to the server
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit dd7d7ee3ba2a70d12d02defb478790cf57d5b87b ]
+
+With IPv6, connect() can occasionally return EINVAL if a route is
+unavailable. If this happens during I/O to a data server, we want to
+report it using LAYOUTERROR as an inability to connect.
+
+Fixes: dd52128afdde ("NFSv4.1/pnfs Ensure flexfiles reports all connection related errors")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/flexfilelayout/flexfilelayout.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
+index ceef75b4d2494..4269df0f0ffa5 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -1238,6 +1238,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
+ 		case -EPFNOSUPPORT:
+ 		case -EPROTONOSUPPORT:
+ 		case -EOPNOTSUPP:
++		case -EINVAL:
+ 		case -ECONNREFUSED:
+ 		case -ECONNRESET:
+ 		case -EHOSTDOWN:
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch b/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch
new file mode 100644
index 00000000000..64d47d6e6d0
--- /dev/null
+++ b/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch
@@ -0,0 +1,150 @@
+From caa4d1a0e12550437e774ce7960816f63a5f1a27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:34:40 -0400
+Subject: NFS: Use the correct commit info in nfs_join_page_group()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit b193a78ddb5ee7dba074d3f28dc050069ba083c0 ]
+
+Ensure that nfs_clear_request_commit() updates the correct counters when
+it removes them from the commit list.
+
+Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/direct.c          |  8 +++++---
+ fs/nfs/write.c           | 23 ++++++++++++-----------
+ include/linux/nfs_page.h |  4 +++-
+ 3 files changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 018af6ec97b40..5d86ffa72ceab 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -525,7 +525,9 @@ static void nfs_direct_add_page_head(struct list_head *list,
+ 	kref_get(&head->wb_kref);
+ }
+ 
+-static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
++static void nfs_direct_join_group(struct list_head *list,
++				  struct nfs_commit_info *cinfo,
++				  struct inode *inode)
+ {
+ 	struct nfs_page *req, *subreq;
+ 
+@@ -547,7 +549,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
+ 				nfs_release_request(subreq);
+ 			}
+ 		} while ((subreq = subreq->wb_this_page) != req);
+-		nfs_join_page_group(req, inode);
++		nfs_join_page_group(req, cinfo, inode);
+ 	}
+ }
+ 
+@@ -573,7 +575,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+ 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
+ 
+-	nfs_direct_join_group(&reqs, dreq->inode);
++	nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
+ 
+ 	dreq->count = 0;
+ 	dreq->max_count = 0;
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index be70874bc3292..4231d51fc1add 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -58,7 +58,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
+ static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+ static const struct nfs_rw_ops nfs_rw_write_ops;
+ static void nfs_inode_remove_request(struct nfs_page *req);
+-static void nfs_clear_request_commit(struct nfs_page *req);
++static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
++				     struct nfs_page *req);
+ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+ 				      struct inode *inode);
+ static struct nfs_page *
+@@ -500,8 +501,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+  * the (former) group.  All subrequests are removed from any write or commit
+  * lists, unlinked from the group and destroyed.
+  */
+-void
+-nfs_join_page_group(struct nfs_page *head, struct inode *inode)
++void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
++			 struct inode *inode)
+ {
+ 	struct nfs_page *subreq;
+ 	struct nfs_page *destroy_list = NULL;
+@@ -531,7 +532,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
+ 	 * Commit list removal accounting is done after locks are dropped */
+ 	subreq = head;
+ 	do {
+-		nfs_clear_request_commit(subreq);
++		nfs_clear_request_commit(cinfo, subreq);
+ 		subreq = subreq->wb_this_page;
+ 	} while (subreq != head);
+ 
+@@ -565,8 +566,10 @@ nfs_lock_and_join_requests(struct page *page)
+ {
+ 	struct inode *inode = page_file_mapping(page)->host;
+ 	struct nfs_page *head;
++	struct nfs_commit_info cinfo;
+ 	int ret;
+ 
++	nfs_init_cinfo_from_inode(&cinfo, inode);
+ 	/*
+ 	 * A reference is taken only on the head request which acts as a
+ 	 * reference to the whole page group - the group will not be destroyed
+@@ -583,7 +586,7 @@ nfs_lock_and_join_requests(struct page *page)
+ 		return ERR_PTR(ret);
+ 	}
+ 
+-	nfs_join_page_group(head, inode);
++	nfs_join_page_group(head, &cinfo, inode);
+ 
+ 	return head;
+ }
+@@ -945,18 +948,16 @@ nfs_clear_page_commit(struct page *page)
+ }
+ 
+ /* Called holding the request lock on @req */
+-static void
+-nfs_clear_request_commit(struct nfs_page *req)
++static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
++				     struct nfs_page *req)
+ {
+ 	if (test_bit(PG_CLEAN, &req->wb_flags)) {
+ 		struct nfs_open_context *ctx = nfs_req_openctx(req);
+ 		struct inode *inode = d_inode(ctx->dentry);
+-		struct nfs_commit_info cinfo;
+ 
+-		nfs_init_cinfo_from_inode(&cinfo, inode);
+ 		mutex_lock(&NFS_I(inode)->commit_mutex);
+-		if (!pnfs_clear_request_commit(req, &cinfo)) {
+-			nfs_request_remove_commit_list(req, &cinfo);
++		if (!pnfs_clear_request_commit(req, cinfo)) {
++			nfs_request_remove_commit_list(req, cinfo);
+ 		}
+ 		mutex_unlock(&NFS_I(inode)->commit_mutex);
+ 		nfs_clear_page_commit(req->wb_page);
+diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
+index f0373a6cb5fb6..40aa09a21f75d 100644
+--- a/include/linux/nfs_page.h
++++ b/include/linux/nfs_page.h
+@@ -145,7 +145,9 @@ extern	void nfs_unlock_request(struct nfs_page *req);
+ extern	void nfs_unlock_and_release_request(struct nfs_page *);
+ extern	struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
+ extern	int nfs_page_group_lock_subrequests(struct nfs_page *head);
+-extern	void nfs_join_page_group(struct nfs_page *head, struct inode *inode);
++extern void nfs_join_page_group(struct nfs_page *head,
++				struct nfs_commit_info *cinfo,
++				struct inode *inode);
+ extern int nfs_page_group_lock(struct nfs_page *);
+ extern void nfs_page_group_unlock(struct nfs_page *);
+ extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch b/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch
new file mode 100644
index 00000000000..4d43e9d5542
--- /dev/null
+++ b/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch
@@ -0,0 +1,134 @@
+From a81a7077c38eab34c1f87754b6805791bd8cac1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 15:29:34 -0400
+Subject: NFSv4.1: fix pnfs MDS=DS session trunking
+
+From: Olga Kornievskaia <kolga@netapp.com>
+
+[ Upstream commit 806a3bc421a115fbb287c1efce63a48c54ee804b ]
+
+Currently, when GETDEVICEINFO returns multiple locations where each
+is a different IP but the server's identity is same as MDS, then
+nfs4_set_ds_client() finds the existing nfs_client structure which
+has the MDS's max_connect value (and if it's 1), then the 1st IP
+on the DS's list will get dropped due to MDS trunking rules. Other
+IPs would be added as they fall under the pnfs trunking rules.
+
+For the list of IPs the 1st goes thru calling nfs4_set_ds_client()
+which will eventually call nfs4_add_trunk() and call into
+rpc_clnt_test_and_add_xprt() which has the check for MDS trunking.
+The other IPs (after the 1st one), would call rpc_clnt_add_xprt()
+which doesn't go thru that check.
+
+nfs4_add_trunk() is called when MDS trunking is happening and it
+needs to enforce the usage of max_connect mount option of the
+1st mount. However, this shouldn't be applied to pnfs flow.
+
+Instead, this patch proposes to treat MDS=DS as DS trunking and
+make sure that MDS's max_connect limit does not apply to the
+1st IP returned in the GETDEVICEINFO list. It does so by
+marking the newly created client with a new flag NFS_CS_PNFS
+which is then used to pass the max_connect value to use into the
+rpc_clnt_test_and_add_xprt() instead of the existing rpc
+client's max_connect value set by the MDS connection.
+
+For example, mount was done without max_connect value set
+so MDS's rpc client has cl_max_connect=1. Upon calling into
+rpc_clnt_test_and_add_xprt() and using rpc client's value,
+the caller passes in max_connect value which has previously
+been set in the pnfs path (as a part of handling
+GETDEVICEINFO list of IPs) in nfs4_set_ds_client().
+
+However, when NFS_CS_PNFS flag is not set and we know we
+are doing MDS trunking, comparing a new IP of the same
+server, we then set the max_connect value to the
+existing MDS's value and pass that into
+rpc_clnt_test_and_add_xprt().
+
+Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts")
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4client.c       |  6 +++++-
+ include/linux/nfs_fs_sb.h |  1 +
+ net/sunrpc/clnt.c         | 11 +++++++----
+ 3 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
+index 81cda46d5829d..cba8b4c1fb4a3 100644
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -416,6 +416,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
+ 		.net = old->cl_net,
+ 		.servername = old->cl_hostname,
+ 	};
++	int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ?
++		clp->cl_max_connect : old->cl_max_connect;
+ 
+ 	if (clp->cl_proto != old->cl_proto)
+ 		return;
+@@ -429,7 +431,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
+ 	xprt_args.addrlen = clp_salen;
+ 
+ 	rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
+-			  rpc_clnt_test_and_add_xprt, NULL);
++			  rpc_clnt_test_and_add_xprt, &max_connect);
+ }
+ 
+ /**
+@@ -996,6 +998,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
+ 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ 
+ 	__set_bit(NFS_CS_DS, &cl_init.init_flags);
++	__set_bit(NFS_CS_PNFS, &cl_init.init_flags);
++	cl_init.max_connect = NFS_MAX_TRANSPORTS;
+ 	/*
+ 	 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
+ 	 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
+index da9ef0ab9b4b6..5e065f16d061d 100644
+--- a/include/linux/nfs_fs_sb.h
++++ b/include/linux/nfs_fs_sb.h
+@@ -48,6 +48,7 @@ struct nfs_client {
+ #define NFS_CS_NOPING		6		/* - don't ping on connect */
+ #define NFS_CS_DS		7		/* - Server is a DS */
+ #define NFS_CS_REUSEPORT	8		/* - reuse src port on reconnect */
++#define NFS_CS_PNFS		9		/* - Server used for pnfs */
+ 	struct sockaddr_storage	cl_addr;	/* server identifier */
+ 	size_t			cl_addrlen;
+ 	char *			cl_hostname;	/* hostname of server */
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index 130d8166b9ce8..f8750683bded4 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2822,19 +2822,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+  * @clnt: pointer to struct rpc_clnt
+  * @xps: pointer to struct rpc_xprt_switch,
+  * @xprt: pointer struct rpc_xprt
+- * @dummy: unused
++ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
+  */
+ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
+ 		struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
+-		void *dummy)
++		void *in_max_connect)
+ {
+ 	struct rpc_cb_add_xprt_calldata *data;
+ 	struct rpc_task *task;
++	int max_connect = clnt->cl_max_connect;
+ 
+-	if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
++	if (in_max_connect)
++		max_connect = *(int *)in_max_connect;
++	if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
+ 		rcu_read_lock();
+ 		pr_warn("SUNRPC: reached max allowed number (%d) did not add "
+-			"transport to server: %s\n", clnt->cl_max_connect,
++			"transport to server: %s\n", max_connect,
+ 			rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+ 		rcu_read_unlock();
+ 		return -EINVAL;
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch b/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch
new file mode 100644
index 00000000000..f87d15b3258
--- /dev/null
+++ b/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch
@@ -0,0 +1,68 @@
+From 024249d6037797c2eab917e37bfbe9586933c1fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 13:02:38 -0400
+Subject: NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server
+
+From: Olga Kornievskaia <kolga@netapp.com>
+
+[ Upstream commit 51d674a5e4889f1c8e223ac131cf218e1631e423 ]
+
+After receiving the location(s) of the DS server(s) in the
+GETDEVICEINFO, create the request for the clientid to such
+server and indicate that the client is connecting to a DS.
+
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Stable-dep-of: 806a3bc421a1 ("NFSv4.1: fix pnfs MDS=DS session trunking")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4client.c | 3 +++
+ fs/nfs/nfs4proc.c   | 4 ++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
+index 1bf7a72ebda6e..81cda46d5829d 100644
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -231,6 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
+ 	__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+ 	__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
+ 
++	if (test_bit(NFS_CS_DS, &cl_init->init_flags))
++		__set_bit(NFS_CS_DS, &clp->cl_flags);
+ 	/*
+ 	 * Set up the connection to the server before we add add to the
+ 	 * global list.
+@@ -993,6 +995,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
+ 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
+ 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ 
++	__set_bit(NFS_CS_DS, &cl_init.init_flags);
+ 	/*
+ 	 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
+ 	 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index a21e25cbd4515..32204c0b3d098 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -8715,6 +8715,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
+ #ifdef CONFIG_NFS_V4_1_MIGRATION
+ 	calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
+ #endif
++	if (test_bit(NFS_CS_DS, &clp->cl_flags))
++		calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
+ 	msg.rpc_argp = &calldata->args;
+ 	msg.rpc_resp = &calldata->res;
+ 	task_setup_data.callback_data = calldata;
+@@ -8792,6 +8794,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
+ 	/* Save the EXCHANGE_ID verifier session trunk tests */
+ 	memcpy(clp->cl_confirm.data, argp->verifier.data,
+ 	       sizeof(clp->cl_confirm.data));
++	if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS)
++		set_bit(NFS_CS_DS, &clp->cl_flags);
+ out:
+ 	trace_nfs4_exchange_id(clp, status);
+ 	rpc_put_task(task);
+-- 
+2.40.1
+
diff --git a/queue-5.15/series b/queue-5.15/series
new file mode 100644
index 00000000000..86755ffe71b
--- /dev/null
+++ b/queue-5.15/series
@@ -0,0 +1,16 @@
+nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch
+nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch
+nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch
+sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch
+nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch
+nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch
+tracing-make-trace_marker-_raw-stream-like.patch
+tracing-increase-trace-array-ref-count-on-enable-and.patch
+ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch
+ata-libahci-clear-pending-interrupt-status.patch
+ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch
+ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch
+ext4-replace-the-traditional-ternary-conditional-ope.patch
+ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch
+ext4-do-not-let-fstrim-block-system-suspend.patch
+tracing-have-event-inject-files-inc-the-trace-array-.patch
diff --git a/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch b/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch
new file mode 100644
index 00000000000..2dc41a32fdf
--- /dev/null
+++ b/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch
@@ -0,0 +1,35 @@
+From 1b253e40266ec9f6b078a4d109e04d5579e4875e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:50:09 -0400
+Subject: SUNRPC: Mark the cred for revalidation if the server rejects it
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 611fa42dfa9d2f3918ac5f4dd5705dfad81b323d ]
+
+If the server rejects the credential as being stale, or bad, then we
+should mark it for revalidation before retransmitting.
+
+Fixes: 7f5667a5f8c4 ("SUNRPC: Clean up rpc_verify_header()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/clnt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index b9c54c03c30a6..130d8166b9ce8 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2668,6 +2668,7 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
+ 	case rpc_autherr_rejectedverf:
+ 	case rpcsec_gsserr_credproblem:
+ 	case rpcsec_gsserr_ctxproblem:
++		rpcauth_invalcred(task);
+ 		if (!task->tk_cred_retry)
+ 			break;
+ 		task->tk_cred_retry--;
+-- 
+2.40.1
+
diff --git a/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch b/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch
new file mode 100644
index 00000000000..82e7a8f62da
--- /dev/null
+++ b/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch
@@ -0,0 +1,49 @@
+From 929153db87153eccdddcaa3fa8ca51a644dda458 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 22:47:16 -0400
+Subject: tracing: Have event inject files inc the trace array ref count
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit e5c624f027ac74f97e97c8f36c69228ac9f1102d ]
+
+The event inject files add events for a specific trace array. For an
+instance, if the file is opened and the instance is deleted, reading or
+writing to the file will cause a use after free.
+
+Up the ref count of the trace_array when an event inject file is opened.
+
+Link: https://lkml.kernel.org/r/20230907024804.292337868@goodmis.org
+Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Zheng Yejian <zhengyejian1@huawei.com>
+Fixes: 6c3edaf9fd6a ("tracing: Introduce trace event injection")
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events_inject.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
+index c188045c5f976..b1fce64e126c0 100644
+--- a/kernel/trace/trace_events_inject.c
++++ b/kernel/trace/trace_events_inject.c
+@@ -321,7 +321,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size,
+ }
+ 
+ const struct file_operations event_inject_fops = {
+-	.open = tracing_open_generic,
++	.open = tracing_open_file_tr,
+ 	.read = event_inject_read,
+ 	.write = event_inject_write,
++	.release = tracing_release_file_tr,
+ };
+-- 
+2.40.1
+
diff --git a/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch b/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch
new file mode 100644
index 00000000000..fca2c7826bd
--- /dev/null
+++ b/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch
@@ -0,0 +1,115 @@
+From 247d21f337455384310fd8ef7089a969b4d90181 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 22:47:12 -0400
+Subject: tracing: Increase trace array ref count on enable and filter files
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit f5ca233e2e66dc1c249bf07eefa37e34a6c9346a ]
+
+When the trace event enable and filter files are opened, increment the
+trace array ref counter, otherwise they can be accessed when the trace
+array is being deleted. The ref counter keeps the trace array from being
+deleted while those files are opened.
+
+Link: https://lkml.kernel.org/r/20230907024803.456187066@goodmis.org
+Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()")
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reported-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c        | 27 +++++++++++++++++++++++++++
+ kernel/trace/trace.h        |  2 ++
+ kernel/trace/trace_events.c |  6 ++++--
+ 3 files changed, 33 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 5aa23a4382c5e..7453840c77be2 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -4887,6 +4887,33 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+ 	return 0;
+ }
+ 
++/*
++ * The private pointer of the inode is the trace_event_file.
++ * Update the tr ref count associated to it.
++ */
++int tracing_open_file_tr(struct inode *inode, struct file *filp)
++{
++	struct trace_event_file *file = inode->i_private;
++	int ret;
++
++	ret = tracing_check_open_get_tr(file->tr);
++	if (ret)
++		return ret;
++
++	filp->private_data = inode->i_private;
++
++	return 0;
++}
++
++int tracing_release_file_tr(struct inode *inode, struct file *filp)
++{
++	struct trace_event_file *file = inode->i_private;
++
++	trace_array_put(file->tr);
++
++	return 0;
++}
++
+ static int tracing_mark_open(struct inode *inode, struct file *filp)
+ {
+ 	stream_open(inode, filp);
+diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
+index 90ab921884b10..a4a90bd3373be 100644
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -591,6 +591,8 @@ void tracing_reset_all_online_cpus(void);
+ void tracing_reset_all_online_cpus_unlocked(void);
+ int tracing_open_generic(struct inode *inode, struct file *filp);
+ int tracing_open_generic_tr(struct inode *inode, struct file *filp);
++int tracing_open_file_tr(struct inode *inode, struct file *filp);
++int tracing_release_file_tr(struct inode *inode, struct file *filp);
+ bool tracing_is_disabled(void);
+ bool tracer_tracing_is_on(struct trace_array *tr);
+ void tracer_tracing_on(struct trace_array *tr);
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 2a2a599997671..c626d02776a54 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -2078,9 +2078,10 @@ static const struct file_operations ftrace_set_event_notrace_pid_fops = {
+ };
+ 
+ static const struct file_operations ftrace_enable_fops = {
+-	.open = tracing_open_generic,
++	.open = tracing_open_file_tr,
+ 	.read = event_enable_read,
+ 	.write = event_enable_write,
++	.release = tracing_release_file_tr,
+ 	.llseek = default_llseek,
+ };
+ 
+@@ -2097,9 +2098,10 @@ static const struct file_operations ftrace_event_id_fops = {
+ };
+ 
+ static const struct file_operations ftrace_event_filter_fops = {
+-	.open = tracing_open_generic,
++	.open = tracing_open_file_tr,
+ 	.read = event_filter_read,
+ 	.write = event_filter_write,
++	.release = tracing_release_file_tr,
+ 	.llseek = default_llseek,
+ };
+ 
+-- 
+2.40.1
+
diff --git a/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch b/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch
new file mode 100644
index 00000000000..fb56bf1b977
--- /dev/null
+++ b/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch
@@ -0,0 +1,97 @@
+From 742e4e870af5dfe2b163eb5e6b7ba745d4e4f139 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Dec 2021 14:25:58 +0000
+Subject: tracing: Make trace_marker{,_raw} stream-like
+
+From: John Keeping <john@metanate.com>
+
+[ Upstream commit 2972e3050e3517a85ca1813b227d4c302e804343 ]
+
+The tracing marker files are write-only streams with no meaningful
+concept of file position.  Using stream_open() to mark them as
+stream-like indicates this and has the added advantage that a single
+file descriptor can now be used from multiple threads without contention
+thanks to clearing FMODE_ATOMIC_POS.
+
+Note that this has the potential to break existing userspace since
+both lseek(2) and pwrite(2) will now return ESPIPE when previously lseek
+would have updated the stored offset and pwrite would have appended to
+the trace.  A survey of libtracefs and several other projects found to
+use trace_marker(_raw) [1][2][3] suggests that everyone limits
+themselves to calling write(2) and close(2) on these file descriptors so
+there is a good chance this will go unnoticed and the benefits of
+reduced overhead and lock contention seem worth the risk.
+
+[1] https://github.com/google/perfetto
+[2] https://github.com/intel/media-driver/
+[3] https://w1.fi/cgit/hostap/
+
+Link: https://lkml.kernel.org/r/20211207142558.347029-1-john@metanate.com
+
+Signed-off-by: John Keeping <john@metanate.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Stable-dep-of: f5ca233e2e66 ("tracing: Increase trace array ref count on enable and filter files")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c | 18 ++++++++----------
+ 1 file changed, 8 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 6adacfc880d6c..5aa23a4382c5e 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -4887,6 +4887,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+ 	return 0;
+ }
+ 
++static int tracing_mark_open(struct inode *inode, struct file *filp)
++{
++	stream_open(inode, filp);
++	return tracing_open_generic_tr(inode, filp);
++}
++
+ static int tracing_release(struct inode *inode, struct file *file)
+ {
+ 	struct trace_array *tr = inode->i_private;
+@@ -7225,9 +7231,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
+ 	if (tt)
+ 		event_triggers_post_call(tr->trace_marker_file, tt);
+ 
+-	if (written > 0)
+-		*fpos += written;
+-
+ 	return written;
+ }
+ 
+@@ -7286,9 +7289,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+ 
+ 	__buffer_unlock_commit(buffer, event);
+ 
+-	if (written > 0)
+-		*fpos += written;
+-
+ 	return written;
+ }
+ 
+@@ -7699,16 +7699,14 @@ static const struct file_operations tracing_free_buffer_fops = {
+ };
+ 
+ static const struct file_operations tracing_mark_fops = {
+-	.open		= tracing_open_generic_tr,
++	.open		= tracing_mark_open,
+ 	.write		= tracing_mark_write,
+-	.llseek		= generic_file_llseek,
+ 	.release	= tracing_release_generic_tr,
+ };
+ 
+ static const struct file_operations tracing_mark_raw_fops = {
+-	.open		= tracing_open_generic_tr,
++	.open		= tracing_mark_open,
+ 	.write		= tracing_mark_raw_write,
+-	.llseek		= generic_file_llseek,
+ 	.release	= tracing_release_generic_tr,
+ };
+ 
+-- 
+2.40.1
+