git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
author Sasha Levin <sashal@kernel.org>
Sat, 23 Sep 2023 12:16:13 +0000 (08:16 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 23 Sep 2023 12:16:13 +0000 (08:16 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
17 files changed:
queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch [new file with mode: 0644]
queue-5.15/ata-libahci-clear-pending-interrupt-status.patch [new file with mode: 0644]
queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch [new file with mode: 0644]
queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch [new file with mode: 0644]
queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch [new file with mode: 0644]
queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch [new file with mode: 0644]
queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch [new file with mode: 0644]
queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch [new file with mode: 0644]
queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch [new file with mode: 0644]
queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch [new file with mode: 0644]
queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch [new file with mode: 0644]
queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch [new file with mode: 0644]
queue-5.15/series [new file with mode: 0644]
queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch [new file with mode: 0644]
queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch [new file with mode: 0644]
queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch [new file with mode: 0644]
queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch [new file with mode: 0644]

diff --git a/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch b/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch
new file mode 100644 (file)
index 0000000..20e98f2
--- /dev/null
@@ -0,0 +1,155 @@
+From 2a41b882175a8811688ca7c43b2cfed663f7a974 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Dec 2021 08:20:47 +0100
+Subject: ata: ahci: Drop pointless VPRINTK() calls and convert the remaining
+ ones
+
+From: Hannes Reinecke <hare@suse.de>
+
+[ Upstream commit 93c7711494f47f9c829321e2a8711671b02f6e4c ]
+
+Drop pointless VPRINTK() calls for entering and exiting interrupt
+routines and convert the remaining calls to dev_dbg().
+
+Signed-off-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Stable-dep-of: 737dd811a3db ("ata: libahci: clear pending interrupt status")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/ahci.c       |  4 +---
+ drivers/ata/ahci_xgene.c |  4 ----
+ drivers/ata/libahci.c    | 18 ++++--------------
+ 3 files changed, 5 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
+index acc028414ee94..719fe2e2b36c2 100644
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -707,7 +707,7 @@ static void ahci_pci_init_controller(struct ata_host *host)
+               /* clear port IRQ */
+               tmp = readl(port_mmio + PORT_IRQ_STAT);
+-              VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
++              dev_dbg(&pdev->dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+               if (tmp)
+                       writel(tmp, port_mmio + PORT_IRQ_STAT);
+       }
+@@ -1499,7 +1499,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+       u32 irq_stat, irq_masked;
+       unsigned int handled = 1;
+-      VPRINTK("ENTER\n");
+       hpriv = host->private_data;
+       mmio = hpriv->mmio;
+       irq_stat = readl(mmio + HOST_IRQ_STAT);
+@@ -1516,7 +1515,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+               irq_stat = readl(mmio + HOST_IRQ_STAT);
+               spin_unlock(&host->lock);
+       } while (irq_stat);
+-      VPRINTK("EXIT\n");
+       return IRQ_RETVAL(handled);
+ }
+diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
+index 292099410cf68..c1f61d255bc31 100644
+--- a/drivers/ata/ahci_xgene.c
++++ b/drivers/ata/ahci_xgene.c
+@@ -588,8 +588,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+       void __iomem *mmio;
+       u32 irq_stat, irq_masked;
+-      VPRINTK("ENTER\n");
+-
+       hpriv = host->private_data;
+       mmio = hpriv->mmio;
+@@ -612,8 +610,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+       spin_unlock(&host->lock);
+-      VPRINTK("EXIT\n");
+-
+       return IRQ_RETVAL(rc);
+ }
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index 192115a45dd78..b591a05768744 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -1216,12 +1216,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
+       /* clear SError */
+       tmp = readl(port_mmio + PORT_SCR_ERR);
+-      VPRINTK("PORT_SCR_ERR 0x%x\n", tmp);
++      dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp);
+       writel(tmp, port_mmio + PORT_SCR_ERR);
+       /* clear port IRQ */
+       tmp = readl(port_mmio + PORT_IRQ_STAT);
+-      VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp);
++      dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+       if (tmp)
+               writel(tmp, port_mmio + PORT_IRQ_STAT);
+@@ -1252,10 +1252,10 @@ void ahci_init_controller(struct ata_host *host)
+       }
+       tmp = readl(mmio + HOST_CTL);
+-      VPRINTK("HOST_CTL 0x%x\n", tmp);
++      dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp);
+       writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL);
+       tmp = readl(mmio + HOST_CTL);
+-      VPRINTK("HOST_CTL 0x%x\n", tmp);
++      dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp);
+ }
+ EXPORT_SYMBOL_GPL(ahci_init_controller);
+@@ -1906,8 +1906,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
+       void __iomem *port_mmio = ahci_port_base(ap);
+       u32 status;
+-      VPRINTK("ENTER\n");
+-
+       status = readl(port_mmio + PORT_IRQ_STAT);
+       writel(status, port_mmio + PORT_IRQ_STAT);
+@@ -1915,8 +1913,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
+       ahci_handle_port_interrupt(ap, port_mmio, status);
+       spin_unlock(ap->lock);
+-      VPRINTK("EXIT\n");
+-
+       return IRQ_HANDLED;
+ }
+@@ -1933,9 +1929,7 @@ u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
+               ap = host->ports[i];
+               if (ap) {
+                       ahci_port_intr(ap);
+-                      VPRINTK("port %u\n", i);
+               } else {
+-                      VPRINTK("port %u (no irq)\n", i);
+                       if (ata_ratelimit())
+                               dev_warn(host->dev,
+                                        "interrupt on disabled port %u\n", i);
+@@ -1956,8 +1950,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
+       void __iomem *mmio;
+       u32 irq_stat, irq_masked;
+-      VPRINTK("ENTER\n");
+-
+       hpriv = host->private_data;
+       mmio = hpriv->mmio;
+@@ -1985,8 +1977,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
+       spin_unlock(&host->lock);
+-      VPRINTK("EXIT\n");
+-
+       return IRQ_RETVAL(rc);
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch b/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch
new file mode 100644 (file)
index 0000000..351a684
--- /dev/null
@@ -0,0 +1,101 @@
+From de70500d95c937f534fb005a2378ca680e99f871 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Sep 2023 16:17:10 +0800
+Subject: ata: libahci: clear pending interrupt status
+
+From: Szuying Chen <chensiying21@gmail.com>
+
+[ Upstream commit 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 ]
+
+When a CRC error occurs, the HBA asserts an interrupt to indicate an
+interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then
+does error recovery. If the adapter receives another SDB FIS
+with an error (PxIS.TFES) from the device before the start of the EH
+recovery process, the interrupt signaling the new SDB cannot be
+serviced as PxIE was cleared already. This in turn results in the HBA's
+inability to issue any command during the error recovery process after
+setting PxCMD.ST to 1 because PxIS.TFES is still set.
+
+According to AHCI 1.3.1 specifications section 6.2.2, fatal errors
+notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will
+cause the HBA to enter the ERR:Fatal state. In this state, the HBA
+shall not issue any new commands.
+
+To avoid this situation, introduce the function
+ahci_port_clear_pending_irq() to clear pending interrupts before
+executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2
+specification.
+
+Signed-off-by: Szuying Chen <Chloe_Chen@asmedia.com.tw>
+Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset")
+Cc: stable@vger.kernel.org
+Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------
+ 1 file changed, 23 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index b591a05768744..e22d45fb8ebdc 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -1200,6 +1200,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf)
+       return sprintf(buf, "%d\n", emp->blink_policy);
+ }
++static void ahci_port_clear_pending_irq(struct ata_port *ap)
++{
++      struct ahci_host_priv *hpriv = ap->host->private_data;
++      void __iomem *port_mmio = ahci_port_base(ap);
++      u32 tmp;
++
++      /* clear SError */
++      tmp = readl(port_mmio + PORT_SCR_ERR);
++      dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp);
++      writel(tmp, port_mmio + PORT_SCR_ERR);
++
++      /* clear port IRQ */
++      tmp = readl(port_mmio + PORT_IRQ_STAT);
++      dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp);
++      if (tmp)
++              writel(tmp, port_mmio + PORT_IRQ_STAT);
++
++      writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT);
++}
++
+ static void ahci_port_init(struct device *dev, struct ata_port *ap,
+                          int port_no, void __iomem *mmio,
+                          void __iomem *port_mmio)
+@@ -1214,18 +1234,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
+       if (rc)
+               dev_warn(dev, "%s (%d)\n", emsg, rc);
+-      /* clear SError */
+-      tmp = readl(port_mmio + PORT_SCR_ERR);
+-      dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp);
+-      writel(tmp, port_mmio + PORT_SCR_ERR);
+-
+-      /* clear port IRQ */
+-      tmp = readl(port_mmio + PORT_IRQ_STAT);
+-      dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp);
+-      if (tmp)
+-              writel(tmp, port_mmio + PORT_IRQ_STAT);
+-
+-      writel(1 << port_no, mmio + HOST_IRQ_STAT);
++      ahci_port_clear_pending_irq(ap);
+       /* mark esata ports */
+       tmp = readl(port_mmio + PORT_CMD);
+@@ -1555,6 +1564,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
+       tf.status = ATA_BUSY;
+       ata_tf_to_fis(&tf, 0, 0, d2h_fis);
++      ahci_port_clear_pending_irq(ap);
++
+       rc = sata_link_hardreset(link, timing, deadline, online,
+                                ahci_check_ready);
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch b/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch
new file mode 100644 (file)
index 0000000..117602b
--- /dev/null
@@ -0,0 +1,64 @@
+From 00c71696b428f0e1be9d4dd14165a57e03b848d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Nov 2021 15:51:21 +0100
+Subject: ext4: change s_last_trim_minblks type to unsigned long
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit 2327fb2e23416cfb2795ccca2f77d4d65925be99 ]
+
+There is no good reason for the s_last_trim_minblks to be atomic. There is
+no data integrity needed and there is no real danger in setting and
+reading it in a racy manner. Change it to be unsigned long, the same type
+as s_clusters_per_group which is the maximum that's allowed.
+
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Suggested-by: Andreas Dilger <adilger@dilger.ca>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20211103145122.17338-1-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h    | 2 +-
+ fs/ext4/mballoc.c | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 976cb4b3ff660..e1a5ec7362ad6 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1656,7 +1656,7 @@ struct ext4_sb_info {
+       struct task_struct *s_mmp_tsk;
+       /* record the last minlen when FITRIM is called. */
+-      atomic_t s_last_trim_minblks;
++      unsigned long s_last_trim_minblks;
+       /* Reference to checksum algorithm driver via cryptoapi */
+       struct crypto_shash *s_chksum_driver;
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 6eb445bbb2be8..782a13aca4e4a 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6516,7 +6516,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+       ext4_lock_group(sb, group);
+       if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+-          minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) {
++          minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
+               ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+               if (ret >= 0 && set_trimmed)
+                       EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+@@ -6632,7 +6632,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+       }
+       if (!ret)
+-              atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
++              EXT4_SB(sb)->s_last_trim_minblks = minlen;
+ out:
+       range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch b/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch
new file mode 100644 (file)
index 0000000..11af3df
--- /dev/null
@@ -0,0 +1,76 @@
+From ebb24b8150ab0e2720fe014fec55dd8cd2d98e64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Sep 2023 17:04:55 +0200
+Subject: ext4: do not let fstrim block system suspend
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 5229a658f6453362fbb9da6bf96872ef25a7097e ]
+
+Len Brown has reported that system suspend sometimes fails due to
+inability to freeze a task working in ext4_trim_fs() for one minute.
+Trimming a large filesystem on a disk that slowly processes discard
+requests can indeed take a long time. Since discard is just an advisory
+call, it is perfectly fine to interrupt it at any time and return the
+number of blocks discarded until that moment. Do that when we detect the
+task is being frozen.
+
+Cc: stable@kernel.org
+Reported-by: Len Brown <lenb@kernel.org>
+Suggested-by: Dave Chinner <david@fromorbit.com>
+References: https://bugzilla.kernel.org/show_bug.cgi?id=216322
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230913150504.9054-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 8503b9aa34daf..e5b81d8be2324 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -16,6 +16,7 @@
+ #include <linux/slab.h>
+ #include <linux/nospec.h>
+ #include <linux/backing-dev.h>
++#include <linux/freezer.h>
+ #include <trace/events/ext4.h>
+ /*
+@@ -6443,6 +6444,11 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
+                                       EXT4_CLUSTER_BITS(sb);
+ }
++static bool ext4_trim_interrupted(void)
++{
++      return fatal_signal_pending(current) || freezing(current);
++}
++
+ static int ext4_try_to_trim_range(struct super_block *sb,
+               struct ext4_buddy *e4b, ext4_grpblk_t start,
+               ext4_grpblk_t max, ext4_grpblk_t minblocks)
+@@ -6476,8 +6482,8 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+               free_count += next - start;
+               start = next + 1;
+-              if (fatal_signal_pending(current))
+-                      return -ERESTARTSYS;
++              if (ext4_trim_interrupted())
++                      return count;
+               if (need_resched()) {
+                       ext4_unlock_group(sb, e4b->bd_group);
+@@ -6599,6 +6605,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+       end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+       for (group = first_group; group <= last_group; group++) {
++              if (ext4_trim_interrupted())
++                      break;
+               grp = ext4_get_group_info(sb, group);
+               if (!grp)
+                       continue;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch b/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch
new file mode 100644 (file)
index 0000000..3bd3cf2
--- /dev/null
@@ -0,0 +1,170 @@
+From 882f4c9c7ea777927a1e0775763193b0fcb68522 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Sep 2023 17:04:54 +0200
+Subject: ext4: move setting of trimmed bit into ext4_try_to_trim_range()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 45e4ab320c9b5fa67b1fc3b6a9b381cfcc0c8488 ]
+
+Currently we set the group's trimmed bit in ext4_trim_all_free() based
+on return value of ext4_try_to_trim_range(). However, when we want
+to abort trimming because of a suspend attempt, we want to return success
+from ext4_try_to_trim_range() but not set the trimmed bit. Instead of
+implementing awkward propagation of this information, just move setting
+of trimmed bit into ext4_try_to_trim_range() when the whole group is
+trimmed.
+
+Cc: stable@kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230913150504.9054-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 46 +++++++++++++++++++++++++---------------------
+ 1 file changed, 25 insertions(+), 21 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index d68ff5df6f668..8503b9aa34daf 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6433,6 +6433,16 @@ __acquires(bitlock)
+       return ret;
+ }
++static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
++                                         ext4_group_t grp)
++{
++      if (grp < ext4_get_groups_count(sb))
++              return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
++      return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
++              ext4_group_first_block_no(sb, grp) - 1) >>
++                                      EXT4_CLUSTER_BITS(sb);
++}
++
+ static int ext4_try_to_trim_range(struct super_block *sb,
+               struct ext4_buddy *e4b, ext4_grpblk_t start,
+               ext4_grpblk_t max, ext4_grpblk_t minblocks)
+@@ -6440,9 +6450,12 @@ __acquires(ext4_group_lock_ptr(sb, e4b->bd_group))
+ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ {
+       ext4_grpblk_t next, count, free_count;
++      bool set_trimmed = false;
+       void *bitmap;
+       bitmap = e4b->bd_bitmap;
++      if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
++              set_trimmed = true;
+       start = max(e4b->bd_info->bb_first_free, start);
+       count = 0;
+       free_count = 0;
+@@ -6457,16 +6470,14 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+                       int ret = ext4_trim_extent(sb, start, next - start, e4b);
+                       if (ret && ret != -EOPNOTSUPP)
+-                              break;
++                              return count;
+                       count += next - start;
+               }
+               free_count += next - start;
+               start = next + 1;
+-              if (fatal_signal_pending(current)) {
+-                      count = -ERESTARTSYS;
+-                      break;
+-              }
++              if (fatal_signal_pending(current))
++                      return -ERESTARTSYS;
+               if (need_resched()) {
+                       ext4_unlock_group(sb, e4b->bd_group);
+@@ -6478,6 +6489,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+                       break;
+       }
++      if (set_trimmed)
++              EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info);
++
+       return count;
+ }
+@@ -6488,7 +6502,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+  * @start:            first group block to examine
+  * @max:              last group block to examine
+  * @minblocks:                minimum extent block count
+- * @set_trimmed:      set the trimmed flag if at least one block is trimmed
+  *
+  * ext4_trim_all_free walks through group's block bitmap searching for free
+  * extents. When the free extent is found, mark it as used in group buddy
+@@ -6498,7 +6511,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ static ext4_grpblk_t
+ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+                  ext4_grpblk_t start, ext4_grpblk_t max,
+-                 ext4_grpblk_t minblocks, bool set_trimmed)
++                 ext4_grpblk_t minblocks)
+ {
+       struct ext4_buddy e4b;
+       int ret;
+@@ -6515,13 +6528,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+       ext4_lock_group(sb, group);
+       if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+-          minblocks < EXT4_SB(sb)->s_last_trim_minblks) {
++          minblocks < EXT4_SB(sb)->s_last_trim_minblks)
+               ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+-              if (ret >= 0 && set_trimmed)
+-                      EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+-      } else {
++      else
+               ret = 0;
+-      }
+       ext4_unlock_group(sb, group);
+       ext4_mb_unload_buddy(&e4b);
+@@ -6554,7 +6564,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+       ext4_fsblk_t first_data_blk =
+                       le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+       ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
+-      bool whole_group, eof = false;
+       int ret = 0;
+       start = range->start >> sb->s_blocksize_bits;
+@@ -6573,10 +6582,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+               if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
+                       goto out;
+       }
+-      if (end >= max_blks - 1) {
++      if (end >= max_blks - 1)
+               end = max_blks - 1;
+-              eof = true;
+-      }
+       if (end <= first_data_blk)
+               goto out;
+       if (start < first_data_blk)
+@@ -6590,7 +6597,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+       /* end now represents the last cluster to discard in this group */
+       end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+-      whole_group = true;
+       for (group = first_group; group <= last_group; group++) {
+               grp = ext4_get_group_info(sb, group);
+@@ -6609,13 +6615,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+                * change it for the last group, note that last_cluster is
+                * already computed earlier by ext4_get_group_no_and_offset()
+                */
+-              if (group == last_group) {
++              if (group == last_group)
+                       end = last_cluster;
+-                      whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+-              }
+               if (grp->bb_free >= minlen) {
+                       cnt = ext4_trim_all_free(sb, group, first_cluster,
+-                                               end, minlen, whole_group);
++                                               end, minlen);
+                       if (cnt < 0) {
+                               ret = cnt;
+                               break;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch b/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch
new file mode 100644 (file)
index 0000000..791c350
--- /dev/null
@@ -0,0 +1,49 @@
+From b45d178235c822d1b3ea03ff86370868b519392c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 22:32:00 +0800
+Subject: ext4: replace the traditional ternary conditional operator with
+ max()/min()
+
+From: Kemeng Shi <shikemeng@huaweicloud.com>
+
+[ Upstream commit de8bf0e5ee7482585450357c6d4eddec8efc5cb7 ]
+
+Replace the traditional ternary conditional operator with max()/min()
+
+Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20230801143204.2284343-7-shikemeng@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 782a13aca4e4a..d68ff5df6f668 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6443,8 +6443,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+       void *bitmap;
+       bitmap = e4b->bd_bitmap;
+-      start = (e4b->bd_info->bb_first_free > start) ?
+-              e4b->bd_info->bb_first_free : start;
++      start = max(e4b->bd_info->bb_first_free, start);
+       count = 0;
+       free_count = 0;
+@@ -6661,8 +6660,7 @@ ext4_mballoc_query_range(
+       ext4_lock_group(sb, group);
+-      start = (e4b.bd_info->bb_first_free > start) ?
+-              e4b.bd_info->bb_first_free : start;
++      start = max(e4b.bd_info->bb_first_free, start);
+       if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
+               end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+-- 
+2.40.1
+
diff --git a/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch b/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch
new file mode 100644 (file)
index 0000000..fcf05ab
--- /dev/null
@@ -0,0 +1,55 @@
+From 729efa407d2267c2ab0778f291cff3f71260da96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Aug 2021 14:08:53 +0200
+Subject: ext4: scope ret locally in ext4_try_to_trim_range()
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+[ Upstream commit afcc4e32f606dbfb47aa7309172c89174b86e74c ]
+
+As commit 6920b3913235 ("ext4: add new helper interface
+ext4_try_to_trim_range()") moves some code into the separate function
+ext4_try_to_trim_range(), the use of the variable ret within that
+function is more limited and can be adjusted as well.
+
+Scope the use of the variable ret locally and drop dead assignments.
+
+No functional change.
+
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Link: https://lore.kernel.org/r/20210820120853.23134-1-lukas.bulwahn@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/mballoc.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 7e7153c673c0d..6eb445bbb2be8 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -6441,7 +6441,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+ {
+       ext4_grpblk_t next, count, free_count;
+       void *bitmap;
+-      int ret = 0;
+       bitmap = e4b->bd_bitmap;
+       start = (e4b->bd_info->bb_first_free > start) ?
+@@ -6456,10 +6455,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
+               next = mb_find_next_bit(bitmap, max + 1, start);
+               if ((next - start) >= minblocks) {
+-                      ret = ext4_trim_extent(sb, start, next - start, e4b);
++                      int ret = ext4_trim_extent(sb, start, next - start, e4b);
++
+                       if (ret && ret != -EOPNOTSUPP)
+                               break;
+-                      ret = 0;
+                       count += next - start;
+               }
+               free_count += next - start;
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch b/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch
new file mode 100644 (file)
index 0000000..2d33173
--- /dev/null
@@ -0,0 +1,55 @@
+From d79d0562fe7befd6610837c6ccbbd5cb39bc1e5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:34:41 -0400
+Subject: NFS: More fixes for nfs_direct_write_reschedule_io()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit b11243f720ee5f9376861099019c8542969b6318 ]
+
+Ensure that all requests are put back onto the commit list so that they
+can be rescheduled.
+
+Fixes: 4daaeba93822 ("NFS: Fix nfs_direct_write_reschedule_io()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/direct.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 5d86ffa72ceab..bbe2a5cc49f68 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -786,16 +786,21 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error)
+ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+ {
+       struct nfs_direct_req *dreq = hdr->dreq;
++      struct nfs_page *req;
++      struct nfs_commit_info cinfo;
++      nfs_init_cinfo_from_dreq(&cinfo, dreq);
+       spin_lock(&dreq->lock);
+-      if (dreq->error == 0) {
++      if (dreq->error == 0)
+               dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+-              /* fake unstable write to let common nfs resend pages */
+-              hdr->verf.committed = NFS_UNSTABLE;
+-              hdr->good_bytes = hdr->args.offset + hdr->args.count -
+-                      hdr->io_start;
+-      }
++      set_bit(NFS_IOHDR_REDO, &hdr->flags);
+       spin_unlock(&dreq->lock);
++      while (!list_empty(&hdr->pages)) {
++              req = nfs_list_entry(hdr->pages.next);
++              nfs_list_remove_request(req);
++              nfs_unlock_request(req);
++              nfs_mark_request_commit(req, NULL, &cinfo, 0);
++      }
+ }
+ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch b/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch
new file mode 100644 (file)
index 0000000..edb2d11
--- /dev/null
@@ -0,0 +1,36 @@
+From 01ea2e4b380297a95e5c66df6655c2343745230e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:43:58 -0400
+Subject: NFS/pNFS: Report EINVAL errors from connect() to the server
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit dd7d7ee3ba2a70d12d02defb478790cf57d5b87b ]
+
+With IPv6, connect() can occasionally return EINVAL if a route is
+unavailable. If this happens during I/O to a data server, we want to
+report it using LAYOUTERROR as an inability to connect.
+
+Fixes: dd52128afdde ("NFSv4.1/pnfs Ensure flexfiles reports all connection related errors")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/flexfilelayout/flexfilelayout.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
+index ceef75b4d2494..4269df0f0ffa5 100644
+--- a/fs/nfs/flexfilelayout/flexfilelayout.c
++++ b/fs/nfs/flexfilelayout/flexfilelayout.c
+@@ -1238,6 +1238,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
+               case -EPFNOSUPPORT:
+               case -EPROTONOSUPPORT:
+               case -EOPNOTSUPP:
++              case -EINVAL:
+               case -ECONNREFUSED:
+               case -ECONNRESET:
+               case -EHOSTDOWN:
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch b/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch
new file mode 100644 (file)
index 0000000..64d47d6
--- /dev/null
@@ -0,0 +1,150 @@
+From caa4d1a0e12550437e774ce7960816f63a5f1a27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:34:40 -0400
+Subject: NFS: Use the correct commit info in nfs_join_page_group()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit b193a78ddb5ee7dba074d3f28dc050069ba083c0 ]
+
+Ensure that nfs_clear_request_commit() updates the correct counters when
+it removes them from the commit list.
+
+Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/direct.c          |  8 +++++---
+ fs/nfs/write.c           | 23 ++++++++++++-----------
+ include/linux/nfs_page.h |  4 +++-
+ 3 files changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
+index 018af6ec97b40..5d86ffa72ceab 100644
+--- a/fs/nfs/direct.c
++++ b/fs/nfs/direct.c
+@@ -525,7 +525,9 @@ static void nfs_direct_add_page_head(struct list_head *list,
+       kref_get(&head->wb_kref);
+ }
+-static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
++static void nfs_direct_join_group(struct list_head *list,
++                                struct nfs_commit_info *cinfo,
++                                struct inode *inode)
+ {
+       struct nfs_page *req, *subreq;
+@@ -547,7 +549,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
+                               nfs_release_request(subreq);
+                       }
+               } while ((subreq = subreq->wb_this_page) != req);
+-              nfs_join_page_group(req, inode);
++              nfs_join_page_group(req, cinfo, inode);
+       }
+ }
+@@ -573,7 +575,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+       nfs_init_cinfo_from_dreq(&cinfo, dreq);
+       nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
+-      nfs_direct_join_group(&reqs, dreq->inode);
++      nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
+       dreq->count = 0;
+       dreq->max_count = 0;
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index be70874bc3292..4231d51fc1add 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -58,7 +58,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
+ static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+ static const struct nfs_rw_ops nfs_rw_write_ops;
+ static void nfs_inode_remove_request(struct nfs_page *req);
+-static void nfs_clear_request_commit(struct nfs_page *req);
++static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
++                                   struct nfs_page *req);
+ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+                                     struct inode *inode);
+ static struct nfs_page *
+@@ -500,8 +501,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
+  * the (former) group.  All subrequests are removed from any write or commit
+  * lists, unlinked from the group and destroyed.
+  */
+-void
+-nfs_join_page_group(struct nfs_page *head, struct inode *inode)
++void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
++                       struct inode *inode)
+ {
+       struct nfs_page *subreq;
+       struct nfs_page *destroy_list = NULL;
+@@ -531,7 +532,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
+        * Commit list removal accounting is done after locks are dropped */
+       subreq = head;
+       do {
+-              nfs_clear_request_commit(subreq);
++              nfs_clear_request_commit(cinfo, subreq);
+               subreq = subreq->wb_this_page;
+       } while (subreq != head);
+@@ -565,8 +566,10 @@ nfs_lock_and_join_requests(struct page *page)
+ {
+       struct inode *inode = page_file_mapping(page)->host;
+       struct nfs_page *head;
++      struct nfs_commit_info cinfo;
+       int ret;
++      nfs_init_cinfo_from_inode(&cinfo, inode);
+       /*
+        * A reference is taken only on the head request which acts as a
+        * reference to the whole page group - the group will not be destroyed
+@@ -583,7 +586,7 @@ nfs_lock_and_join_requests(struct page *page)
+               return ERR_PTR(ret);
+       }
+-      nfs_join_page_group(head, inode);
++      nfs_join_page_group(head, &cinfo, inode);
+       return head;
+ }
+@@ -945,18 +948,16 @@ nfs_clear_page_commit(struct page *page)
+ }
+ /* Called holding the request lock on @req */
+-static void
+-nfs_clear_request_commit(struct nfs_page *req)
++static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
++                                   struct nfs_page *req)
+ {
+       if (test_bit(PG_CLEAN, &req->wb_flags)) {
+               struct nfs_open_context *ctx = nfs_req_openctx(req);
+               struct inode *inode = d_inode(ctx->dentry);
+-              struct nfs_commit_info cinfo;
+-              nfs_init_cinfo_from_inode(&cinfo, inode);
+               mutex_lock(&NFS_I(inode)->commit_mutex);
+-              if (!pnfs_clear_request_commit(req, &cinfo)) {
+-                      nfs_request_remove_commit_list(req, &cinfo);
++              if (!pnfs_clear_request_commit(req, cinfo)) {
++                      nfs_request_remove_commit_list(req, cinfo);
+               }
+               mutex_unlock(&NFS_I(inode)->commit_mutex);
+               nfs_clear_page_commit(req->wb_page);
+diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
+index f0373a6cb5fb6..40aa09a21f75d 100644
+--- a/include/linux/nfs_page.h
++++ b/include/linux/nfs_page.h
+@@ -145,7 +145,9 @@ extern     void nfs_unlock_request(struct nfs_page *req);
+ extern        void nfs_unlock_and_release_request(struct nfs_page *);
+ extern        struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
+ extern        int nfs_page_group_lock_subrequests(struct nfs_page *head);
+-extern        void nfs_join_page_group(struct nfs_page *head, struct inode *inode);
++extern void nfs_join_page_group(struct nfs_page *head,
++                              struct nfs_commit_info *cinfo,
++                              struct inode *inode);
+ extern int nfs_page_group_lock(struct nfs_page *);
+ extern void nfs_page_group_unlock(struct nfs_page *);
+ extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch b/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch
new file mode 100644 (file)
index 0000000..4d43e9d
--- /dev/null
@@ -0,0 +1,134 @@
+From a81a7077c38eab34c1f87754b6805791bd8cac1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Aug 2023 15:29:34 -0400
+Subject: NFSv4.1: fix pnfs MDS=DS session trunking
+
+From: Olga Kornievskaia <kolga@netapp.com>
+
+[ Upstream commit 806a3bc421a115fbb287c1efce63a48c54ee804b ]
+
+Currently, when GETDEVICEINFO returns multiple locations where each
+is a different IP but the server's identity is same as MDS, then
+nfs4_set_ds_client() finds the existing nfs_client structure which
+has the MDS's max_connect value (and if it's 1), then the 1st IP
+on the DS's list will get dropped due to MDS trunking rules. Other
+IPs would be added as they fall under the pnfs trunking rules.
+
+For the list of IPs the 1st goes thru calling nfs4_set_ds_client()
+which will eventually call nfs4_add_trunk() and call into
+rpc_clnt_test_and_add_xprt() which has the check for MDS trunking.
+The other IPs (after the 1st one), would call rpc_clnt_add_xprt()
+which doesn't go thru that check.
+
+nfs4_add_trunk() is called when MDS trunking is happening and it
+needs to enforce the usage of max_connect mount option of the
+1st mount. However, this shouldn't be applied to pnfs flow.
+
+Instead, this patch proposes to treat MDS=DS as DS trunking and
+make sure that MDS's max_connect limit does not apply to the
+1st IP returned in the GETDEVICEINFO list. It does so by
+marking the newly created client with a new flag NFS_CS_PNFS
+which is then used to pass max_connect value to use into the
+rpc_clnt_test_and_add_xprt() instead of the existing rpc
+client's max_connect value set by the MDS connection.
+
+For example, mount was done without max_connect value set
+so MDS's rpc client has cl_max_connect=1. Upon calling into
+rpc_clnt_test_and_add_xprt() and using rpc client's value,
+the caller passes in max_connect value which has previously
+been set in the pnfs path (as a part of handling
+GETDEVICEINFO list of IPs) in nfs4_set_ds_client().
+
+However, when NFS_CS_PNFS flag is not set and we know we
+are doing MDS trunking, comparing a new IP of the same
+server, we then set the max_connect value to the
+existing MDS's value and pass that into
+rpc_clnt_test_and_add_xprt().
+
+Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts")
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4client.c       |  6 +++++-
+ include/linux/nfs_fs_sb.h |  1 +
+ net/sunrpc/clnt.c         | 11 +++++++----
+ 3 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
+index 81cda46d5829d..cba8b4c1fb4a3 100644
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -416,6 +416,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
+               .net = old->cl_net,
+               .servername = old->cl_hostname,
+       };
++      int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ?
++              clp->cl_max_connect : old->cl_max_connect;
+       if (clp->cl_proto != old->cl_proto)
+               return;
+@@ -429,7 +431,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old)
+       xprt_args.addrlen = clp_salen;
+       rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args,
+-                        rpc_clnt_test_and_add_xprt, NULL);
++                        rpc_clnt_test_and_add_xprt, &max_connect);
+ }
+ /**
+@@ -996,6 +998,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
+               __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+       __set_bit(NFS_CS_DS, &cl_init.init_flags);
++      __set_bit(NFS_CS_PNFS, &cl_init.init_flags);
++      cl_init.max_connect = NFS_MAX_TRANSPORTS;
+       /*
+        * Set an authflavor equual to the MDS value. Use the MDS nfs_client
+        * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
+index da9ef0ab9b4b6..5e065f16d061d 100644
+--- a/include/linux/nfs_fs_sb.h
++++ b/include/linux/nfs_fs_sb.h
+@@ -48,6 +48,7 @@ struct nfs_client {
+ #define NFS_CS_NOPING         6               /* - don't ping on connect */
+ #define NFS_CS_DS             7               /* - Server is a DS */
+ #define NFS_CS_REUSEPORT      8               /* - reuse src port on reconnect */
++#define NFS_CS_PNFS           9               /* - Server used for pnfs */
+       struct sockaddr_storage cl_addr;        /* server identifier */
+       size_t                  cl_addrlen;
+       char *                  cl_hostname;    /* hostname of server */
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index 130d8166b9ce8..f8750683bded4 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2822,19 +2822,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+  * @clnt: pointer to struct rpc_clnt
+  * @xps: pointer to struct rpc_xprt_switch,
+  * @xprt: pointer struct rpc_xprt
+- * @dummy: unused
++ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
+  */
+ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
+               struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
+-              void *dummy)
++              void *in_max_connect)
+ {
+       struct rpc_cb_add_xprt_calldata *data;
+       struct rpc_task *task;
++      int max_connect = clnt->cl_max_connect;
+-      if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
++      if (in_max_connect)
++              max_connect = *(int *)in_max_connect;
++      if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
+               rcu_read_lock();
+               pr_warn("SUNRPC: reached max allowed number (%d) did not add "
+-                      "transport to server: %s\n", clnt->cl_max_connect,
++                      "transport to server: %s\n", max_connect,
+                       rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+               rcu_read_unlock();
+               return -EINVAL;
+-- 
+2.40.1
+
diff --git a/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch b/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch
new file mode 100644 (file)
index 0000000..f87d15b
--- /dev/null
@@ -0,0 +1,68 @@
+From 024249d6037797c2eab917e37bfbe9586933c1fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 13:02:38 -0400
+Subject: NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server
+
+From: Olga Kornievskaia <kolga@netapp.com>
+
+[ Upstream commit 51d674a5e4889f1c8e223ac131cf218e1631e423 ]
+
+After receiving the location(s) of the DS server(s) in the
+GETDEVICEINFO, create the request for the clientid to such
+server and indicate that the client is connecting to a DS.
+
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Stable-dep-of: 806a3bc421a1 ("NFSv4.1: fix pnfs MDS=DS session trunking")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4client.c | 3 +++
+ fs/nfs/nfs4proc.c   | 4 ++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
+index 1bf7a72ebda6e..81cda46d5829d 100644
+--- a/fs/nfs/nfs4client.c
++++ b/fs/nfs/nfs4client.c
+@@ -231,6 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
+       __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+       __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
++      if (test_bit(NFS_CS_DS, &cl_init->init_flags))
++              __set_bit(NFS_CS_DS, &clp->cl_flags);
+       /*
+        * Set up the connection to the server before we add add to the
+        * global list.
+@@ -993,6 +995,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
+       if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
+               __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
++      __set_bit(NFS_CS_DS, &cl_init.init_flags);
+       /*
+        * Set an authflavor equual to the MDS value. Use the MDS nfs_client
+        * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+index a21e25cbd4515..32204c0b3d098 100644
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -8715,6 +8715,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
+ #ifdef CONFIG_NFS_V4_1_MIGRATION
+       calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
+ #endif
++      if (test_bit(NFS_CS_DS, &clp->cl_flags))
++              calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
+       msg.rpc_argp = &calldata->args;
+       msg.rpc_resp = &calldata->res;
+       task_setup_data.callback_data = calldata;
+@@ -8792,6 +8794,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
+       /* Save the EXCHANGE_ID verifier session trunk tests */
+       memcpy(clp->cl_confirm.data, argp->verifier.data,
+              sizeof(clp->cl_confirm.data));
++      if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS)
++              set_bit(NFS_CS_DS, &clp->cl_flags);
+ out:
+       trace_nfs4_exchange_id(clp, status);
+       rpc_put_task(task);
+-- 
+2.40.1
+
diff --git a/queue-5.15/series b/queue-5.15/series
new file mode 100644 (file)
index 0000000..86755ff
--- /dev/null
@@ -0,0 +1,16 @@
+nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch
+nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch
+nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch
+sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch
+nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch
+nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch
+tracing-make-trace_marker-_raw-stream-like.patch
+tracing-increase-trace-array-ref-count-on-enable-and.patch
+ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch
+ata-libahci-clear-pending-interrupt-status.patch
+ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch
+ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch
+ext4-replace-the-traditional-ternary-conditional-ope.patch
+ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch
+ext4-do-not-let-fstrim-block-system-suspend.patch
+tracing-have-event-inject-files-inc-the-trace-array-.patch
diff --git a/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch b/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch
new file mode 100644 (file)
index 0000000..2dc41a3
--- /dev/null
@@ -0,0 +1,35 @@
+From 1b253e40266ec9f6b078a4d109e04d5579e4875e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Sep 2023 12:50:09 -0400
+Subject: SUNRPC: Mark the cred for revalidation if the server rejects it
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 611fa42dfa9d2f3918ac5f4dd5705dfad81b323d ]
+
+If the server rejects the credential as being stale, or bad, then we
+should mark it for revalidation before retransmitting.
+
+Fixes: 7f5667a5f8c4 ("SUNRPC: Clean up rpc_verify_header()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/clnt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index b9c54c03c30a6..130d8166b9ce8 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -2668,6 +2668,7 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
+       case rpc_autherr_rejectedverf:
+       case rpcsec_gsserr_credproblem:
+       case rpcsec_gsserr_ctxproblem:
++              rpcauth_invalcred(task);
+               if (!task->tk_cred_retry)
+                       break;
+               task->tk_cred_retry--;
+-- 
+2.40.1
+
diff --git a/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch b/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch
new file mode 100644 (file)
index 0000000..82e7a8f
--- /dev/null
@@ -0,0 +1,49 @@
+From 929153db87153eccdddcaa3fa8ca51a644dda458 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 22:47:16 -0400
+Subject: tracing: Have event inject files inc the trace array ref count
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit e5c624f027ac74f97e97c8f36c69228ac9f1102d ]
+
+The event inject files add events for a specific trace array. For an
+instance, if the file is opened and the instance is deleted, reading or
+writing to the file will cause a use after free.
+
+Up the ref count of the trace_array when an event inject file is opened.
+
+Link: https://lkml.kernel.org/r/20230907024804.292337868@goodmis.org
+Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Zheng Yejian <zhengyejian1@huawei.com>
+Fixes: 6c3edaf9fd6a ("tracing: Introduce trace event injection")
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events_inject.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
+index c188045c5f976..b1fce64e126c0 100644
+--- a/kernel/trace/trace_events_inject.c
++++ b/kernel/trace/trace_events_inject.c
+@@ -321,7 +321,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size,
+ }
+ const struct file_operations event_inject_fops = {
+-      .open = tracing_open_generic,
++      .open = tracing_open_file_tr,
+       .read = event_inject_read,
+       .write = event_inject_write,
++      .release = tracing_release_file_tr,
+ };
+-- 
+2.40.1
+
diff --git a/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch b/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch
new file mode 100644 (file)
index 0000000..fca2c78
--- /dev/null
@@ -0,0 +1,115 @@
+From 247d21f337455384310fd8ef7089a969b4d90181 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Sep 2023 22:47:12 -0400
+Subject: tracing: Increase trace array ref count on enable and filter files
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit f5ca233e2e66dc1c249bf07eefa37e34a6c9346a ]
+
+When the trace event enable and filter files are opened, increment the
+trace array ref counter, otherwise they can be accessed when the trace
+array is being deleted. The ref counter keeps the trace array from being
+deleted while those files are opened.
+
+Link: https://lkml.kernel.org/r/20230907024803.456187066@goodmis.org
+Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()")
+Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
+Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
+Reported-by: Zheng Yejian <zhengyejian1@huawei.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c        | 27 +++++++++++++++++++++++++++
+ kernel/trace/trace.h        |  2 ++
+ kernel/trace/trace_events.c |  6 ++++--
+ 3 files changed, 33 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 5aa23a4382c5e..7453840c77be2 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -4887,6 +4887,33 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+       return 0;
+ }
++/*
++ * The private pointer of the inode is the trace_event_file.
++ * Update the tr ref count associated to it.
++ */
++int tracing_open_file_tr(struct inode *inode, struct file *filp)
++{
++      struct trace_event_file *file = inode->i_private;
++      int ret;
++
++      ret = tracing_check_open_get_tr(file->tr);
++      if (ret)
++              return ret;
++
++      filp->private_data = inode->i_private;
++
++      return 0;
++}
++
++int tracing_release_file_tr(struct inode *inode, struct file *filp)
++{
++      struct trace_event_file *file = inode->i_private;
++
++      trace_array_put(file->tr);
++
++      return 0;
++}
++
+ static int tracing_mark_open(struct inode *inode, struct file *filp)
+ {
+       stream_open(inode, filp);
+diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
+index 90ab921884b10..a4a90bd3373be 100644
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -591,6 +591,8 @@ void tracing_reset_all_online_cpus(void);
+ void tracing_reset_all_online_cpus_unlocked(void);
+ int tracing_open_generic(struct inode *inode, struct file *filp);
+ int tracing_open_generic_tr(struct inode *inode, struct file *filp);
++int tracing_open_file_tr(struct inode *inode, struct file *filp);
++int tracing_release_file_tr(struct inode *inode, struct file *filp);
+ bool tracing_is_disabled(void);
+ bool tracer_tracing_is_on(struct trace_array *tr);
+ void tracer_tracing_on(struct trace_array *tr);
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 2a2a599997671..c626d02776a54 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -2078,9 +2078,10 @@ static const struct file_operations ftrace_set_event_notrace_pid_fops = {
+ };
+ static const struct file_operations ftrace_enable_fops = {
+-      .open = tracing_open_generic,
++      .open = tracing_open_file_tr,
+       .read = event_enable_read,
+       .write = event_enable_write,
++      .release = tracing_release_file_tr,
+       .llseek = default_llseek,
+ };
+@@ -2097,9 +2098,10 @@ static const struct file_operations ftrace_event_id_fops = {
+ };
+ static const struct file_operations ftrace_event_filter_fops = {
+-      .open = tracing_open_generic,
++      .open = tracing_open_file_tr,
+       .read = event_filter_read,
+       .write = event_filter_write,
++      .release = tracing_release_file_tr,
+       .llseek = default_llseek,
+ };
+-- 
+2.40.1
+
diff --git a/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch b/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch
new file mode 100644 (file)
index 0000000..fb56bf1
--- /dev/null
@@ -0,0 +1,97 @@
+From 742e4e870af5dfe2b163eb5e6b7ba745d4e4f139 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Dec 2021 14:25:58 +0000
+Subject: tracing: Make trace_marker{,_raw} stream-like
+
+From: John Keeping <john@metanate.com>
+
+[ Upstream commit 2972e3050e3517a85ca1813b227d4c302e804343 ]
+
+The tracing marker files are write-only streams with no meaningful
+concept of file position.  Using stream_open() to mark them as
+stream-like indicates this and has the added advantage that a single
+file descriptor can now be used from multiple threads without contention
+thanks to clearing FMODE_ATOMIC_POS.
+
+Note that this has the potential to break existing userspace since
+both lseek(2) and pwrite(2) will now return ESPIPE when previously lseek
+would have updated the stored offset and pwrite would have appended to
+the trace.  A survey of libtracefs and several other projects found to
+use trace_marker(_raw) [1][2][3] suggests that everyone limits
+themselves to calling write(2) and close(2) on these file descriptors so
+there is a good chance this will go unnoticed and the benefits of
+reduced overhead and lock contention seem worth the risk.
+
+[1] https://github.com/google/perfetto
+[2] https://github.com/intel/media-driver/
+[3] https://w1.fi/cgit/hostap/
+
+Link: https://lkml.kernel.org/r/20211207142558.347029-1-john@metanate.com
+
+Signed-off-by: John Keeping <john@metanate.com>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Stable-dep-of: f5ca233e2e66 ("tracing: Increase trace array ref count on enable and filter files")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c | 18 ++++++++----------
+ 1 file changed, 8 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 6adacfc880d6c..5aa23a4382c5e 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -4887,6 +4887,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+       return 0;
+ }
++static int tracing_mark_open(struct inode *inode, struct file *filp)
++{
++      stream_open(inode, filp);
++      return tracing_open_generic_tr(inode, filp);
++}
++
+ static int tracing_release(struct inode *inode, struct file *file)
+ {
+       struct trace_array *tr = inode->i_private;
+@@ -7225,9 +7231,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
+       if (tt)
+               event_triggers_post_call(tr->trace_marker_file, tt);
+-      if (written > 0)
+-              *fpos += written;
+-
+       return written;
+ }
+@@ -7286,9 +7289,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+       __buffer_unlock_commit(buffer, event);
+-      if (written > 0)
+-              *fpos += written;
+-
+       return written;
+ }
+@@ -7699,16 +7699,14 @@ static const struct file_operations tracing_free_buffer_fops = {
+ };
+ static const struct file_operations tracing_mark_fops = {
+-      .open           = tracing_open_generic_tr,
++      .open           = tracing_mark_open,
+       .write          = tracing_mark_write,
+-      .llseek         = generic_file_llseek,
+       .release        = tracing_release_generic_tr,
+ };
+ static const struct file_operations tracing_mark_raw_fops = {
+-      .open           = tracing_open_generic_tr,
++      .open           = tracing_mark_open,
+       .write          = tracing_mark_raw_write,
+-      .llseek         = generic_file_llseek,
+       .release        = tracing_release_generic_tr,
+ };
+-- 
+2.40.1
+