From: Sasha Levin Date: Sat, 23 Sep 2023 12:16:13 +0000 (-0400) Subject: Fixes for 5.15 X-Git-Tag: v6.5.6~113 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5359babfe81c4a89fb31f1ea9d6cdbacea1f4fc0;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch b/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch new file mode 100644 index 00000000000..20e98f29305 --- /dev/null +++ b/queue-5.15/ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch @@ -0,0 +1,155 @@ +From 2a41b882175a8811688ca7c43b2cfed663f7a974 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Dec 2021 08:20:47 +0100 +Subject: ata: ahci: Drop pointless VPRINTK() calls and convert the remaining + ones + +From: Hannes Reinecke + +[ Upstream commit 93c7711494f47f9c829321e2a8711671b02f6e4c ] + +Drop pointless VPRINTK() calls for entering and exiting interrupt +routines and convert the remaining calls to dev_dbg(). 
+ +Signed-off-by: Hannes Reinecke +Signed-off-by: Damien Le Moal +Stable-dep-of: 737dd811a3db ("ata: libahci: clear pending interrupt status") +Signed-off-by: Sasha Levin +--- + drivers/ata/ahci.c | 4 +--- + drivers/ata/ahci_xgene.c | 4 ---- + drivers/ata/libahci.c | 18 ++++-------------- + 3 files changed, 5 insertions(+), 21 deletions(-) + +diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c +index acc028414ee94..719fe2e2b36c2 100644 +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -707,7 +707,7 @@ static void ahci_pci_init_controller(struct ata_host *host) + + /* clear port IRQ */ + tmp = readl(port_mmio + PORT_IRQ_STAT); +- VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp); ++ dev_dbg(&pdev->dev, "PORT_IRQ_STAT 0x%x\n", tmp); + if (tmp) + writel(tmp, port_mmio + PORT_IRQ_STAT); + } +@@ -1499,7 +1499,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) + u32 irq_stat, irq_masked; + unsigned int handled = 1; + +- VPRINTK("ENTER\n"); + hpriv = host->private_data; + mmio = hpriv->mmio; + irq_stat = readl(mmio + HOST_IRQ_STAT); +@@ -1516,7 +1515,6 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance) + irq_stat = readl(mmio + HOST_IRQ_STAT); + spin_unlock(&host->lock); + } while (irq_stat); +- VPRINTK("EXIT\n"); + + return IRQ_RETVAL(handled); + } +diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c +index 292099410cf68..c1f61d255bc31 100644 +--- a/drivers/ata/ahci_xgene.c ++++ b/drivers/ata/ahci_xgene.c +@@ -588,8 +588,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) + void __iomem *mmio; + u32 irq_stat, irq_masked; + +- VPRINTK("ENTER\n"); +- + hpriv = host->private_data; + mmio = hpriv->mmio; + +@@ -612,8 +610,6 @@ static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance) + + spin_unlock(&host->lock); + +- VPRINTK("EXIT\n"); +- + return IRQ_RETVAL(rc); + } + +diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c +index 192115a45dd78..b591a05768744 100644 +--- 
a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -1216,12 +1216,12 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, + + /* clear SError */ + tmp = readl(port_mmio + PORT_SCR_ERR); +- VPRINTK("PORT_SCR_ERR 0x%x\n", tmp); ++ dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); + writel(tmp, port_mmio + PORT_SCR_ERR); + + /* clear port IRQ */ + tmp = readl(port_mmio + PORT_IRQ_STAT); +- VPRINTK("PORT_IRQ_STAT 0x%x\n", tmp); ++ dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); + if (tmp) + writel(tmp, port_mmio + PORT_IRQ_STAT); + +@@ -1252,10 +1252,10 @@ void ahci_init_controller(struct ata_host *host) + } + + tmp = readl(mmio + HOST_CTL); +- VPRINTK("HOST_CTL 0x%x\n", tmp); ++ dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp); + writel(tmp | HOST_IRQ_EN, mmio + HOST_CTL); + tmp = readl(mmio + HOST_CTL); +- VPRINTK("HOST_CTL 0x%x\n", tmp); ++ dev_dbg(host->dev, "HOST_CTL 0x%x\n", tmp); + } + EXPORT_SYMBOL_GPL(ahci_init_controller); + +@@ -1906,8 +1906,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) + void __iomem *port_mmio = ahci_port_base(ap); + u32 status; + +- VPRINTK("ENTER\n"); +- + status = readl(port_mmio + PORT_IRQ_STAT); + writel(status, port_mmio + PORT_IRQ_STAT); + +@@ -1915,8 +1913,6 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance) + ahci_handle_port_interrupt(ap, port_mmio, status); + spin_unlock(ap->lock); + +- VPRINTK("EXIT\n"); +- + return IRQ_HANDLED; + } + +@@ -1933,9 +1929,7 @@ u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked) + ap = host->ports[i]; + if (ap) { + ahci_port_intr(ap); +- VPRINTK("port %u\n", i); + } else { +- VPRINTK("port %u (no irq)\n", i); + if (ata_ratelimit()) + dev_warn(host->dev, + "interrupt on disabled port %u\n", i); +@@ -1956,8 +1950,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) + void __iomem *mmio; + u32 irq_stat, irq_masked; + +- VPRINTK("ENTER\n"); +- + hpriv = host->private_data; + mmio = hpriv->mmio; + +@@ 
-1985,8 +1977,6 @@ static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance) + + spin_unlock(&host->lock); + +- VPRINTK("EXIT\n"); +- + return IRQ_RETVAL(rc); + } + +-- +2.40.1 + diff --git a/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch b/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch new file mode 100644 index 00000000000..351a6840612 --- /dev/null +++ b/queue-5.15/ata-libahci-clear-pending-interrupt-status.patch @@ -0,0 +1,101 @@ +From de70500d95c937f534fb005a2378ca680e99f871 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Sep 2023 16:17:10 +0800 +Subject: ata: libahci: clear pending interrupt status + +From: Szuying Chen + +[ Upstream commit 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 ] + +When a CRC error occurs, the HBA asserts an interrupt to indicate an +interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then +does error recovery. If the adapter receives another SDB FIS +with an error (PxIS.TFES) from the device before the start of the EH +recovery process, the interrupt signaling the new SDB cannot be +serviced as PxIE was cleared already. This in turn results in the HBA +inability to issue any command during the error recovery process after +setting PxCMD.ST to 1 because PxIS.TFES is still set. + +According to AHCI 1.3.1 specifications section 6.2.2, fatal errors +notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will +cause the HBA to enter the ERR:Fatal state. In this state, the HBA +shall not issue any new commands. + +To avoid this situation, introduce the function +ahci_port_clear_pending_irq() to clear pending interrupts before +executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2 +specification. 
+ +Signed-off-by: Szuying Chen +Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset") +Cc: stable@vger.kernel.org +Reviewed-by: Niklas Cassel +Signed-off-by: Damien Le Moal +Signed-off-by: Sasha Levin +--- + drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------ + 1 file changed, 23 insertions(+), 12 deletions(-) + +diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c +index b591a05768744..e22d45fb8ebdc 100644 +--- a/drivers/ata/libahci.c ++++ b/drivers/ata/libahci.c +@@ -1200,6 +1200,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf) + return sprintf(buf, "%d\n", emp->blink_policy); + } + ++static void ahci_port_clear_pending_irq(struct ata_port *ap) ++{ ++ struct ahci_host_priv *hpriv = ap->host->private_data; ++ void __iomem *port_mmio = ahci_port_base(ap); ++ u32 tmp; ++ ++ /* clear SError */ ++ tmp = readl(port_mmio + PORT_SCR_ERR); ++ dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp); ++ writel(tmp, port_mmio + PORT_SCR_ERR); ++ ++ /* clear port IRQ */ ++ tmp = readl(port_mmio + PORT_IRQ_STAT); ++ dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp); ++ if (tmp) ++ writel(tmp, port_mmio + PORT_IRQ_STAT); ++ ++ writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT); ++} ++ + static void ahci_port_init(struct device *dev, struct ata_port *ap, + int port_no, void __iomem *mmio, + void __iomem *port_mmio) +@@ -1214,18 +1234,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, + if (rc) + dev_warn(dev, "%s (%d)\n", emsg, rc); + +- /* clear SError */ +- tmp = readl(port_mmio + PORT_SCR_ERR); +- dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); +- writel(tmp, port_mmio + PORT_SCR_ERR); +- +- /* clear port IRQ */ +- tmp = readl(port_mmio + PORT_IRQ_STAT); +- dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); +- if (tmp) +- writel(tmp, port_mmio + PORT_IRQ_STAT); +- +- writel(1 << port_no, mmio + HOST_IRQ_STAT); ++ ahci_port_clear_pending_irq(ap); + + /* mark esata ports */ + tmp = readl(port_mmio + PORT_CMD); +@@ 
-1555,6 +1564,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, + tf.status = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + ++ ahci_port_clear_pending_irq(ap); ++ + rc = sata_link_hardreset(link, timing, deadline, online, + ahci_check_ready); + +-- +2.40.1 + diff --git a/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch b/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch new file mode 100644 index 00000000000..117602b8a0f --- /dev/null +++ b/queue-5.15/ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch @@ -0,0 +1,64 @@ +From 00c71696b428f0e1be9d4dd14165a57e03b848d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Nov 2021 15:51:21 +0100 +Subject: ext4: change s_last_trim_minblks type to unsigned long + +From: Lukas Czerner + +[ Upstream commit 2327fb2e23416cfb2795ccca2f77d4d65925be99 ] + +There is no good reason for the s_last_trim_minblks to be atomic. There is +no data integrity needed and there is no real danger in setting and +reading it in a racy manner. Change it to be unsigned long, the same type +as s_clusters_per_group which is the maximum that's allowed. + +Signed-off-by: Lukas Czerner +Suggested-by: Andreas Dilger +Reviewed-by: Andreas Dilger +Link: https://lore.kernel.org/r/20211103145122.17338-1-lczerner@redhat.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 2 +- + fs/ext4/mballoc.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 976cb4b3ff660..e1a5ec7362ad6 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1656,7 +1656,7 @@ struct ext4_sb_info { + struct task_struct *s_mmp_tsk; + + /* record the last minlen when FITRIM is called. 
*/ +- atomic_t s_last_trim_minblks; ++ unsigned long s_last_trim_minblks; + + /* Reference to checksum algorithm driver via cryptoapi */ + struct crypto_shash *s_chksum_driver; +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 6eb445bbb2be8..782a13aca4e4a 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -6516,7 +6516,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, + ext4_lock_group(sb, group); + + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || +- minblocks < atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) { ++ minblocks < EXT4_SB(sb)->s_last_trim_minblks) { + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); + if (ret >= 0 && set_trimmed) + EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); +@@ -6632,7 +6632,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + } + + if (!ret) +- atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); ++ EXT4_SB(sb)->s_last_trim_minblks = minlen; + + out: + range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; +-- +2.40.1 + diff --git a/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch b/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch new file mode 100644 index 00000000000..11af3df12ba --- /dev/null +++ b/queue-5.15/ext4-do-not-let-fstrim-block-system-suspend.patch @@ -0,0 +1,76 @@ +From ebb24b8150ab0e2720fe014fec55dd8cd2d98e64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Sep 2023 17:04:55 +0200 +Subject: ext4: do not let fstrim block system suspend + +From: Jan Kara + +[ Upstream commit 5229a658f6453362fbb9da6bf96872ef25a7097e ] + +Len Brown has reported that system suspend sometimes fails due to +inability to freeze a task working in ext4_trim_fs() for one minute. +Trimming a large filesystem on a disk that slowly processes discard +requests can indeed take a long time. Since discard is just an advisory +call, it is perfectly fine to interrupt it at any time and return the +number of discarded blocks until that moment. 
Do that when we detect the +task is being frozen. + +Cc: stable@kernel.org +Reported-by: Len Brown +Suggested-by: Dave Chinner +References: https://bugzilla.kernel.org/show_bug.cgi?id=216322 +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20230913150504.9054-2-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 8503b9aa34daf..e5b81d8be2324 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -6443,6 +6444,11 @@ static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, + EXT4_CLUSTER_BITS(sb); + } + ++static bool ext4_trim_interrupted(void) ++{ ++ return fatal_signal_pending(current) || freezing(current); ++} ++ + static int ext4_try_to_trim_range(struct super_block *sb, + struct ext4_buddy *e4b, ext4_grpblk_t start, + ext4_grpblk_t max, ext4_grpblk_t minblocks) +@@ -6476,8 +6482,8 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + free_count += next - start; + start = next + 1; + +- if (fatal_signal_pending(current)) +- return -ERESTARTSYS; ++ if (ext4_trim_interrupted()) ++ return count; + + if (need_resched()) { + ext4_unlock_group(sb, e4b->bd_group); +@@ -6599,6 +6605,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; + + for (group = first_group; group <= last_group; group++) { ++ if (ext4_trim_interrupted()) ++ break; + grp = ext4_get_group_info(sb, group); + if (!grp) + continue; +-- +2.40.1 + diff --git a/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch b/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch new file mode 100644 index 00000000000..3bd3cf22dab --- /dev/null +++ b/queue-5.15/ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch @@ -0,0 +1,170 @@ +From 
882f4c9c7ea777927a1e0775763193b0fcb68522 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Sep 2023 17:04:54 +0200 +Subject: ext4: move setting of trimmed bit into ext4_try_to_trim_range() + +From: Jan Kara + +[ Upstream commit 45e4ab320c9b5fa67b1fc3b6a9b381cfcc0c8488 ] + +Currently we set the group's trimmed bit in ext4_trim_all_free() based +on return value of ext4_try_to_trim_range(). However when we will want +to abort trimming because of suspend attempt, we want to return success +from ext4_try_to_trim_range() but not set the trimmed bit. Instead of +implementing awkward propagation of this information, just move setting +of trimmed bit into ext4_try_to_trim_range() when the whole group is +trimmed. + +Cc: stable@kernel.org +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20230913150504.9054-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 46 +++++++++++++++++++++++++--------------------- + 1 file changed, 25 insertions(+), 21 deletions(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index d68ff5df6f668..8503b9aa34daf 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -6433,6 +6433,16 @@ __acquires(bitlock) + return ret; + } + ++static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, ++ ext4_group_t grp) ++{ ++ if (grp < ext4_get_groups_count(sb)) ++ return EXT4_CLUSTERS_PER_GROUP(sb) - 1; ++ return (ext4_blocks_count(EXT4_SB(sb)->s_es) - ++ ext4_group_first_block_no(sb, grp) - 1) >> ++ EXT4_CLUSTER_BITS(sb); ++} ++ + static int ext4_try_to_trim_range(struct super_block *sb, + struct ext4_buddy *e4b, ext4_grpblk_t start, + ext4_grpblk_t max, ext4_grpblk_t minblocks) +@@ -6440,9 +6450,12 @@ __acquires(ext4_group_lock_ptr(sb, e4b->bd_group)) + __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + { + ext4_grpblk_t next, count, free_count; ++ bool set_trimmed = false; + void *bitmap; + + bitmap = e4b->bd_bitmap; ++ if (start == 0 && max >= ext4_last_grp_cluster(sb, 
e4b->bd_group)) ++ set_trimmed = true; + start = max(e4b->bd_info->bb_first_free, start); + count = 0; + free_count = 0; +@@ -6457,16 +6470,14 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + int ret = ext4_trim_extent(sb, start, next - start, e4b); + + if (ret && ret != -EOPNOTSUPP) +- break; ++ return count; + count += next - start; + } + free_count += next - start; + start = next + 1; + +- if (fatal_signal_pending(current)) { +- count = -ERESTARTSYS; +- break; +- } ++ if (fatal_signal_pending(current)) ++ return -ERESTARTSYS; + + if (need_resched()) { + ext4_unlock_group(sb, e4b->bd_group); +@@ -6478,6 +6489,9 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + break; + } + ++ if (set_trimmed) ++ EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); ++ + return count; + } + +@@ -6488,7 +6502,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + * @start: first group block to examine + * @max: last group block to examine + * @minblocks: minimum extent block count +- * @set_trimmed: set the trimmed flag if at least one block is trimmed + * + * ext4_trim_all_free walks through group's block bitmap searching for free + * extents. 
When the free extent is found, mark it as used in group buddy +@@ -6498,7 +6511,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + static ext4_grpblk_t + ext4_trim_all_free(struct super_block *sb, ext4_group_t group, + ext4_grpblk_t start, ext4_grpblk_t max, +- ext4_grpblk_t minblocks, bool set_trimmed) ++ ext4_grpblk_t minblocks) + { + struct ext4_buddy e4b; + int ret; +@@ -6515,13 +6528,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, + ext4_lock_group(sb, group); + + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || +- minblocks < EXT4_SB(sb)->s_last_trim_minblks) { ++ minblocks < EXT4_SB(sb)->s_last_trim_minblks) + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); +- if (ret >= 0 && set_trimmed) +- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); +- } else { ++ else + ret = 0; +- } + + ext4_unlock_group(sb, group); + ext4_mb_unload_buddy(&e4b); +@@ -6554,7 +6564,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + ext4_fsblk_t first_data_blk = + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); +- bool whole_group, eof = false; + int ret = 0; + + start = range->start >> sb->s_blocksize_bits; +@@ -6573,10 +6582,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } +- if (end >= max_blks - 1) { ++ if (end >= max_blks - 1) + end = max_blks - 1; +- eof = true; +- } + if (end <= first_data_blk) + goto out; + if (start < first_data_blk) +@@ -6590,7 +6597,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + + /* end now represents the last cluster to discard in this group */ + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; +- whole_group = true; + + for (group = first_group; group <= last_group; group++) { + grp = ext4_get_group_info(sb, group); +@@ -6609,13 +6615,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) + * change it for the last 
group, note that last_cluster is + * already computed earlier by ext4_get_group_no_and_offset() + */ +- if (group == last_group) { ++ if (group == last_group) + end = last_cluster; +- whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; +- } + if (grp->bb_free >= minlen) { + cnt = ext4_trim_all_free(sb, group, first_cluster, +- end, minlen, whole_group); ++ end, minlen); + if (cnt < 0) { + ret = cnt; + break; +-- +2.40.1 + diff --git a/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch b/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch new file mode 100644 index 00000000000..791c3507eee --- /dev/null +++ b/queue-5.15/ext4-replace-the-traditional-ternary-conditional-ope.patch @@ -0,0 +1,49 @@ +From b45d178235c822d1b3ea03ff86370868b519392c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Aug 2023 22:32:00 +0800 +Subject: ext4: replace the traditional ternary conditional operator with with + max()/min() + +From: Kemeng Shi + +[ Upstream commit de8bf0e5ee7482585450357c6d4eddec8efc5cb7 ] + +Replace the traditional ternary conditional operator with max()/min() + +Signed-off-by: Kemeng Shi +Reviewed-by: Ritesh Harjani (IBM) +Link: https://lore.kernel.org/r/20230801143204.2284343-7-shikemeng@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 782a13aca4e4a..d68ff5df6f668 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -6443,8 +6443,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + void *bitmap; + + bitmap = e4b->bd_bitmap; +- start = (e4b->bd_info->bb_first_free > start) ? 
+- e4b->bd_info->bb_first_free : start; ++ start = max(e4b->bd_info->bb_first_free, start); + count = 0; + free_count = 0; + +@@ -6661,8 +6660,7 @@ ext4_mballoc_query_range( + + ext4_lock_group(sb, group); + +- start = (e4b.bd_info->bb_first_free > start) ? +- e4b.bd_info->bb_first_free : start; ++ start = max(e4b.bd_info->bb_first_free, start); + if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) + end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; + +-- +2.40.1 + diff --git a/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch b/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch new file mode 100644 index 00000000000..fcf05abb316 --- /dev/null +++ b/queue-5.15/ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch @@ -0,0 +1,55 @@ +From 729efa407d2267c2ab0778f291cff3f71260da96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Aug 2021 14:08:53 +0200 +Subject: ext4: scope ret locally in ext4_try_to_trim_range() + +From: Lukas Bulwahn + +[ Upstream commit afcc4e32f606dbfb47aa7309172c89174b86e74c ] + +As commit 6920b3913235 ("ext4: add new helper interface +ext4_try_to_trim_range()") moves some code into the separate function +ext4_try_to_trim_range(), the use of the variable ret within that +function is more limited and can be adjusted as well. + +Scope the use of the variable ret locally and drop dead assignments. + +No functional change. 
+ +Signed-off-by: Lukas Bulwahn +Link: https://lore.kernel.org/r/20210820120853.23134-1-lukas.bulwahn@gmail.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 45e4ab320c9b ("ext4: move setting of trimmed bit into ext4_try_to_trim_range()") +Signed-off-by: Sasha Levin +--- + fs/ext4/mballoc.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 7e7153c673c0d..6eb445bbb2be8 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -6441,7 +6441,6 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + { + ext4_grpblk_t next, count, free_count; + void *bitmap; +- int ret = 0; + + bitmap = e4b->bd_bitmap; + start = (e4b->bd_info->bb_first_free > start) ? +@@ -6456,10 +6455,10 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) + next = mb_find_next_bit(bitmap, max + 1, start); + + if ((next - start) >= minblocks) { +- ret = ext4_trim_extent(sb, start, next - start, e4b); ++ int ret = ext4_trim_extent(sb, start, next - start, e4b); ++ + if (ret && ret != -EOPNOTSUPP) + break; +- ret = 0; + count += next - start; + } + free_count += next - start; +-- +2.40.1 + diff --git a/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch b/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch new file mode 100644 index 00000000000..2d33173fb75 --- /dev/null +++ b/queue-5.15/nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch @@ -0,0 +1,55 @@ +From d79d0562fe7befd6610837c6ccbbd5cb39bc1e5f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Sep 2023 12:34:41 -0400 +Subject: NFS: More fixes for nfs_direct_write_reschedule_io() + +From: Trond Myklebust + +[ Upstream commit b11243f720ee5f9376861099019c8542969b6318 ] + +Ensure that all requests are put back onto the commit list so that they +can be rescheduled. 
+ +Fixes: 4daaeba93822 ("NFS: Fix nfs_direct_write_reschedule_io()") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/direct.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index 5d86ffa72ceab..bbe2a5cc49f68 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -786,16 +786,21 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error) + static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) + { + struct nfs_direct_req *dreq = hdr->dreq; ++ struct nfs_page *req; ++ struct nfs_commit_info cinfo; + ++ nfs_init_cinfo_from_dreq(&cinfo, dreq); + spin_lock(&dreq->lock); +- if (dreq->error == 0) { ++ if (dreq->error == 0) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; +- /* fake unstable write to let common nfs resend pages */ +- hdr->verf.committed = NFS_UNSTABLE; +- hdr->good_bytes = hdr->args.offset + hdr->args.count - +- hdr->io_start; +- } ++ set_bit(NFS_IOHDR_REDO, &hdr->flags); + spin_unlock(&dreq->lock); ++ while (!list_empty(&hdr->pages)) { ++ req = nfs_list_entry(hdr->pages.next); ++ nfs_list_remove_request(req); ++ nfs_unlock_request(req); ++ nfs_mark_request_commit(req, NULL, &cinfo, 0); ++ } + } + + static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { +-- +2.40.1 + diff --git a/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch b/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch new file mode 100644 index 00000000000..edb2d117205 --- /dev/null +++ b/queue-5.15/nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch @@ -0,0 +1,36 @@ +From 01ea2e4b380297a95e5c66df6655c2343745230e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Sep 2023 12:43:58 -0400 +Subject: NFS/pNFS: Report EINVAL errors from connect() to the server + +From: Trond Myklebust + +[ Upstream commit dd7d7ee3ba2a70d12d02defb478790cf57d5b87b ] + +With 
IPv6, connect() can occasionally return EINVAL if a route is +unavailable. If this happens during I/O to a data server, we want to +report it using LAYOUTERROR as an inability to connect. + +Fixes: dd52128afdde ("NFSv4.1/pnfs Ensure flexfiles reports all connection related errors") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/flexfilelayout/flexfilelayout.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c +index ceef75b4d2494..4269df0f0ffa5 100644 +--- a/fs/nfs/flexfilelayout/flexfilelayout.c ++++ b/fs/nfs/flexfilelayout/flexfilelayout.c +@@ -1238,6 +1238,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + case -EOPNOTSUPP: ++ case -EINVAL: + case -ECONNREFUSED: + case -ECONNRESET: + case -EHOSTDOWN: +-- +2.40.1 + diff --git a/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch b/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch new file mode 100644 index 00000000000..64d47d6e6d0 --- /dev/null +++ b/queue-5.15/nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch @@ -0,0 +1,150 @@ +From caa4d1a0e12550437e774ce7960816f63a5f1a27 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Sep 2023 12:34:40 -0400 +Subject: NFS: Use the correct commit info in nfs_join_page_group() + +From: Trond Myklebust + +[ Upstream commit b193a78ddb5ee7dba074d3f28dc050069ba083c0 ] + +Ensure that nfs_clear_request_commit() updates the correct counters when +it removes them from the commit list. 
+ +Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/direct.c | 8 +++++--- + fs/nfs/write.c | 23 ++++++++++++----------- + include/linux/nfs_page.h | 4 +++- + 3 files changed, 20 insertions(+), 15 deletions(-) + +diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c +index 018af6ec97b40..5d86ffa72ceab 100644 +--- a/fs/nfs/direct.c ++++ b/fs/nfs/direct.c +@@ -525,7 +525,9 @@ static void nfs_direct_add_page_head(struct list_head *list, + kref_get(&head->wb_kref); + } + +-static void nfs_direct_join_group(struct list_head *list, struct inode *inode) ++static void nfs_direct_join_group(struct list_head *list, ++ struct nfs_commit_info *cinfo, ++ struct inode *inode) + { + struct nfs_page *req, *subreq; + +@@ -547,7 +549,7 @@ static void nfs_direct_join_group(struct list_head *list, struct inode *inode) + nfs_release_request(subreq); + } + } while ((subreq = subreq->wb_this_page) != req); +- nfs_join_page_group(req, inode); ++ nfs_join_page_group(req, cinfo, inode); + } + } + +@@ -573,7 +575,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) + nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); + +- nfs_direct_join_group(&reqs, dreq->inode); ++ nfs_direct_join_group(&reqs, &cinfo, dreq->inode); + + dreq->count = 0; + dreq->max_count = 0; +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index be70874bc3292..4231d51fc1add 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -58,7 +58,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; + static const struct nfs_commit_completion_ops nfs_commit_completion_ops; + static const struct nfs_rw_ops nfs_rw_write_ops; + static void nfs_inode_remove_request(struct nfs_page *req); +-static void nfs_clear_request_commit(struct nfs_page *req); ++static void nfs_clear_request_commit(struct 
nfs_commit_info *cinfo, ++ struct nfs_page *req); + static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode); + static struct nfs_page * +@@ -500,8 +501,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, + * the (former) group. All subrequests are removed from any write or commit + * lists, unlinked from the group and destroyed. + */ +-void +-nfs_join_page_group(struct nfs_page *head, struct inode *inode) ++void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, ++ struct inode *inode) + { + struct nfs_page *subreq; + struct nfs_page *destroy_list = NULL; +@@ -531,7 +532,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode) + * Commit list removal accounting is done after locks are dropped */ + subreq = head; + do { +- nfs_clear_request_commit(subreq); ++ nfs_clear_request_commit(cinfo, subreq); + subreq = subreq->wb_this_page; + } while (subreq != head); + +@@ -565,8 +566,10 @@ nfs_lock_and_join_requests(struct page *page) + { + struct inode *inode = page_file_mapping(page)->host; + struct nfs_page *head; ++ struct nfs_commit_info cinfo; + int ret; + ++ nfs_init_cinfo_from_inode(&cinfo, inode); + /* + * A reference is taken only on the head request which acts as a + * reference to the whole page group - the group will not be destroyed +@@ -583,7 +586,7 @@ nfs_lock_and_join_requests(struct page *page) + return ERR_PTR(ret); + } + +- nfs_join_page_group(head, inode); ++ nfs_join_page_group(head, &cinfo, inode); + + return head; + } +@@ -945,18 +948,16 @@ nfs_clear_page_commit(struct page *page) + } + + /* Called holding the request lock on @req */ +-static void +-nfs_clear_request_commit(struct nfs_page *req) ++static void nfs_clear_request_commit(struct nfs_commit_info *cinfo, ++ struct nfs_page *req) + { + if (test_bit(PG_CLEAN, &req->wb_flags)) { + struct nfs_open_context *ctx = nfs_req_openctx(req); + struct inode *inode = d_inode(ctx->dentry); +- struct nfs_commit_info 
cinfo; + +- nfs_init_cinfo_from_inode(&cinfo, inode); + mutex_lock(&NFS_I(inode)->commit_mutex); +- if (!pnfs_clear_request_commit(req, &cinfo)) { +- nfs_request_remove_commit_list(req, &cinfo); ++ if (!pnfs_clear_request_commit(req, cinfo)) { ++ nfs_request_remove_commit_list(req, cinfo); + } + mutex_unlock(&NFS_I(inode)->commit_mutex); + nfs_clear_page_commit(req->wb_page); +diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h +index f0373a6cb5fb6..40aa09a21f75d 100644 +--- a/include/linux/nfs_page.h ++++ b/include/linux/nfs_page.h +@@ -145,7 +145,9 @@ extern void nfs_unlock_request(struct nfs_page *req); + extern void nfs_unlock_and_release_request(struct nfs_page *); + extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); + extern int nfs_page_group_lock_subrequests(struct nfs_page *head); +-extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode); ++extern void nfs_join_page_group(struct nfs_page *head, ++ struct nfs_commit_info *cinfo, ++ struct inode *inode); + extern int nfs_page_group_lock(struct nfs_page *); + extern void nfs_page_group_unlock(struct nfs_page *); + extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +-- +2.40.1 + diff --git a/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch b/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch new file mode 100644 index 00000000000..4d43e9d5542 --- /dev/null +++ b/queue-5.15/nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch @@ -0,0 +1,134 @@ +From a81a7077c38eab34c1f87754b6805791bd8cac1c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Aug 2023 15:29:34 -0400 +Subject: NFSv4.1: fix pnfs MDS=DS session trunking + +From: Olga Kornievskaia + +[ Upstream commit 806a3bc421a115fbb287c1efce63a48c54ee804b ] + +Currently, when GETDEVICEINFO returns multiple locations where each +is a different IP but the server's identity is same as MDS, then +nfs4_set_ds_client() finds the existing nfs_client structure which +has the MDS's 
max_connect value (and if it's 1), then the 1st IP +on the DS's list will get dropped due to MDS trunking rules. Other +IPs would be added as they fall under the pnfs trunking rules. + +For the list of IPs the 1st goes thru calling nfs4_set_ds_client() +which will eventually call nfs4_add_trunk() and call into +rpc_clnt_test_and_add_xprt() which has the check for MDS trunking. +The other IPs (after the 1st one), would call rpc_clnt_add_xprt() +which doesn't go thru that check. + +nfs4_add_trunk() is called when MDS trunking is happening and it +needs to enforce the usage of max_connect mount option of the +1st mount. However, this shouldn't be applied to pnfs flow. + +Instead, this patch proposes to treat MDS=DS as DS trunking and +make sure that MDS's max_connect limit does not apply to the +1st IP returned in the GETDEVICEINFO list. It does so by +marking the newly created client with a new flag NFS_CS_PNFS +which is then used to pass max_connect value to use into the +rpc_clnt_test_and_add_xprt() instead of the existing rpc +client's max_connect value set by the MDS connection. + +For example, mount was done without max_connect value set +so MDS's rpc client has cl_max_connect=1. Upon calling into +rpc_clnt_test_and_add_xprt(), instead of using rpc client's value, +the caller passes in max_connect value which has previously +been set in the pnfs path (as a part of handling +GETDEVICEINFO list of IPs) in nfs4_set_ds_client(). + +However, when NFS_CS_PNFS flag is not set and we know we +are doing MDS trunking, comparing a new IP of the same +server, we then set the max_connect value to the +existing MDS's value and pass that into +rpc_clnt_test_and_add_xprt(). 
+ +Fixes: dc48e0abee24 ("SUNRPC enforce creation of no more than max_connect xprts") +Signed-off-by: Olga Kornievskaia +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4client.c | 6 +++++- + include/linux/nfs_fs_sb.h | 1 + + net/sunrpc/clnt.c | 11 +++++++---- + 3 files changed, 13 insertions(+), 5 deletions(-) + +diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c +index 81cda46d5829d..cba8b4c1fb4a3 100644 +--- a/fs/nfs/nfs4client.c ++++ b/fs/nfs/nfs4client.c +@@ -416,6 +416,8 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) + .net = old->cl_net, + .servername = old->cl_hostname, + }; ++ int max_connect = test_bit(NFS_CS_PNFS, &clp->cl_flags) ? ++ clp->cl_max_connect : old->cl_max_connect; + + if (clp->cl_proto != old->cl_proto) + return; +@@ -429,7 +431,7 @@ static void nfs4_add_trunk(struct nfs_client *clp, struct nfs_client *old) + xprt_args.addrlen = clp_salen; + + rpc_clnt_add_xprt(old->cl_rpcclient, &xprt_args, +- rpc_clnt_test_and_add_xprt, NULL); ++ rpc_clnt_test_and_add_xprt, &max_connect); + } + + /** +@@ -996,6 +998,8 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + + __set_bit(NFS_CS_DS, &cl_init.init_flags); ++ __set_bit(NFS_CS_PNFS, &cl_init.init_flags); ++ cl_init.max_connect = NFS_MAX_TRANSPORTS; + /* + * Set an authflavor equual to the MDS value. 
Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS +diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h +index da9ef0ab9b4b6..5e065f16d061d 100644 +--- a/include/linux/nfs_fs_sb.h ++++ b/include/linux/nfs_fs_sb.h +@@ -48,6 +48,7 @@ struct nfs_client { + #define NFS_CS_NOPING 6 /* - don't ping on connect */ + #define NFS_CS_DS 7 /* - Server is a DS */ + #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ ++#define NFS_CS_PNFS 9 /* - Server used for pnfs */ + struct sockaddr_storage cl_addr; /* server identifier */ + size_t cl_addrlen; + char * cl_hostname; /* hostname of server */ +diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c +index 130d8166b9ce8..f8750683bded4 100644 +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -2822,19 +2822,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = { + * @clnt: pointer to struct rpc_clnt + * @xps: pointer to struct rpc_xprt_switch, + * @xprt: pointer struct rpc_xprt +- * @dummy: unused ++ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport + */ + int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, +- void *dummy) ++ void *in_max_connect) + { + struct rpc_cb_add_xprt_calldata *data; + struct rpc_task *task; ++ int max_connect = clnt->cl_max_connect; + +- if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { ++ if (in_max_connect) ++ max_connect = *(int *)in_max_connect; ++ if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) { + rcu_read_lock(); + pr_warn("SUNRPC: reached max allowed number (%d) did not add " +- "transport to server: %s\n", clnt->cl_max_connect, ++ "transport to server: %s\n", max_connect, + rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + return -EINVAL; +-- +2.40.1 + diff --git a/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch 
b/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch new file mode 100644 index 00000000000..f87d15b3258 --- /dev/null +++ b/queue-5.15/nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch @@ -0,0 +1,68 @@ +From 024249d6037797c2eab917e37bfbe9586933c1fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jul 2023 13:02:38 -0400 +Subject: NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server + +From: Olga Kornievskaia + +[ Upstream commit 51d674a5e4889f1c8e223ac131cf218e1631e423 ] + +After receiving the location(s) of the DS server(s) in the +GETDEVICEINFO, create the request for the clientid to such +server and indicate that the client is connecting to a DS. + +Signed-off-by: Olga Kornievskaia +Signed-off-by: Anna Schumaker +Stable-dep-of: 806a3bc421a1 ("NFSv4.1: fix pnfs MDS=DS session trunking") +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4client.c | 3 +++ + fs/nfs/nfs4proc.c | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c +index 1bf7a72ebda6e..81cda46d5829d 100644 +--- a/fs/nfs/nfs4client.c ++++ b/fs/nfs/nfs4client.c +@@ -231,6 +231,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); + ++ if (test_bit(NFS_CS_DS, &cl_init->init_flags)) ++ __set_bit(NFS_CS_DS, &clp->cl_flags); + /* + * Set up the connection to the server before we add add to the + * global list. +@@ -993,6 +995,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, + if (mds_srv->flags & NFS_MOUNT_NORESVPORT) + __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + ++ __set_bit(NFS_CS_DS, &cl_init.init_flags); + /* + * Set an authflavor equual to the MDS value. 
Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index a21e25cbd4515..32204c0b3d098 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -8715,6 +8715,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, + #ifdef CONFIG_NFS_V4_1_MIGRATION + calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; + #endif ++ if (test_bit(NFS_CS_DS, &clp->cl_flags)) ++ calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS; + msg.rpc_argp = &calldata->args; + msg.rpc_resp = &calldata->res; + task_setup_data.callback_data = calldata; +@@ -8792,6 +8794,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre + /* Save the EXCHANGE_ID verifier session trunk tests */ + memcpy(clp->cl_confirm.data, argp->verifier.data, + sizeof(clp->cl_confirm.data)); ++ if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS) ++ set_bit(NFS_CS_DS, &clp->cl_flags); + out: + trace_nfs4_exchange_id(clp, status); + rpc_put_task(task); +-- +2.40.1 + diff --git a/queue-5.15/series b/queue-5.15/series new file mode 100644 index 00000000000..86755ffe71b --- /dev/null +++ b/queue-5.15/series @@ -0,0 +1,16 @@ +nfs-use-the-correct-commit-info-in-nfs_join_page_gro.patch +nfs-more-fixes-for-nfs_direct_write_reschedule_io.patch +nfs-pnfs-report-einval-errors-from-connect-to-the-se.patch +sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch +nfsv4.1-use-exchgid4_flag_use_pnfs_ds-for-ds-server.patch +nfsv4.1-fix-pnfs-mds-ds-session-trunking.patch +tracing-make-trace_marker-_raw-stream-like.patch +tracing-increase-trace-array-ref-count-on-enable-and.patch +ata-ahci-drop-pointless-vprintk-calls-and-convert-th.patch +ata-libahci-clear-pending-interrupt-status.patch +ext4-scope-ret-locally-in-ext4_try_to_trim_range.patch +ext4-change-s_last_trim_minblks-type-to-unsigned-lon.patch +ext4-replace-the-traditional-ternary-conditional-ope.patch 
+ext4-move-setting-of-trimmed-bit-into-ext4_try_to_tr.patch +ext4-do-not-let-fstrim-block-system-suspend.patch +tracing-have-event-inject-files-inc-the-trace-array-.patch diff --git a/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch b/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch new file mode 100644 index 00000000000..2dc41a32fdf --- /dev/null +++ b/queue-5.15/sunrpc-mark-the-cred-for-revalidation-if-the-server-.patch @@ -0,0 +1,35 @@ +From 1b253e40266ec9f6b078a4d109e04d5579e4875e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Sep 2023 12:50:09 -0400 +Subject: SUNRPC: Mark the cred for revalidation if the server rejects it + +From: Trond Myklebust + +[ Upstream commit 611fa42dfa9d2f3918ac5f4dd5705dfad81b323d ] + +If the server rejects the credential as being stale, or bad, then we +should mark it for revalidation before retransmitting. + +Fixes: 7f5667a5f8c4 ("SUNRPC: Clean up rpc_verify_header()") +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Signed-off-by: Sasha Levin +--- + net/sunrpc/clnt.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c +index b9c54c03c30a6..130d8166b9ce8 100644 +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -2668,6 +2668,7 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr) + case rpc_autherr_rejectedverf: + case rpcsec_gsserr_credproblem: + case rpcsec_gsserr_ctxproblem: ++ rpcauth_invalcred(task); + if (!task->tk_cred_retry) + break; + task->tk_cred_retry--; +-- +2.40.1 + diff --git a/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch b/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch new file mode 100644 index 00000000000..82e7a8f62da --- /dev/null +++ b/queue-5.15/tracing-have-event-inject-files-inc-the-trace-array-.patch @@ -0,0 +1,49 @@ +From 929153db87153eccdddcaa3fa8ca51a644dda458 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Sep 2023 22:47:16 
-0400 +Subject: tracing: Have event inject files inc the trace array ref count + +From: Steven Rostedt (Google) + +[ Upstream commit e5c624f027ac74f97e97c8f36c69228ac9f1102d ] + +The event inject files add events for a specific trace array. For an +instance, if the file is opened and the instance is deleted, reading or +writing to the file will cause a use after free. + +Up the ref count of the trace_array when a event inject file is opened. + +Link: https://lkml.kernel.org/r/20230907024804.292337868@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Cc: Zheng Yejian +Fixes: 6c3edaf9fd6a ("tracing: Introduce trace event injection") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_inject.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c +index c188045c5f976..b1fce64e126c0 100644 +--- a/kernel/trace/trace_events_inject.c ++++ b/kernel/trace/trace_events_inject.c +@@ -321,7 +321,8 @@ event_inject_read(struct file *file, char __user *buf, size_t size, + } + + const struct file_operations event_inject_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_file_tr, + .read = event_inject_read, + .write = event_inject_write, ++ .release = tracing_release_file_tr, + }; +-- +2.40.1 + diff --git a/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch b/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch new file mode 100644 index 00000000000..fca2c7826bd --- /dev/null +++ b/queue-5.15/tracing-increase-trace-array-ref-count-on-enable-and.patch @@ -0,0 +1,115 @@ +From 247d21f337455384310fd8ef7089a969b4d90181 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Sep 2023 22:47:12 
-0400 +Subject: tracing: Increase trace array ref count on enable and filter files + +From: Steven Rostedt (Google) + +[ Upstream commit f5ca233e2e66dc1c249bf07eefa37e34a6c9346a ] + +When the trace event enable and filter files are opened, increment the +trace array ref counter, otherwise they can be accessed when the trace +array is being deleted. The ref counter keeps the trace array from being +deleted while those files are opened. + +Link: https://lkml.kernel.org/r/20230907024803.456187066@goodmis.org +Link: https://lore.kernel.org/all/1cb3aee2-19af-c472-e265-05176fe9bd84@huawei.com/ + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Andrew Morton +Fixes: 8530dec63e7b4 ("tracing: Add tracing_check_open_get_tr()") +Tested-by: Linux Kernel Functional Testing +Tested-by: Naresh Kamboju +Reported-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 27 +++++++++++++++++++++++++++ + kernel/trace/trace.h | 2 ++ + kernel/trace/trace_events.c | 6 ++++-- + 3 files changed, 33 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 5aa23a4382c5e..7453840c77be2 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4887,6 +4887,33 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) + return 0; + } + ++/* ++ * The private pointer of the inode is the trace_event_file. ++ * Update the tr ref count associated to it. 
++ */ ++int tracing_open_file_tr(struct inode *inode, struct file *filp) ++{ ++ struct trace_event_file *file = inode->i_private; ++ int ret; ++ ++ ret = tracing_check_open_get_tr(file->tr); ++ if (ret) ++ return ret; ++ ++ filp->private_data = inode->i_private; ++ ++ return 0; ++} ++ ++int tracing_release_file_tr(struct inode *inode, struct file *filp) ++{ ++ struct trace_event_file *file = inode->i_private; ++ ++ trace_array_put(file->tr); ++ ++ return 0; ++} ++ + static int tracing_mark_open(struct inode *inode, struct file *filp) + { + stream_open(inode, filp); +diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h +index 90ab921884b10..a4a90bd3373be 100644 +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -591,6 +591,8 @@ void tracing_reset_all_online_cpus(void); + void tracing_reset_all_online_cpus_unlocked(void); + int tracing_open_generic(struct inode *inode, struct file *filp); + int tracing_open_generic_tr(struct inode *inode, struct file *filp); ++int tracing_open_file_tr(struct inode *inode, struct file *filp); ++int tracing_release_file_tr(struct inode *inode, struct file *filp); + bool tracing_is_disabled(void); + bool tracer_tracing_is_on(struct trace_array *tr); + void tracer_tracing_on(struct trace_array *tr); +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 2a2a599997671..c626d02776a54 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -2078,9 +2078,10 @@ static const struct file_operations ftrace_set_event_notrace_pid_fops = { + }; + + static const struct file_operations ftrace_enable_fops = { +- .open = tracing_open_generic, ++ .open = tracing_open_file_tr, + .read = event_enable_read, + .write = event_enable_write, ++ .release = tracing_release_file_tr, + .llseek = default_llseek, + }; + +@@ -2097,9 +2098,10 @@ static const struct file_operations ftrace_event_id_fops = { + }; + + static const struct file_operations ftrace_event_filter_fops = { +- .open = 
tracing_open_generic, ++ .open = tracing_open_file_tr, + .read = event_filter_read, + .write = event_filter_write, ++ .release = tracing_release_file_tr, + .llseek = default_llseek, + }; + +-- +2.40.1 + diff --git a/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch b/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch new file mode 100644 index 00000000000..fb56bf1b977 --- /dev/null +++ b/queue-5.15/tracing-make-trace_marker-_raw-stream-like.patch @@ -0,0 +1,97 @@ +From 742e4e870af5dfe2b163eb5e6b7ba745d4e4f139 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Dec 2021 14:25:58 +0000 +Subject: tracing: Make trace_marker{,_raw} stream-like + +From: John Keeping + +[ Upstream commit 2972e3050e3517a85ca1813b227d4c302e804343 ] + +The tracing marker files are write-only streams with no meaningful +concept of file position. Using stream_open() to mark them as +stream-like indicates this and has the added advantage that a single +file descriptor can now be used from multiple threads without contention +thanks to clearing FMODE_ATOMIC_POS. + +Note that this has the potential to break existing userspace since +both lseek(2) and pwrite(2) will now return ESPIPE when previously lseek +would have updated the stored offset and pwrite would have appended to +the trace. A survey of libtracefs and several other projects found to +use trace_marker(_raw) [1][2][3] suggests that everyone limits +themselves to calling write(2) and close(2) on these file descriptors so +there is a good chance this will go unnoticed and the benefits of +reduced overhead and lock contention seem worth the risk. 
+ +[1] https://github.com/google/perfetto +[2] https://github.com/intel/media-driver/ +[3] https://w1.fi/cgit/hostap/ + +Link: https://lkml.kernel.org/r/20211207142558.347029-1-john@metanate.com + +Signed-off-by: John Keeping +Signed-off-by: Steven Rostedt (VMware) +Stable-dep-of: f5ca233e2e66 ("tracing: Increase trace array ref count on enable and filter files") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 18 ++++++++---------- + 1 file changed, 8 insertions(+), 10 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 6adacfc880d6c..5aa23a4382c5e 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4887,6 +4887,12 @@ int tracing_open_generic_tr(struct inode *inode, struct file *filp) + return 0; + } + ++static int tracing_mark_open(struct inode *inode, struct file *filp) ++{ ++ stream_open(inode, filp); ++ return tracing_open_generic_tr(inode, filp); ++} ++ + static int tracing_release(struct inode *inode, struct file *file) + { + struct trace_array *tr = inode->i_private; +@@ -7225,9 +7231,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, + if (tt) + event_triggers_post_call(tr->trace_marker_file, tt); + +- if (written > 0) +- *fpos += written; +- + return written; + } + +@@ -7286,9 +7289,6 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + + __buffer_unlock_commit(buffer, event); + +- if (written > 0) +- *fpos += written; +- + return written; + } + +@@ -7699,16 +7699,14 @@ static const struct file_operations tracing_free_buffer_fops = { + }; + + static const struct file_operations tracing_mark_fops = { +- .open = tracing_open_generic_tr, ++ .open = tracing_mark_open, + .write = tracing_mark_write, +- .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, + }; + + static const struct file_operations tracing_mark_raw_fops = { +- .open = tracing_open_generic_tr, ++ .open = tracing_mark_open, + .write = tracing_mark_raw_write, +- .llseek = 
generic_file_llseek, + .release = tracing_release_generic_tr, + }; + +-- +2.40.1 +