From: Greg Kroah-Hartman Date: Sat, 16 Sep 2023 12:20:15 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v5.10.195~44 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=513824b9bc83c022fc1eb234dad050ba101da0c1;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: arc-atomics-add-compiler-barrier-to-atomic-operations.patch ata-ahci-add-elkhart-lake-ahci-controller.patch ata-pata_falcon-fix-io-base-selection-for-q40.patch ata-pata_ftide010-add-missing-module_description.patch ata-sata_gemini-add-missing-module_description.patch btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch btrfs-fix-start-transaction-qgroup-rsv-double-free.patch btrfs-free-qgroup-rsv-on-io-failure.patch btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch f2fs-avoid-false-alarm-of-circular-locking.patch f2fs-flush-inode-if-atomic-file-is-aborted.patch fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch jbd2-fix-checkpoint-cleanup-performance-regression.patch lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch --- diff --git a/queue-6.1/arc-atomics-add-compiler-barrier-to-atomic-operations.patch b/queue-6.1/arc-atomics-add-compiler-barrier-to-atomic-operations.patch new file mode 100644 index 00000000000..7dd9450eb75 --- /dev/null +++ b/queue-6.1/arc-atomics-add-compiler-barrier-to-atomic-operations.patch @@ -0,0 +1,100 @@ +From 42f51fb24fd39cc547c086ab3d8a314cc603a91c Mon 
Sep 17 00:00:00 2001 +From: Pavel Kozlov +Date: Tue, 15 Aug 2023 19:11:36 +0400 +Subject: ARC: atomics: Add compiler barrier to atomic operations... + +From: Pavel Kozlov + +commit 42f51fb24fd39cc547c086ab3d8a314cc603a91c upstream. + +... to avoid unwanted gcc optimizations + +SMP kernels fail to boot with commit 596ff4a09b89 +("cpumask: re-introduce constant-sized cpumask optimizations"). + +| +| percpu: BUG: failure at mm/percpu.c:2981/pcpu_build_alloc_info()! +| + +The write operation performed by the SCOND instruction in the atomic +inline asm code is not properly passed to the compiler. The compiler +cannot correctly optimize a nested loop that runs through the cpumask +in the pcpu_build_alloc_info() function. + +Fix this by adding a compiler barrier (memory clobber in inline asm). + +Apparently atomic ops used to have memory clobber implicitly via +surrounding smp_mb(). However commit b64be6836993c431e +("ARC: atomics: implement relaxed variants") removed the smp_mb() for +the relaxed variants, but failed to add the explicit compiler barrier. 
+ +Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/135 +Cc: # v6.3+ +Fixes: b64be6836993c43 ("ARC: atomics: implement relaxed variants") +Signed-off-by: Pavel Kozlov +Signed-off-by: Vineet Gupta +[vgupta: tweaked the changelog and added Fixes tag] +Signed-off-by: Greg Kroah-Hartman +--- + arch/arc/include/asm/atomic-llsc.h | 6 +++--- + arch/arc/include/asm/atomic64-arcv2.h | 6 +++--- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/arc/include/asm/atomic-llsc.h ++++ b/arch/arc/include/asm/atomic-llsc.h +@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ + [i] "ir" (i) \ +- : "cc"); \ ++ : "cc", "memory"); \ + } \ + + #define ATOMIC_OP_RETURN(op, asm_op) \ +@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_ret + : [val] "=&r" (val) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ +- : "cc"); \ ++ : "cc", "memory"); \ + \ + return val; \ + } +@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op + [orig] "=&r" (orig) \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ +- : "cc"); \ ++ : "cc", "memory"); \ + \ + return orig; \ + } +--- a/arch/arc/include/asm/atomic64-arcv2.h ++++ b/arch/arc/include/asm/atomic64-arcv2.h +@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s6 + " bnz 1b \n" \ + : "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ +- : "cc"); \ ++ : "cc", "memory"); \ + } \ + + #define ATOMIC64_OP_RETURN(op, op1, op2) \ +@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_r + " bnz 1b \n" \ + : [val] "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ +- : "cc"); /* memory clobber comes from smp_mb() */ \ ++ : "cc", "memory"); \ + \ + return val; \ + } +@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_## + " bnz 1b \n" \ + : "=&r"(orig), "=&r"(val) \ + : "r"(&v->counter), "ir"(a) \ +- : "cc"); /* memory clobber comes from smp_mb() */ \ ++ : 
"cc", "memory"); \ + \ + return orig; \ + } diff --git a/queue-6.1/ata-ahci-add-elkhart-lake-ahci-controller.patch b/queue-6.1/ata-ahci-add-elkhart-lake-ahci-controller.patch new file mode 100644 index 00000000000..7f4392196a3 --- /dev/null +++ b/queue-6.1/ata-ahci-add-elkhart-lake-ahci-controller.patch @@ -0,0 +1,61 @@ +From 2a2df98ec592667927b5c1351afa6493ea125c9f Mon Sep 17 00:00:00 2001 +From: Werner Fischer +Date: Tue, 29 Aug 2023 13:33:58 +0200 +Subject: ata: ahci: Add Elkhart Lake AHCI controller +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Werner Fischer + +commit 2a2df98ec592667927b5c1351afa6493ea125c9f upstream. + +Elkhart Lake is the successor of Apollo Lake and Gemini Lake. These +CPUs and their PCHs are used in mobile and embedded environments. + +With this patch I suggest that Elkhart Lake SATA controllers [1] should +use the default LPM policy for mobile chipsets. +The disadvantage of missing hot-plug support with this setting should +not be an issue, as those CPUs are used in embedded environments and +not in servers with hot-plug backplanes. + +We discovered that the Elkhart Lake SATA controllers have been missing +in ahci.c after a customer reported the throttling of his SATA SSD +after a short period of higher I/O. We determined the high temperature +of the SSD controller in idle mode as the root cause for that. + +Depending on the used SSD, we have seen up to 1.8 Watt lower system +idle power usage and up to 30°C lower SSD controller temperatures in +our tests, when we set med_power_with_dipm manually. I have provided a +table showing seven different SATA SSDs from ATP, Intel/Solidigm and +Samsung [2]. + +Intel lists a total of 3 SATA controller IDs (4B60, 4B62, 4B63) in [1] +for those mobile PCHs. +This commit just adds 0x4b63 as I do not have test systems with 0x4b60 +and 0x4b62 SATA controllers. +I have tested this patch with a system which uses 0x4b63 as SATA +controller. 
+ +[1] https://sata-io.org/product/8803 +[2] https://www.thomas-krenn.com/en/wiki/SATA_Link_Power_Management#Example_LES_v4 + +Signed-off-by: Werner Fischer +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/ahci.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/ata/ahci.c ++++ b/drivers/ata/ahci.c +@@ -422,6 +422,8 @@ static const struct pci_device_id ahci_p + { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */ + { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */ + { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */ ++ /* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */ ++ { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */ + + /* JMicron 360/1/3/5/6, match class to avoid IDE function */ + { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, diff --git a/queue-6.1/ata-pata_falcon-fix-io-base-selection-for-q40.patch b/queue-6.1/ata-pata_falcon-fix-io-base-selection-for-q40.patch new file mode 100644 index 00000000000..1c43b9de37e --- /dev/null +++ b/queue-6.1/ata-pata_falcon-fix-io-base-selection-for-q40.patch @@ -0,0 +1,123 @@ +From 8a1f00b753ecfdb117dc1a07e68c46d80e7923ea Mon Sep 17 00:00:00 2001 +From: Michael Schmitz +Date: Sun, 27 Aug 2023 16:13:47 +1200 +Subject: ata: pata_falcon: fix IO base selection for Q40 + +From: Michael Schmitz + +commit 8a1f00b753ecfdb117dc1a07e68c46d80e7923ea upstream. + +With commit 44b1fbc0f5f3 ("m68k/q40: Replace q40ide driver +with pata_falcon and falconide"), the Q40 IDE driver was +replaced by pata_falcon.c. + +Both IO and memory resources were defined for the Q40 IDE +platform device, but definition of the IDE register addresses +was modeled after the Falcon case, both in use of the memory +resources and in including register shift and byte vs. word +offset in the address. 
+ +This was correct for the Falcon case, which does not apply +any address translation to the register addresses. In the +Q40 case, all of device base address, byte access offset +and register shift is included in the platform specific +ISA access translation (in asm/mm_io.h). + +As a consequence, such address translation gets applied +twice, and register addresses are mangled. + +Use the device base address from the platform IO resource +for Q40 (the IO address translation will then add the correct +ISA window base address and byte access offset), with register +shift 1. Use MMIO base address and register shift 2 as before +for Falcon. + +Encode PIO_OFFSET into IO port addresses for all registers +for Q40 except the data transfer register. Encode the MMIO +offset there (pata_falcon_data_xfer() directly uses raw IO +with no address translation). + +Reported-by: William R Sowerbutts +Closes: https://lore.kernel.org/r/CAMuHMdUU62jjunJh9cqSqHT87B0H0A4udOOPs=WN7WZKpcagVA@mail.gmail.com +Link: https://lore.kernel.org/r/CAMuHMdUU62jjunJh9cqSqHT87B0H0A4udOOPs=WN7WZKpcagVA@mail.gmail.com +Fixes: 44b1fbc0f5f3 ("m68k/q40: Replace q40ide driver with pata_falcon and falconide") +Cc: stable@vger.kernel.org +Cc: Finn Thain +Cc: Geert Uytterhoeven +Tested-by: William R Sowerbutts +Signed-off-by: Michael Schmitz +Reviewed-by: Sergey Shtylyov +Reviewed-by: Geert Uytterhoeven +Signed-off-by: Damien Le Moal +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/pata_falcon.c | 50 ++++++++++++++++++++++++++-------------------- + 1 file changed, 29 insertions(+), 21 deletions(-) + +--- a/drivers/ata/pata_falcon.c ++++ b/drivers/ata/pata_falcon.c +@@ -123,8 +123,8 @@ static int __init pata_falcon_init_one(s + struct resource *base_res, *ctl_res, *irq_res; + struct ata_host *host; + struct ata_port *ap; +- void __iomem *base; +- int irq = 0; ++ void __iomem *base, *ctl_base; ++ int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */ + + dev_info(&pdev->dev, "Atari Falcon and 
Q40/Q60 PATA controller\n"); + +@@ -165,26 +165,34 @@ static int __init pata_falcon_init_one(s + ap->pio_mask = ATA_PIO4; + ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY; + +- base = (void __iomem *)base_mem_res->start; + /* N.B. this assumes data_addr will be used for word-sized I/O only */ +- ap->ioaddr.data_addr = base + 0 + 0 * 4; +- ap->ioaddr.error_addr = base + 1 + 1 * 4; +- ap->ioaddr.feature_addr = base + 1 + 1 * 4; +- ap->ioaddr.nsect_addr = base + 1 + 2 * 4; +- ap->ioaddr.lbal_addr = base + 1 + 3 * 4; +- ap->ioaddr.lbam_addr = base + 1 + 4 * 4; +- ap->ioaddr.lbah_addr = base + 1 + 5 * 4; +- ap->ioaddr.device_addr = base + 1 + 6 * 4; +- ap->ioaddr.status_addr = base + 1 + 7 * 4; +- ap->ioaddr.command_addr = base + 1 + 7 * 4; +- +- base = (void __iomem *)ctl_mem_res->start; +- ap->ioaddr.altstatus_addr = base + 1; +- ap->ioaddr.ctl_addr = base + 1; +- +- ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx", +- (unsigned long)base_mem_res->start, +- (unsigned long)ctl_mem_res->start); ++ ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start; ++ ++ if (base_res) { /* only Q40 has IO resources */ ++ io_offset = 0x10000; ++ reg_shift = 0; ++ base = (void __iomem *)base_res->start; ++ ctl_base = (void __iomem *)ctl_res->start; ++ } else { ++ base = (void __iomem *)base_mem_res->start; ++ ctl_base = (void __iomem *)ctl_mem_res->start; ++ } ++ ++ ap->ioaddr.error_addr = base + io_offset + (1 << reg_shift); ++ ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift); ++ ap->ioaddr.nsect_addr = base + io_offset + (2 << reg_shift); ++ ap->ioaddr.lbal_addr = base + io_offset + (3 << reg_shift); ++ ap->ioaddr.lbam_addr = base + io_offset + (4 << reg_shift); ++ ap->ioaddr.lbah_addr = base + io_offset + (5 << reg_shift); ++ ap->ioaddr.device_addr = base + io_offset + (6 << reg_shift); ++ ap->ioaddr.status_addr = base + io_offset + (7 << reg_shift); ++ ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift); ++ ++ ap->ioaddr.altstatus_addr = ctl_base + 
io_offset; ++ ap->ioaddr.ctl_addr = ctl_base + io_offset; ++ ++ ata_port_desc(ap, "cmd %px ctl %px data %px", ++ base, ctl_base, ap->ioaddr.data_addr); + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (irq_res && irq_res->start > 0) { diff --git a/queue-6.1/ata-pata_ftide010-add-missing-module_description.patch b/queue-6.1/ata-pata_ftide010-add-missing-module_description.patch new file mode 100644 index 00000000000..f89f6d5bda0 --- /dev/null +++ b/queue-6.1/ata-pata_ftide010-add-missing-module_description.patch @@ -0,0 +1,34 @@ +From 7274eef5729037300f29d14edeb334a47a098f65 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 24 Aug 2023 07:41:59 +0900 +Subject: ata: pata_ftide010: Add missing MODULE_DESCRIPTION + +From: Damien Le Moal + +commit 7274eef5729037300f29d14edeb334a47a098f65 upstream. + +Add the missing MODULE_DESCRIPTION() to avoid warnings such as: + +WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/ata/pata_ftide010.o + +when compiling with W=1. 
+ +Fixes: be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/pata_ftide010.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/ata/pata_ftide010.c ++++ b/drivers/ata/pata_ftide010.c +@@ -567,6 +567,7 @@ static struct platform_driver pata_ftide + }; + module_platform_driver(pata_ftide010_driver); + ++MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010"); + MODULE_AUTHOR("Linus Walleij "); + MODULE_LICENSE("GPL"); + MODULE_ALIAS("platform:" DRV_NAME); diff --git a/queue-6.1/ata-sata_gemini-add-missing-module_description.patch b/queue-6.1/ata-sata_gemini-add-missing-module_description.patch new file mode 100644 index 00000000000..4dc5ff86c16 --- /dev/null +++ b/queue-6.1/ata-sata_gemini-add-missing-module_description.patch @@ -0,0 +1,34 @@ +From 8566572bf3b4d6e416a4bf2110dbb4817d11ba59 Mon Sep 17 00:00:00 2001 +From: Damien Le Moal +Date: Thu, 24 Aug 2023 07:43:18 +0900 +Subject: ata: sata_gemini: Add missing MODULE_DESCRIPTION + +From: Damien Le Moal + +commit 8566572bf3b4d6e416a4bf2110dbb4817d11ba59 upstream. + +Add the missing MODULE_DESCRIPTION() to avoid warnings such as: + +WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/ata/sata_gemini.o + +when compiling with W=1. 
+ +Fixes: be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Reviewed-by: Linus Walleij +Signed-off-by: Greg Kroah-Hartman +--- + drivers/ata/sata_gemini.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/ata/sata_gemini.c ++++ b/drivers/ata/sata_gemini.c +@@ -428,6 +428,7 @@ static struct platform_driver gemini_sat + }; + module_platform_driver(gemini_sata_driver); + ++MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge"); + MODULE_AUTHOR("Linus Walleij "); + MODULE_LICENSE("GPL"); + MODULE_ALIAS("platform:" DRV_NAME); diff --git a/queue-6.1/btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch b/queue-6.1/btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch new file mode 100644 index 00000000000..b9b009e93ea --- /dev/null +++ b/queue-6.1/btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch @@ -0,0 +1,43 @@ +From 4490e803e1fe9fab8db5025e44e23b55df54078b Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 26 Jul 2023 16:56:57 +0100 +Subject: btrfs: don't start transaction when joining with TRANS_JOIN_NOSTART + +From: Filipe Manana + +commit 4490e803e1fe9fab8db5025e44e23b55df54078b upstream. + +When joining a transaction with TRANS_JOIN_NOSTART, if we don't find a +running transaction we end up creating one. This goes against the purpose +of TRANS_JOIN_NOSTART which is to join a running transaction if its state +is at or below the state TRANS_STATE_COMMIT_START, otherwise return an +-ENOENT error and don't start a new transaction. So fix this to not create +a new transaction if there's no running transaction at or below that +state. 
+ +CC: stable@vger.kernel.org # 4.14+ +Fixes: a6d155d2e363 ("Btrfs: fix deadlock between fiemap and transaction commits") +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/transaction.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -279,10 +279,11 @@ loop: + spin_unlock(&fs_info->trans_lock); + + /* +- * If we are ATTACH, we just want to catch the current transaction, +- * and commit it. If there is no transaction, just return ENOENT. ++ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the ++ * current transaction, and commit it. If there is no transaction, just ++ * return ENOENT. + */ +- if (type == TRANS_ATTACH) ++ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART) + return -ENOENT; + + /* diff --git a/queue-6.1/btrfs-fix-start-transaction-qgroup-rsv-double-free.patch b/queue-6.1/btrfs-fix-start-transaction-qgroup-rsv-double-free.patch new file mode 100644 index 00000000000..ebd05eb05d9 --- /dev/null +++ b/queue-6.1/btrfs-fix-start-transaction-qgroup-rsv-double-free.patch @@ -0,0 +1,97 @@ +From a6496849671a5bc9218ecec25a983253b34351b1 Mon Sep 17 00:00:00 2001 +From: Boris Burkov +Date: Fri, 21 Jul 2023 09:02:07 -0700 +Subject: btrfs: fix start transaction qgroup rsv double free + +From: Boris Burkov + +commit a6496849671a5bc9218ecec25a983253b34351b1 upstream. + +btrfs_start_transaction reserves metadata space of the PERTRANS type +before it identifies a transaction to start/join. This allows flushing +when reserving that space without a deadlock. However, it results in a +race which temporarily breaks qgroup rsv accounting. + +T1 T2 +start_transaction +do_stuff + start_transaction + qgroup_reserve_meta_pertrans +commit_transaction + qgroup_free_meta_all_pertrans + hit an error starting txn + goto reserve_fail + qgroup_free_meta_pertrans (already freed!) 
+ +The basic issue is that there is nothing preventing another commit from +committing before start_transaction finishes (in fact sometimes we +intentionally wait for it) so any error path that frees the reserve is +at risk of this race. + +While this exact space was getting freed anyway, and it's not a huge +deal to double free it (just a warning, the free code catches this), it +can result in incorrectly freeing some other pertrans reservation in +this same reservation, which could then lead to spuriously granting +reservations we might not have the space for. Therefore, I do believe it +is worth fixing. + +To fix it, use the existing prealloc->pertrans conversion mechanism. +When we first reserve the space, we reserve prealloc space and only when +we are sure we have a transaction do we convert it to pertrans. This way +any racing commits do not blow away our reservation, but we still get a +pertrans reservation that is freed when _this_ transaction gets committed. + +This issue can be reproduced by running generic/269 with either qgroups +or squotas enabled via mkfs on the scratch device. + +Reviewed-by: Josef Bacik +CC: stable@vger.kernel.org # 5.10+ +Signed-off-by: Boris Burkov +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/transaction.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/transaction.c ++++ b/fs/btrfs/transaction.c +@@ -580,8 +580,13 @@ start_transaction(struct btrfs_root *roo + u64 delayed_refs_bytes = 0; + + qgroup_reserved = num_items * fs_info->nodesize; +- ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved, +- enforce_qgroups); ++ /* ++ * Use prealloc for now, as there might be a currently running ++ * transaction that could free this reserved space prematurely ++ * by committing. 
++ */ ++ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved, ++ enforce_qgroups, false); + if (ret) + return ERR_PTR(ret); + +@@ -693,6 +698,14 @@ again: + h->reloc_reserved = reloc_reserved; + } + ++ /* ++ * Now that we have found a transaction to be a part of, convert the ++ * qgroup reservation from prealloc to pertrans. A different transaction ++ * can't race in and free our pertrans out from under us. ++ */ ++ if (qgroup_reserved) ++ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); ++ + got_it: + if (!current->journal_info) + current->journal_info = h; +@@ -740,7 +753,7 @@ alloc_fail: + btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, + num_bytes, NULL); + reserve_fail: +- btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved); ++ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); + return ERR_PTR(ret); + } + diff --git a/queue-6.1/btrfs-free-qgroup-rsv-on-io-failure.patch b/queue-6.1/btrfs-free-qgroup-rsv-on-io-failure.patch new file mode 100644 index 00000000000..b3f6a9a5101 --- /dev/null +++ b/queue-6.1/btrfs-free-qgroup-rsv-on-io-failure.patch @@ -0,0 +1,46 @@ +From e28b02118b94e42be3355458a2406c6861e2dd32 Mon Sep 17 00:00:00 2001 +From: Boris Burkov +Date: Fri, 21 Jul 2023 09:02:06 -0700 +Subject: btrfs: free qgroup rsv on io failure + +From: Boris Burkov + +commit e28b02118b94e42be3355458a2406c6861e2dd32 upstream. + +If we do a write whose bio suffers an error, we will never reclaim the +qgroup reserved space for it. We allocate the space in the write_iter +codepath, then release the reservation as we allocate the ordered +extent, but we only create a delayed ref if the ordered extent finishes. +If it has an error, we simply leak the rsv. This is apparent in running +any error injecting (dmerror) fstests like btrfs/146 or btrfs/160. Such +tests fail due to dmesg on umount complaining about the leaked qgroup +data space. + +When we clean up other aspects of space on failed ordered_extents, also +free the qgroup rsv. 
+ +Reviewed-by: Josef Bacik +CC: stable@vger.kernel.org # 5.10+ +Signed-off-by: Boris Burkov +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -3393,6 +3393,13 @@ out: + btrfs_free_reserved_extent(fs_info, + ordered_extent->disk_bytenr, + ordered_extent->disk_num_bytes, 1); ++ /* ++ * Actually free the qgroup rsv which was released when ++ * the ordered extent was created. ++ */ ++ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid, ++ ordered_extent->qgroup_rsv, ++ BTRFS_QGROUP_RSV_DATA); + } + } + diff --git a/queue-6.1/btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch b/queue-6.1/btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch new file mode 100644 index 00000000000..666236cb40e --- /dev/null +++ b/queue-6.1/btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch @@ -0,0 +1,166 @@ +From 332581bde2a419d5f12a93a1cdc2856af649a3cc Mon Sep 17 00:00:00 2001 +From: Naohiro Aota +Date: Fri, 21 Jul 2023 16:42:14 +0900 +Subject: btrfs: zoned: do not zone finish data relocation block group + +From: Naohiro Aota + +commit 332581bde2a419d5f12a93a1cdc2856af649a3cc upstream. + +When multiple writes happen at once, we may need to sacrifice a currently +active block group to be zone finished for a new allocation. We choose a +block group with the least free space left, and zone finish it. + +To do the finishing, we need to send IOs for already allocated region +and wait for them and on-going IOs. Otherwise, these IOs fail because the +zone is already finished at the time the IO reach a device. + +However, if a block group dedicated to the data relocation is zone +finished, there is a chance that finishing it before an ongoing write IO +reaches the device. 
That is because there is a timing gap between when an +allocation is done (block_group->reservations == 0, as pre-allocation is +done) and when an ordered extent is created once the relocation IO starts. +Thus, if we finish the zone between them, we can fail the IOs. + +We cannot simply use "fs_info->data_reloc_bg == block_group->start" to +avoid the zone finishing, because the data_reloc_bg may already have switched to +a new block group while there are still ongoing write IOs to the old +data_reloc_bg. + +So, this patch reworks the BLOCK_GROUP_FLAG_ZONED_DATA_RELOC bit to +indicate there is a data relocation allocation and/or ongoing write to the +block group. The bit is set on allocation and cleared in the end_io function of +the last IO for the currently allocated region. + +Changing the timing of the bit setting also solves the issue of the bit +being left set even after there is no IO going on. With the current code, if +the data_reloc_bg switches after the last IO to the current data_reloc_bg, +the bit is set at that point and there is no one to clear it. As a +result, that block group is kept unallocatable for anything. 
+ +Fixes: 343d8a30851c ("btrfs: zoned: prevent allocation from previous data relocation BG") +Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group") +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Christoph Hellwig +Reviewed-by: Johannes Thumshirn +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent-tree.c | 43 +++++++++++++++++++++++-------------------- + fs/btrfs/zoned.c | 16 +++++++++++++--- + 2 files changed, 36 insertions(+), 23 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -3810,7 +3810,8 @@ static int do_allocation_zoned(struct bt + fs_info->data_reloc_bg == 0); + + if (block_group->ro || +- test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { ++ (!ffe_ctl->for_data_reloc && ++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) { + ret = 1; + goto out; + } +@@ -3853,8 +3854,26 @@ static int do_allocation_zoned(struct bt + if (ffe_ctl->for_treelog && !fs_info->treelog_bg) + fs_info->treelog_bg = block_group->start; + +- if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg) +- fs_info->data_reloc_bg = block_group->start; ++ if (ffe_ctl->for_data_reloc) { ++ if (!fs_info->data_reloc_bg) ++ fs_info->data_reloc_bg = block_group->start; ++ /* ++ * Do not allow allocations from this block group, unless it is ++ * for data relocation. Compared to increasing the ->ro, setting ++ * the ->zoned_data_reloc_ongoing flag still allows nocow ++ * writers to come in. See btrfs_inc_nocow_writers(). ++ * ++ * We need to disable an allocation to avoid an allocation of ++ * regular (non-relocation data) extent. With mix of relocation ++ * extents and regular extents, we can dispatch WRITE commands ++ * (for relocation extents) and ZONE APPEND commands (for ++ * regular extents) at the same time to the same zone, which ++ * easily break the write pointer. 
++ * ++ * Also, this flag avoids this block group to be zone finished. ++ */ ++ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags); ++ } + + ffe_ctl->found_offset = start + block_group->alloc_offset; + block_group->alloc_offset += num_bytes; +@@ -3872,24 +3891,8 @@ static int do_allocation_zoned(struct bt + out: + if (ret && ffe_ctl->for_treelog) + fs_info->treelog_bg = 0; +- if (ret && ffe_ctl->for_data_reloc && +- fs_info->data_reloc_bg == block_group->start) { +- /* +- * Do not allow further allocations from this block group. +- * Compared to increasing the ->ro, setting the +- * ->zoned_data_reloc_ongoing flag still allows nocow +- * writers to come in. See btrfs_inc_nocow_writers(). +- * +- * We need to disable an allocation to avoid an allocation of +- * regular (non-relocation data) extent. With mix of relocation +- * extents and regular extents, we can dispatch WRITE commands +- * (for relocation extents) and ZONE APPEND commands (for +- * regular extents) at the same time to the same zone, which +- * easily break the write pointer. +- */ +- set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags); ++ if (ret && ffe_ctl->for_data_reloc) + fs_info->data_reloc_bg = 0; +- } + spin_unlock(&fs_info->relocation_bg_lock); + spin_unlock(&fs_info->treelog_bg_lock); + spin_unlock(&block_group->lock); +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -2009,6 +2009,10 @@ static int do_zone_finish(struct btrfs_b + * and block_group->meta_write_pointer for metadata. 
+ */ + if (!fully_written) { ++ if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { ++ spin_unlock(&block_group->lock); ++ return -EAGAIN; ++ } + spin_unlock(&block_group->lock); + + ret = btrfs_inc_block_group_ro(block_group, false); +@@ -2037,7 +2041,9 @@ static int do_zone_finish(struct btrfs_b + return 0; + } + +- if (block_group->reserved) { ++ if (block_group->reserved || ++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, ++ &block_group->runtime_flags)) { + spin_unlock(&block_group->lock); + btrfs_dec_block_group_ro(block_group); + return -EAGAIN; +@@ -2268,7 +2274,10 @@ void btrfs_zoned_release_data_reloc_bg(s + + /* All relocation extents are written. */ + if (block_group->start + block_group->alloc_offset == logical + length) { +- /* Now, release this block group for further allocations. */ ++ /* ++ * Now, release this block group for further allocations and ++ * zone finish. ++ */ + clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, + &block_group->runtime_flags); + } +@@ -2292,7 +2301,8 @@ int btrfs_zone_finish_one_bg(struct btrf + + spin_lock(&block_group->lock); + if (block_group->reserved || block_group->alloc_offset == 0 || +- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { ++ (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || ++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { + spin_unlock(&block_group->lock); + continue; + } diff --git a/queue-6.1/clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch b/queue-6.1/clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch new file mode 100644 index 00000000000..e04d1600916 --- /dev/null +++ b/queue-6.1/clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch @@ -0,0 +1,58 @@ +From e7d65e40ab5a5940785c5922f317602d0268caaf Mon Sep 17 00:00:00 2001 +From: Walter Chang +Date: Mon, 17 Jul 2023 17:07:34 +0800 +Subject: clocksource/drivers/arm_arch_timer: Disable timer before programming CVAL 
+ +From: Walter Chang + +commit e7d65e40ab5a5940785c5922f317602d0268caaf upstream. + +Due to the fact that the use of `writeq_relaxed()` to program CVAL is +not guaranteed to be atomic, it is necessary to disable the timer before +programming CVAL. + +However, if the MMIO timer is already enabled and has not yet expired, +there is a possibility of unexpected behavior occurring: when the CPU +enters the idle state during this period, and if the CPU's local event +is earlier than the broadcast event, the following process occurs: + +tick_broadcast_enter() + tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER) + __tick_broadcast_oneshot_control() + ___tick_broadcast_oneshot_control() + tick_broadcast_set_event() + clockevents_program_event() + set_next_event_mem() + +During this process, the MMIO timer remains enabled while programming +CVAL. To prevent such behavior, disable timer explicitly prior to +programming CVAL. + +Fixes: 8b82c4f883a7 ("clocksource/drivers/arm_arch_timer: Move MMIO timer programming over to CVAL") +Cc: stable@vger.kernel.org +Signed-off-by: Walter Chang +Acked-by: Marc Zyngier +Reviewed-by: AngeloGioacchino Del Regno +Signed-off-by: Daniel Lezcano +Link: https://lore.kernel.org/r/20230717090735.19370-1-walter.chang@mediatek.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/clocksource/arm_arch_timer.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/clocksource/arm_arch_timer.c ++++ b/drivers/clocksource/arm_arch_timer.c +@@ -773,6 +773,13 @@ static __always_inline void set_next_eve + u64 cnt; + + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk); ++ ++ /* Timer must be disabled before programming CVAL */ ++ if (ctrl & ARCH_TIMER_CTRL_ENABLE) { ++ ctrl &= ~ARCH_TIMER_CTRL_ENABLE; ++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); ++ } ++ + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; + diff --git a/queue-6.1/dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch 
b/queue-6.1/dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch new file mode 100644 index 00000000000..f1db285ec13 --- /dev/null +++ b/queue-6.1/dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch @@ -0,0 +1,64 @@ +From c6ec8c83a29fb3aec3efa6fabbf5344498f57c7f Mon Sep 17 00:00:00 2001 +From: Hien Huynh +Date: Thu, 6 Jul 2023 12:21:50 +0100 +Subject: dmaengine: sh: rz-dmac: Fix destination and source data size setting + +From: Hien Huynh + +commit c6ec8c83a29fb3aec3efa6fabbf5344498f57c7f upstream. + +Before setting DDS and SDS values, we need to clear its value first +otherwise, we get incorrect results when we change/update the DMA bus +width several times due to the 'OR' expression. + +Fixes: 5000d37042a6 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC") +Cc: stable@kernel.org +Signed-off-by: Hien Huynh +Signed-off-by: Biju Das +Reviewed-by: Geert Uytterhoeven +Link: https://lore.kernel.org/r/20230706112150.198941-3-biju.das.jz@bp.renesas.com +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma/sh/rz-dmac.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/drivers/dma/sh/rz-dmac.c ++++ b/drivers/dma/sh/rz-dmac.c +@@ -9,6 +9,7 @@ + * Copyright 2012 Javier Martin, Vista Silicon + */ + ++#include + #include + #include + #include +@@ -145,8 +146,8 @@ struct rz_dmac { + #define CHCFG_REQD BIT(3) + #define CHCFG_SEL(bits) ((bits) & 0x07) + #define CHCFG_MEM_COPY (0x80400008) +-#define CHCFG_FILL_DDS(a) (((a) << 16) & GENMASK(19, 16)) +-#define CHCFG_FILL_SDS(a) (((a) << 12) & GENMASK(15, 12)) ++#define CHCFG_FILL_DDS_MASK GENMASK(19, 16) ++#define CHCFG_FILL_SDS_MASK GENMASK(15, 12) + #define CHCFG_FILL_TM(a) (((a) & BIT(5)) << 22) + #define CHCFG_FILL_AM(a) (((a) & GENMASK(4, 2)) << 6) + #define CHCFG_FILL_LVL(a) (((a) & BIT(1)) << 5) +@@ -609,13 +610,15 @@ static int rz_dmac_config(struct dma_cha + if (val == CHCFG_DS_INVALID) + return -EINVAL; + +- channel->chcfg |= 
CHCFG_FILL_DDS(val); ++ channel->chcfg &= ~CHCFG_FILL_DDS_MASK; ++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val); + + val = rz_dmac_ds_to_val_mapping(config->src_addr_width); + if (val == CHCFG_DS_INVALID) + return -EINVAL; + +- channel->chcfg |= CHCFG_FILL_SDS(val); ++ channel->chcfg &= ~CHCFG_FILL_SDS_MASK; ++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val); + + return 0; + } diff --git a/queue-6.1/ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch b/queue-6.1/ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch new file mode 100644 index 00000000000..1a68fbf2434 --- /dev/null +++ b/queue-6.1/ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch @@ -0,0 +1,101 @@ +From 68228da51c9a436872a4ef4b5a7692e29f7e5bc7 Mon Sep 17 00:00:00 2001 +From: Wang Jianjian +Date: Thu, 3 Aug 2023 00:28:39 +0800 +Subject: ext4: add correct group descriptors and reserved GDT blocks to system zone + +From: Wang Jianjian + +commit 68228da51c9a436872a4ef4b5a7692e29f7e5bc7 upstream. + +When setup_system_zone, flex_bg is not initialized so it is always 1. +Use a new helper function, ext4_num_base_meta_blocks() which does not +depend on sbi->s_log_groups_per_flex being initialized. + +[ Squashed two patches in the Link URL's below together into a single + commit, which is simpler to review/understand. Also fix checkpatch + warnings. 
--TYT ] + +Cc: stable@kernel.org +Signed-off-by: Wang Jianjian +Link: https://lore.kernel.org/r/tencent_21AF0D446A9916ED5C51492CC6C9A0A77B05@qq.com +Link: https://lore.kernel.org/r/tencent_D744D1450CC169AEA77FCF0A64719909ED05@qq.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/balloc.c | 15 +++++++++++---- + fs/ext4/block_validity.c | 8 ++++---- + fs/ext4/ext4.h | 2 ++ + 3 files changed, 17 insertions(+), 8 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -910,11 +910,11 @@ unsigned long ext4_bg_num_gdb(struct sup + } + + /* +- * This function returns the number of file system metadata clusters at ++ * This function returns the number of file system metadata blocks at + * the beginning of a block group, including the reserved gdt blocks. + */ +-static unsigned ext4_num_base_meta_clusters(struct super_block *sb, +- ext4_group_t block_group) ++unsigned int ext4_num_base_meta_blocks(struct super_block *sb, ++ ext4_group_t block_group) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned num; +@@ -932,8 +932,15 @@ static unsigned ext4_num_base_meta_clust + } else { /* For META_BG_BLOCK_GROUPS */ + num += ext4_bg_num_gdb(sb, block_group); + } +- return EXT4_NUM_B2C(sbi, num); ++ return num; + } ++ ++static unsigned int ext4_num_base_meta_clusters(struct super_block *sb, ++ ext4_group_t block_group) ++{ ++ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group)); ++} ++ + /** + * ext4_inode_to_goal_block - return a hint for block allocation + * @inode: inode for block allocation +--- a/fs/ext4/block_validity.c ++++ b/fs/ext4/block_validity.c +@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_ + struct ext4_system_blocks *system_blks; + struct ext4_group_desc *gdp; + ext4_group_t i; +- int flex_size = ext4_flex_bg_size(sbi); + int ret; + + system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); +@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_ + return -ENOMEM; + + for 
(i=0; i < ngroups; i++) { ++ unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i); ++ + cond_resched(); +- if (ext4_bg_has_super(sb, i) && +- ((i < 5) || ((i % flex_size) == 0))) { ++ if (meta_blks != 0) { + ret = add_system_zone(system_blks, + ext4_group_first_block_no(sb, i), +- ext4_bg_num_gdb(sb, i) + 1, 0); ++ meta_blks, 0); + if (ret) + goto err; + } +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -3096,6 +3096,8 @@ extern const char *ext4_decode_error(str + extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb, + ext4_group_t block_group, + unsigned int flags); ++extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb, ++ ext4_group_t block_group); + + extern __printf(7, 8) + void __ext4_error(struct super_block *, const char *, unsigned int, bool, diff --git a/queue-6.1/ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch b/queue-6.1/ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch new file mode 100644 index 00000000000..b2ec8caed53 --- /dev/null +++ b/queue-6.1/ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch @@ -0,0 +1,49 @@ +From 7ca4b085f430f3774c3838b3da569ceccd6a0177 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= +Date: Thu, 3 Aug 2023 10:17:13 +0100 +Subject: ext4: fix memory leaks in ext4_fname_{setup_filename,prepare_lookup} +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Luís Henriques + +commit 7ca4b085f430f3774c3838b3da569ceccd6a0177 upstream. + +If the filename casefolding fails, we'll be leaking memory from the +fscrypt_name struct, namely from the 'crypto_buf.name' member. + +Make sure we free it in the error path on both ext4_fname_setup_filename() +and ext4_fname_prepare_lookup() functions. 
+ +Cc: stable@kernel.org +Fixes: 1ae98e295fa2 ("ext4: optimize match for casefolded encrypted dirs") +Signed-off-by: Luís Henriques +Reviewed-by: Eric Biggers +Link: https://lore.kernel.org/r/20230803091713.13239-1-lhenriques@suse.de +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/crypto.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/ext4/crypto.c ++++ b/fs/ext4/crypto.c +@@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct ino + + #if IS_ENABLED(CONFIG_UNICODE) + err = ext4_fname_setup_ci_filename(dir, iname, fname); ++ if (err) ++ ext4_fname_free_filename(fname); + #endif + return err; + } +@@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct ino + + #if IS_ENABLED(CONFIG_UNICODE) + err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); ++ if (err) ++ ext4_fname_free_filename(fname); + #endif + return err; + } diff --git a/queue-6.1/f2fs-avoid-false-alarm-of-circular-locking.patch b/queue-6.1/f2fs-avoid-false-alarm-of-circular-locking.patch new file mode 100644 index 00000000000..7400f010800 --- /dev/null +++ b/queue-6.1/f2fs-avoid-false-alarm-of-circular-locking.patch @@ -0,0 +1,155 @@ +From 5c13e2388bf3426fd69a89eb46e50469e9624e56 Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Fri, 18 Aug 2023 11:34:32 -0700 +Subject: f2fs: avoid false alarm of circular locking + +From: Jaegeuk Kim + +commit 5c13e2388bf3426fd69a89eb46e50469e9624e56 upstream. 
+ +====================================================== +WARNING: possible circular locking dependency detected +6.5.0-rc5-syzkaller-00353-gae545c3283dc #0 Not tainted +------------------------------------------------------ +syz-executor273/5027 is trying to acquire lock: +ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_down_write fs/f2fs/f2fs.h:2133 [inline] +ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644 + +but task is already holding lock: +ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_down_read fs/f2fs/f2fs.h:2108 [inline] +ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_add_dentry+0x92/0x230 fs/f2fs/dir.c:783 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #1 (&fi->i_xattr_sem){.+.+}-{3:3}: + down_read+0x9c/0x470 kernel/locking/rwsem.c:1520 + f2fs_down_read fs/f2fs/f2fs.h:2108 [inline] + f2fs_getxattr+0xb1e/0x12c0 fs/f2fs/xattr.c:532 + __f2fs_get_acl+0x5a/0x900 fs/f2fs/acl.c:179 + f2fs_acl_create fs/f2fs/acl.c:377 [inline] + f2fs_init_acl+0x15c/0xb30 fs/f2fs/acl.c:420 + f2fs_init_inode_metadata+0x159/0x1290 fs/f2fs/dir.c:558 + f2fs_add_regular_entry+0x79e/0xb90 fs/f2fs/dir.c:740 + f2fs_add_dentry+0x1de/0x230 fs/f2fs/dir.c:788 + f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827 + f2fs_add_link fs/f2fs/f2fs.h:3554 [inline] + f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781 + vfs_mkdir+0x532/0x7e0 fs/namei.c:4117 + do_mkdirat+0x2a9/0x330 fs/namei.c:4140 + __do_sys_mkdir fs/namei.c:4160 [inline] + __se_sys_mkdir fs/namei.c:4158 [inline] + __x64_sys_mkdir+0xf2/0x140 fs/namei.c:4158 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +-> #0 (&fi->i_sem){+.+.}-{3:3}: + check_prev_add kernel/locking/lockdep.c:3142 [inline] + check_prevs_add kernel/locking/lockdep.c:3261 [inline] + validate_chain kernel/locking/lockdep.c:3876 [inline] + 
__lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144 + lock_acquire kernel/locking/lockdep.c:5761 [inline] + lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726 + down_write+0x93/0x200 kernel/locking/rwsem.c:1573 + f2fs_down_write fs/f2fs/f2fs.h:2133 [inline] + f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644 + f2fs_add_dentry+0xa6/0x230 fs/f2fs/dir.c:784 + f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827 + f2fs_add_link fs/f2fs/f2fs.h:3554 [inline] + f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781 + vfs_mkdir+0x532/0x7e0 fs/namei.c:4117 + ovl_do_mkdir fs/overlayfs/overlayfs.h:196 [inline] + ovl_mkdir_real+0xb5/0x370 fs/overlayfs/dir.c:146 + ovl_workdir_create+0x3de/0x820 fs/overlayfs/super.c:309 + ovl_make_workdir fs/overlayfs/super.c:711 [inline] + ovl_get_workdir fs/overlayfs/super.c:864 [inline] + ovl_fill_super+0xdab/0x6180 fs/overlayfs/super.c:1400 + vfs_get_super+0xf9/0x290 fs/super.c:1152 + vfs_get_tree+0x88/0x350 fs/super.c:1519 + do_new_mount fs/namespace.c:3335 [inline] + path_mount+0x1492/0x1ed0 fs/namespace.c:3662 + do_mount fs/namespace.c:3675 [inline] + __do_sys_mount fs/namespace.c:3884 [inline] + __se_sys_mount fs/namespace.c:3861 [inline] + __x64_sys_mount+0x293/0x310 fs/namespace.c:3861 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + rlock(&fi->i_xattr_sem); + lock(&fi->i_sem); + lock(&fi->i_xattr_sem); + lock(&fi->i_sem); + +Cc: +Reported-and-tested-by: syzbot+e5600587fa9cbf8e3826@syzkaller.appspotmail.com +Fixes: 5eda1ad1aaff "f2fs: fix deadlock in i_xattr_sem and inode page lock" +Tested-by: Guenter Roeck +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/f2fs.h | 24 +++++++++++++++--------- + fs/f2fs/inline.c | 3 ++- + 2 files changed, 17 insertions(+), 10 deletions(-) + +--- 
a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -2160,15 +2160,6 @@ static inline int f2fs_down_read_trylock + return down_read_trylock(&sem->internal_rwsem); + } + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +-{ +- down_read_nested(&sem->internal_rwsem, subclass); +-} +-#else +-#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +-#endif +- + static inline void f2fs_up_read(struct f2fs_rwsem *sem) + { + up_read(&sem->internal_rwsem); +@@ -2179,6 +2170,21 @@ static inline void f2fs_down_write(struc + down_write(&sem->internal_rwsem); + } + ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) ++{ ++ down_read_nested(&sem->internal_rwsem, subclass); ++} ++ ++static inline void f2fs_down_write_nested(struct f2fs_rwsem *sem, int subclass) ++{ ++ down_write_nested(&sem->internal_rwsem, subclass); ++} ++#else ++#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) ++#define f2fs_down_write_nested(sem, subclass) f2fs_down_write(sem) ++#endif ++ + static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) + { + return down_write_trylock(&sem->internal_rwsem); +--- a/fs/f2fs/inline.c ++++ b/fs/f2fs/inline.c +@@ -642,7 +642,8 @@ int f2fs_add_inline_entry(struct inode * + } + + if (inode) { +- f2fs_down_write(&F2FS_I(inode)->i_sem); ++ f2fs_down_write_nested(&F2FS_I(inode)->i_sem, ++ SINGLE_DEPTH_NESTING); + page = f2fs_init_inode_metadata(inode, dir, fname, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); diff --git a/queue-6.1/f2fs-flush-inode-if-atomic-file-is-aborted.patch b/queue-6.1/f2fs-flush-inode-if-atomic-file-is-aborted.patch new file mode 100644 index 00000000000..11910c675af --- /dev/null +++ b/queue-6.1/f2fs-flush-inode-if-atomic-file-is-aborted.patch @@ -0,0 +1,118 @@ +From a3ab55746612247ce3dcaac6de66f5ffc055b9df Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Fri, 7 Jul 2023 07:03:13 -0700 
+Subject: f2fs: flush inode if atomic file is aborted + +From: Jaegeuk Kim + +commit a3ab55746612247ce3dcaac6de66f5ffc055b9df upstream. + +Let's flush the inode whose atomic operation is being aborted, to avoid a stale dirty +inode during eviction in this call stack: + + f2fs_mark_inode_dirty_sync+0x22/0x40 [f2fs] + f2fs_abort_atomic_write+0xc4/0xf0 [f2fs] + f2fs_evict_inode+0x3f/0x690 [f2fs] + ? sugov_start+0x140/0x140 + evict+0xc3/0x1c0 + evict_inodes+0x17b/0x210 + generic_shutdown_super+0x32/0x120 + kill_block_super+0x21/0x50 + deactivate_locked_super+0x31/0x90 + cleanup_mnt+0x100/0x160 + task_work_run+0x59/0x90 + do_exit+0x33b/0xa50 + do_group_exit+0x2d/0x80 + __x64_sys_exit_group+0x14/0x20 + do_syscall_64+0x3b/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +This triggers f2fs_bug_on() in f2fs_evict_inode: + f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); + +This fixes the syzbot report: + +loop0: detected capacity change from 0 to 131072 +F2FS-fs (loop0): invalid crc value +F2FS-fs (loop0): Found nat_bits in checkpoint +F2FS-fs (loop0): Mounted with checkpoint version = 48b305e4 +------------[ cut here ]------------ +kernel BUG at fs/f2fs/inode.c:869!
+invalid opcode: 0000 [#1] PREEMPT SMP KASAN +CPU: 0 PID: 5014 Comm: syz-executor220 Not tainted 6.4.0-syzkaller-11479-g6cd06ab12d1a #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 +RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869 +Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc +RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 +RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007 +RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000 +R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50 +FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0 +Call Trace: + + evict+0x2ed/0x6b0 fs/inode.c:665 + dispose_list+0x117/0x1e0 fs/inode.c:698 + evict_inodes+0x345/0x440 fs/inode.c:748 + generic_shutdown_super+0xaf/0x480 fs/super.c:478 + kill_block_super+0x64/0xb0 fs/super.c:1417 + kill_f2fs_super+0x2af/0x3c0 fs/f2fs/super.c:4704 + deactivate_locked_super+0x98/0x160 fs/super.c:330 + deactivate_super+0xb1/0xd0 fs/super.c:361 + cleanup_mnt+0x2ae/0x3d0 fs/namespace.c:1254 + task_work_run+0x16f/0x270 kernel/task_work.c:179 + exit_task_work include/linux/task_work.h:38 [inline] + do_exit+0xa9a/0x29a0 kernel/exit.c:874 + do_group_exit+0xd4/0x2a0 kernel/exit.c:1024 + __do_sys_exit_group kernel/exit.c:1035 [inline] + __se_sys_exit_group kernel/exit.c:1033 [inline] + __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1033 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 
0033:0x7f309be71a09 +Code: Unable to access opcode bytes at 0x7f309be719df. +RSP: 002b:00007fff171df518 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 +RAX: ffffffffffffffda RBX: 00007f309bef7330 RCX: 00007f309be71a09 +RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001 +RBP: 0000000000000001 R08: ffffffffffffffc0 R09: 00007f309bef1e40 +R10: 0000000000010600 R11: 0000000000000246 R12: 00007f309bef7330 +R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001 + +Modules linked in: +---[ end trace 0000000000000000 ]--- +RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869 +Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc +RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 +RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007 +RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000 +R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50 +FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0 + +Cc: +Reported-and-tested-by: syzbot+e1246909d526a9d470fa@syzkaller.appspotmail.com +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/segment.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -204,6 +204,8 @@ void f2fs_abort_atomic_write(struct inod + f2fs_i_size_write(inode, fi->original_i_size); + fi->original_i_size = 0; + } ++ /* avoid stale dirty inode during eviction */ ++ sync_inode_metadata(inode, 0); + } + + static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, diff --git 
a/queue-6.1/fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch b/queue-6.1/fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch new file mode 100644 index 00000000000..e533c7c7d0b --- /dev/null +++ b/queue-6.1/fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch @@ -0,0 +1,47 @@ +From b8bd342d50cbf606666488488f9fea374aceb2d5 Mon Sep 17 00:00:00 2001 +From: ruanmeisi +Date: Tue, 25 Apr 2023 19:13:54 +0800 +Subject: fuse: nlookup missing decrement in fuse_direntplus_link + +From: ruanmeisi + +commit b8bd342d50cbf606666488488f9fea374aceb2d5 upstream. + +During our debugging of glusterfs, we found an Assertion failed error: +inode_lookup >= nlookup, which was caused by the nlookup value in the +kernel being greater than that in the FUSE file system. + +The issue was introduced by fuse_direntplus_link, where in the function, +fuse_iget increments nlookup, and if d_splice_alias returns failure, +fuse_direntplus_link returns failure without decrementing nlookup +https://github.com/gluster/glusterfs/pull/4081 + +Signed-off-by: ruanmeisi +Fixes: 0b05b18381ee ("fuse: implement NFS-like readdirplus support") +Cc: # v3.9 +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/fuse/readdir.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/fuse/readdir.c ++++ b/fs/fuse/readdir.c +@@ -243,8 +243,16 @@ retry: + dput(dentry); + dentry = alias; + } +- if (IS_ERR(dentry)) ++ if (IS_ERR(dentry)) { ++ if (!IS_ERR(inode)) { ++ struct fuse_inode *fi = get_fuse_inode(inode); ++ ++ spin_lock(&fi->lock); ++ fi->nlookup--; ++ spin_unlock(&fi->lock); ++ } + return PTR_ERR(dentry); ++ } + } + if (fc->readdirplus_auto) + set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); diff --git a/queue-6.1/hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch b/queue-6.1/hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch new file mode 100644 index 00000000000..782de43b35b --- 
/dev/null +++ b/queue-6.1/hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch @@ -0,0 +1,53 @@ +From 23316be8a9d450f33a21f1efe7d89570becbec58 Mon Sep 17 00:00:00 2001 +From: Christian Marangi +Date: Sun, 16 Jul 2023 04:28:04 +0200 +Subject: hwspinlock: qcom: add missing regmap config for SFPB MMIO implementation + +From: Christian Marangi + +commit 23316be8a9d450f33a21f1efe7d89570becbec58 upstream. + +Commit 5d4753f741d8 ("hwspinlock: qcom: add support for MMIO on older +SoCs") introduced and made regmap_config mandatory in the of_data struct +but didn't add the regmap_config for sfpb based devices. + +SFPB based devices can use either the legacy syscon way to probe or the +new MMIO way, and currently devices that use the MMIO way are broken as +they lack the definition of the now required regmap_config and always +return -EINVAL (which indirectly makes probing fail for everything that +depends on it: smem, nandc with smem-parser...) + +Fix this by correctly adding the missing regmap_config, restoring the +function of hwspinlock on SFPB based devices with MMIO implementation.
+ +Cc: stable@vger.kernel.org +Fixes: 5d4753f741d8 ("hwspinlock: qcom: add support for MMIO on older SoCs") +Signed-off-by: Christian Marangi +Link: https://lore.kernel.org/r/20230716022804.21239-1-ansuelsmth@gmail.com +Signed-off-by: Bjorn Andersson +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hwspinlock/qcom_hwspinlock.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/hwspinlock/qcom_hwspinlock.c ++++ b/drivers/hwspinlock/qcom_hwspinlock.c +@@ -69,9 +69,18 @@ static const struct hwspinlock_ops qcom_ + .unlock = qcom_hwspinlock_unlock, + }; + ++static const struct regmap_config sfpb_mutex_config = { ++ .reg_bits = 32, ++ .reg_stride = 4, ++ .val_bits = 32, ++ .max_register = 0x100, ++ .fast_io = true, ++}; ++ + static const struct qcom_hwspinlock_of_data of_sfpb_mutex = { + .offset = 0x4, + .stride = 0x4, ++ .regmap_config = &sfpb_mutex_config, + }; + + static const struct regmap_config tcsr_msm8226_mutex_config = { diff --git a/queue-6.1/jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch b/queue-6.1/jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch new file mode 100644 index 00000000000..2b851f5efaa --- /dev/null +++ b/queue-6.1/jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch @@ -0,0 +1,67 @@ +From 590a809ff743e7bd890ba5fb36bc38e20a36de53 Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Fri, 14 Jul 2023 10:55:27 +0800 +Subject: jbd2: check 'jh->b_transaction' before removing it from checkpoint +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Zhihao Cheng + +commit 590a809ff743e7bd890ba5fb36bc38e20a36de53 upstream. 
+ +Following process will corrupt ext4 image: +Step 1: +jbd2_journal_commit_transaction + __jbd2_journal_insert_checkpoint(jh, commit_transaction) + // Put jh into trans1->t_checkpoint_list + journal->j_checkpoint_transactions = commit_transaction + // Put trans1 into journal->j_checkpoint_transactions + +Step 2: +do_get_write_access + test_clear_buffer_dirty(bh) // clear buffer dirty,set jbd dirty + __jbd2_journal_file_buffer(jh, transaction) // jh belongs to trans2 + +Step 3: +drop_cache + journal_shrink_one_cp_list + jbd2_journal_try_remove_checkpoint + if (!trylock_buffer(bh)) // lock bh, true + if (buffer_dirty(bh)) // buffer is not dirty + __jbd2_journal_remove_checkpoint(jh) + // remove jh from trans1->t_checkpoint_list + +Step 4: +jbd2_log_do_checkpoint + trans1 = journal->j_checkpoint_transactions + // jh is not in trans1->t_checkpoint_list + jbd2_cleanup_journal_tail(journal) // trans1 is done + +Step 5: Power cut, trans2 is not committed, jh is lost in next mounting. + +Fix it by checking 'jh->b_transaction' before remove it from checkpoint. 
+ +Cc: stable@kernel.org +Fixes: 46f881b5b175 ("jbd2: fix a race when checking checkpoint buffer busy") +Signed-off-by: Zhihao Cheng +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230714025528.564988-3-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/checkpoint.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -639,6 +639,8 @@ int jbd2_journal_try_remove_checkpoint(s + { + struct buffer_head *bh = jh2bh(jh); + ++ if (jh->b_transaction) ++ return -EBUSY; + if (!trylock_buffer(bh)) + return -EBUSY; + if (buffer_dirty(bh)) { diff --git a/queue-6.1/jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch b/queue-6.1/jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch new file mode 100644 index 00000000000..27ca4f46182 --- /dev/null +++ b/queue-6.1/jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch @@ -0,0 +1,76 @@ +From 2dfba3bb40ad8536b9fa802364f2d40da31aa88e Mon Sep 17 00:00:00 2001 +From: Zhang Yi +Date: Mon, 26 Jun 2023 15:33:22 +0800 +Subject: jbd2: correct the end of the journal recovery scan range + +From: Zhang Yi + +commit 2dfba3bb40ad8536b9fa802364f2d40da31aa88e upstream. + +We got a filesystem inconsistency issue below while running generic/475 +I/O failure pressure test with fast_commit feature enabled. + + Symlink /p3/d3/d1c/d6c/dd6/dce/l101 (inode #132605) is invalid. + +If fast_commit feature is enabled, a special fast_commit journal area is +appended to the end of the normal journal area. The journal->j_last +point to the first unused block behind the normal journal area instead +of the whole log area, and the journal->j_fc_last point to the first +unused block behind the fast_commit journal area. 
While doing journal +recovery, do_one_pass(PASS_SCAN) should first scan the normal journal +area and wrap around to the first block once it meets journal->j_last, +but the wrap() macro misuses journal->j_fc_last, so recovery +cannot read the next magic block (perhaps a commit block) and ends +early by mistake, missing tN and every transaction after it in the +following example. Ultimately, this can lead to filesystem inconsistency. + + | normal journal area | fast commit area | + +-------------------------------------------------+------------------+ + | tN(rear) | tN+1 |~| tN-x |...| tN-1 | tN(front) | .... | + +-------------------------------------------------+------------------+ + / / / + start journal->j_last journal->j_fc_last + +This patch fixes it by using the correct end, journal->j_last. + +Fixes: 5b849b5f96b4 ("jbd2: fast commit recovery path") +Cc: stable@kernel.org +Reported-by: Theodore Ts'o +Link: https://lore.kernel.org/linux-ext4/20230613043120.GB1584772@mit.edu/ +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230626073322.3956567-1-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/recovery.c | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +--- a/fs/jbd2/recovery.c ++++ b/fs/jbd2/recovery.c +@@ -229,12 +229,8 @@ static int count_tags(journal_t *journal + /* Make sure we wrap around the log correctly! */ + #define wrap(journal, var) \ + do { \ +- unsigned long _wrap_last = \ +- jbd2_has_feature_fast_commit(journal) ?
\ +- (journal)->j_fc_last : (journal)->j_last; \ +- \ +- if (var >= _wrap_last) \ +- var -= (_wrap_last - (journal)->j_first); \ ++ if (var >= (journal)->j_last) \ ++ var -= ((journal)->j_last - (journal)->j_first); \ + } while (0) + + static int fc_do_one_pass(journal_t *journal, +@@ -517,9 +513,7 @@ static int do_one_pass(journal_t *journa + break; + + jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", +- next_commit_ID, next_log_block, +- jbd2_has_feature_fast_commit(journal) ? +- journal->j_fc_last : journal->j_last); ++ next_commit_ID, next_log_block, journal->j_last); + + /* Skip over each chunk of the transaction looking + * either the next descriptor block or the final commit diff --git a/queue-6.1/jbd2-fix-checkpoint-cleanup-performance-regression.patch b/queue-6.1/jbd2-fix-checkpoint-cleanup-performance-regression.patch new file mode 100644 index 00000000000..422c4224341 --- /dev/null +++ b/queue-6.1/jbd2-fix-checkpoint-cleanup-performance-regression.patch @@ -0,0 +1,123 @@ +From 373ac521799d9e97061515aca6ec6621789036bb Mon Sep 17 00:00:00 2001 +From: Zhang Yi +Date: Fri, 14 Jul 2023 10:55:26 +0800 +Subject: jbd2: fix checkpoint cleanup performance regression + +From: Zhang Yi + +commit 373ac521799d9e97061515aca6ec6621789036bb upstream. + +journal_clean_one_cp_list() has been merged into +journal_shrink_one_cp_list(), but do chekpoint buffer cleanup from the +committing process is just a best effort, it should stop scan once it +meet a busy buffer, or else it will cause a lot of invalid buffer scan +and checks. We catch a performance regression when doing fs_mark tests +below. 
+ +Test cmd: + ./fs_mark -d scratch -s 1024 -n 10000 -t 1 -D 100 -N 100 + +Before merging checkpoint buffer cleanup: + FSUse% Count Size Files/sec App Overhead + 95 10000 1024 8304.9 49033 + +After merging checkpoint buffer cleanup: + FSUse% Count Size Files/sec App Overhead + 95 10000 1024 7649.0 50012 + FSUse% Count Size Files/sec App Overhead + 95 10000 1024 2107.1 50871 + +After merging checkpoint buffer cleanup, the total loop count in +journal_shrink_one_cp_list() could be up to 6,261,600+ (50,000+ ~ +100,000+ in general), most of which are invalid. This patch fixes it +by passing 'shrink_type' into journal_shrink_one_cp_list() and adding +a new 'SHRINK_BUSY_STOP' to indicate that it should stop once it meets a busy +buffer. After the fix, the loop count drops back to 10,000+. + +After this fix: + FSUse% Count Size Files/sec App Overhead + 95 10000 1024 8558.4 49109 + +Cc: stable@kernel.org +Fixes: b98dba273a0e ("jbd2: remove journal_clean_one_cp_list()") +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230714025528.564988-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/checkpoint.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -349,6 +349,8 @@ int jbd2_cleanup_journal_tail(journal_t + + /* Checkpoint list management */ + ++enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP}; ++ + /* + * journal_shrink_one_cp_list + * +@@ -360,7 +362,8 @@ int jbd2_cleanup_journal_tail(journal_t + * Called with j_list_lock held.
+ */ + static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, +- bool destroy, bool *released) ++ enum shrink_type type, ++ bool *released) + { + struct journal_head *last_jh; + struct journal_head *next_jh = jh; +@@ -376,12 +379,15 @@ static unsigned long journal_shrink_one_ + jh = next_jh; + next_jh = jh->b_cpnext; + +- if (destroy) { ++ if (type == SHRINK_DESTROY) { + ret = __jbd2_journal_remove_checkpoint(jh); + } else { + ret = jbd2_journal_try_remove_checkpoint(jh); +- if (ret < 0) +- continue; ++ if (ret < 0) { ++ if (type == SHRINK_BUSY_SKIP) ++ continue; ++ break; ++ } + } + + nr_freed++; +@@ -445,7 +451,7 @@ again: + tid = transaction->t_tid; + + freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, +- false, &released); ++ SHRINK_BUSY_SKIP, &released); + nr_freed += freed; + (*nr_to_scan) -= min(*nr_to_scan, freed); + if (*nr_to_scan == 0) +@@ -485,19 +491,21 @@ out: + void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) + { + transaction_t *transaction, *last_transaction, *next_transaction; ++ enum shrink_type type; + bool released; + + transaction = journal->j_checkpoint_transactions; + if (!transaction) + return; + ++ type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP; + last_transaction = transaction->t_cpprev; + next_transaction = transaction; + do { + transaction = next_transaction; + next_transaction = transaction->t_cpnext; + journal_shrink_one_cp_list(transaction->t_checkpoint_list, +- destroy, &released); ++ type, &released); + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. 
Bail out if diff --git a/queue-6.1/lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch b/queue-6.1/lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch new file mode 100644 index 00000000000..8031f4628ae --- /dev/null +++ b/queue-6.1/lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch @@ -0,0 +1,53 @@ +From 92382d744176f230101d54f5c017bccd62770f01 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Mon, 7 Aug 2023 08:36:28 -0700 +Subject: lib: test_scanf: Add explicit type cast to result initialization in test_number_prefix() + +From: Nathan Chancellor + +commit 92382d744176f230101d54f5c017bccd62770f01 upstream. + +A recent change in clang allows it to consider more expressions as +compile time constants, which causes it to point out an implicit +conversion in the scanf tests: + + lib/test_scanf.c:661:2: warning: implicit conversion from 'int' to 'unsigned char' changes value from -168 to 88 [-Wconstant-conversion] + 661 | test_number_prefix(unsigned char, "0xA7", "%2hhx%hhx", 0, 0xa7, 2, check_uchar); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + lib/test_scanf.c:609:29: note: expanded from macro 'test_number_prefix' + 609 | T result[2] = {~expect[0], ~expect[1]}; \ + | ~ ^~~~~~~~~~ + 1 warning generated. + +The result of the bitwise negation is the type of the operand after +going through the integer promotion rules, so this truncation is +expected but harmless, as the initial values in the result array get +overwritten by _test() anyways. Add an explicit cast to the expected +type in test_number_prefix() to silence the warning. There is no +functional change, as all the tests still pass with GCC 13.1.0 and clang +18.0.0. 
+ +Cc: stable@vger.kernel.org +Link: https://github.com/ClangBuiltLinux/linux/issues/1899 +Link: https://github.com/llvm/llvm-project/commit/610ec954e1f81c0e8fcadedcd25afe643f5a094e +Suggested-by: Nick Desaulniers +Signed-off-by: Nathan Chancellor +Reviewed-by: Petr Mladek +Signed-off-by: Petr Mladek +Link: https://lore.kernel.org/r/20230807-test_scanf-wconstant-conversion-v2-1-839ca39083e1@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + lib/test_scanf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/lib/test_scanf.c ++++ b/lib/test_scanf.c +@@ -606,7 +606,7 @@ static void __init numbers_slice(void) + #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn) \ + do { \ + const T expect[2] = { expect0, expect1 }; \ +- T result[2] = {~expect[0], ~expect[1]}; \ ++ T result[2] = { (T)~expect[0], (T)~expect[1] }; \ + \ + _test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]); \ + } while (0) diff --git a/queue-6.1/series b/queue-6.1/series index 95769c11723..fc96d404277 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -135,3 +135,24 @@ net-hns3-remove-gso-partial-feature-bit.patch sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch multi-gen-lru-avoid-race-in-inc_min_seq.patch net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch +arc-atomics-add-compiler-barrier-to-atomic-operations.patch +clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch +dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch +jbd2-fix-checkpoint-cleanup-performance-regression.patch +jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch +jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch +ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch +ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch +f2fs-flush-inode-if-atomic-file-is-aborted.patch +f2fs-avoid-false-alarm-of-circular-locking.patch
+lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch +hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch +ata-ahci-add-elkhart-lake-ahci-controller.patch +ata-pata_falcon-fix-io-base-selection-for-q40.patch +ata-sata_gemini-add-missing-module_description.patch +ata-pata_ftide010-add-missing-module_description.patch +fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch +btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch +btrfs-fix-start-transaction-qgroup-rsv-double-free.patch +btrfs-free-qgroup-rsv-on-io-failure.patch +btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch