--- /dev/null
+From 42f51fb24fd39cc547c086ab3d8a314cc603a91c Mon Sep 17 00:00:00 2001
+From: Pavel Kozlov <pavel.kozlov@synopsys.com>
+Date: Tue, 15 Aug 2023 19:11:36 +0400
+Subject: ARC: atomics: Add compiler barrier to atomic operations...
+
+From: Pavel Kozlov <pavel.kozlov@synopsys.com>
+
+commit 42f51fb24fd39cc547c086ab3d8a314cc603a91c upstream.
+
+... to avoid unwanted gcc optimizations
+
+SMP kernels fail to boot with commit 596ff4a09b89
+("cpumask: re-introduce constant-sized cpumask optimizations").
+
+|
+| percpu: BUG: failure at mm/percpu.c:2981/pcpu_build_alloc_info()!
+|
+
+The write operation performed by the SCOND instruction in the atomic
+inline asm code is not properly passed to the compiler. The compiler
+cannot correctly optimize a nested loop that runs through the cpumask
+in the pcpu_build_alloc_info() function.
+
+Fix this by adding a compiler barrier (memory clobber in inline asm).
+
+Apparently atomic ops used to have memory clobber implicitly via
+surrounding smp_mb(). However commit b64be6836993c431e
+("ARC: atomics: implement relaxed variants") removed the smp_mb() for
+the relaxed variants, but failed to add the explicit compiler barrier.
+
+Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/135
+Cc: <stable@vger.kernel.org> # v6.3+
+Fixes: b64be6836993c43 ("ARC: atomics: implement relaxed variants")
+Signed-off-by: Pavel Kozlov <pavel.kozlov@synopsys.com>
+Signed-off-by: Vineet Gupta <vgupta@kernel.org>
+[vgupta: tweaked the changelog and added Fixes tag]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arc/include/asm/atomic-llsc.h | 6 +++---
+ arch/arc/include/asm/atomic64-arcv2.h | 6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/arc/include/asm/atomic-llsc.h
++++ b/arch/arc/include/asm/atomic-llsc.h
+@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int
+ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ } \
+
+ #define ATOMIC_OP_RETURN(op, asm_op) \
+@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_ret
+ : [val] "=&r" (val) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ \
+ return val; \
+ }
+@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ \
+ return orig; \
+ }
+--- a/arch/arc/include/asm/atomic64-arcv2.h
++++ b/arch/arc/include/asm/atomic64-arcv2.h
+@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s6
+ " bnz 1b \n" \
+ : "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); \
++ : "cc", "memory"); \
+ } \
+
+ #define ATOMIC64_OP_RETURN(op, op1, op2) \
+@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_r
+ " bnz 1b \n" \
+ : [val] "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); /* memory clobber comes from smp_mb() */ \
++ : "cc", "memory"); \
+ \
+ return val; \
+ }
+@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##
+ " bnz 1b \n" \
+ : "=&r"(orig), "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); /* memory clobber comes from smp_mb() */ \
++ : "cc", "memory"); \
+ \
+ return orig; \
+ }
--- /dev/null
+From 2a2df98ec592667927b5c1351afa6493ea125c9f Mon Sep 17 00:00:00 2001
+From: Werner Fischer <devlists@wefi.net>
+Date: Tue, 29 Aug 2023 13:33:58 +0200
+Subject: ata: ahci: Add Elkhart Lake AHCI controller
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Werner Fischer <devlists@wefi.net>
+
+commit 2a2df98ec592667927b5c1351afa6493ea125c9f upstream.
+
+Elkhart Lake is the successor of Apollo Lake and Gemini Lake. These
+CPUs and their PCHs are used in mobile and embedded environments.
+
+With this patch I suggest that Elkhart Lake SATA controllers [1] should
+use the default LPM policy for mobile chipsets.
+The disadvantage of missing hot-plug support with this setting should
+not be an issue, as those CPUs are used in embedded environments and
+not in servers with hot-plug backplanes.
+
+We discovered that the Elkhart Lake SATA controllers have been missing
+in ahci.c after a customer reported the throttling of his SATA SSD
+after a short period of higher I/O. We determined the high temperature
+of the SSD controller in idle mode as the root cause for that.
+
+Depending on the used SSD, we have seen up to 1.8 Watt lower system
+idle power usage and up to 30°C lower SSD controller temperatures in
+our tests, when we set med_power_with_dipm manually. I have provided a
+table showing seven different SATA SSDs from ATP, Intel/Solidigm and
+Samsung [2].
+
+Intel lists a total of 3 SATA controller IDs (4B60, 4B62, 4B63) in [1]
+for those mobile PCHs.
+This commit just adds 0x4b63 as I do not have test systems with 0x4b60
+and 0x4b62 SATA controllers.
+I have tested this patch with a system which uses 0x4b63 as SATA
+controller.
+
+[1] https://sata-io.org/product/8803
+[2] https://www.thomas-krenn.com/en/wiki/SATA_Link_Power_Management#Example_LES_v4
+
+Signed-off-by: Werner Fischer <devlists@wefi.net>
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/ahci.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -422,6 +422,8 @@ static const struct pci_device_id ahci_p
+ { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
+ { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
++ /* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */
++ { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */
+
+ /* JMicron 360/1/3/5/6, match class to avoid IDE function */
+ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
--- /dev/null
+From 8a1f00b753ecfdb117dc1a07e68c46d80e7923ea Mon Sep 17 00:00:00 2001
+From: Michael Schmitz <schmitzmic@gmail.com>
+Date: Sun, 27 Aug 2023 16:13:47 +1200
+Subject: ata: pata_falcon: fix IO base selection for Q40
+
+From: Michael Schmitz <schmitzmic@gmail.com>
+
+commit 8a1f00b753ecfdb117dc1a07e68c46d80e7923ea upstream.
+
+With commit 44b1fbc0f5f3 ("m68k/q40: Replace q40ide driver
+with pata_falcon and falconide"), the Q40 IDE driver was
+replaced by pata_falcon.c.
+
+Both IO and memory resources were defined for the Q40 IDE
+platform device, but definition of the IDE register addresses
+was modeled after the Falcon case, both in use of the memory
+resources and in including register shift and byte vs. word
+offset in the address.
+
+This was correct for the Falcon case, which does not apply
+any address translation to the register addresses. In the
+Q40 case, the device base address, byte access offset and
+register shift are all included in the platform specific
+ISA access translation (in asm/mm_io.h).
+
+As a consequence, such address translation gets applied
+twice, and register addresses are mangled.
+
+Use the device base address from the platform IO resource
+for Q40 (the IO address translation will then add the correct
+ISA window base address and byte access offset), with register
+shift 1. Use MMIO base address and register shift 2 as before
+for Falcon.
+
+Encode PIO_OFFSET into IO port addresses for all registers
+for Q40 except the data transfer register. Encode the MMIO
+offset there (pata_falcon_data_xfer() directly uses raw IO
+with no address translation).
+
+Reported-by: William R Sowerbutts <will@sowerbutts.com>
+Closes: https://lore.kernel.org/r/CAMuHMdUU62jjunJh9cqSqHT87B0H0A4udOOPs=WN7WZKpcagVA@mail.gmail.com
+Link: https://lore.kernel.org/r/CAMuHMdUU62jjunJh9cqSqHT87B0H0A4udOOPs=WN7WZKpcagVA@mail.gmail.com
+Fixes: 44b1fbc0f5f3 ("m68k/q40: Replace q40ide driver with pata_falcon and falconide")
+Cc: stable@vger.kernel.org
+Cc: Finn Thain <fthain@linux-m68k.org>
+Cc: Geert Uytterhoeven <geert@linux-m68k.org>
+Tested-by: William R Sowerbutts <will@sowerbutts.com>
+Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
+Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/pata_falcon.c | 50 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 29 insertions(+), 21 deletions(-)
+
+--- a/drivers/ata/pata_falcon.c
++++ b/drivers/ata/pata_falcon.c
+@@ -123,8 +123,8 @@ static int __init pata_falcon_init_one(s
+ struct resource *base_res, *ctl_res, *irq_res;
+ struct ata_host *host;
+ struct ata_port *ap;
+- void __iomem *base;
+- int irq = 0;
++ void __iomem *base, *ctl_base;
++ int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */
+
+ dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
+
+@@ -165,26 +165,34 @@ static int __init pata_falcon_init_one(s
+ ap->pio_mask = ATA_PIO4;
+ ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
+
+- base = (void __iomem *)base_mem_res->start;
+ /* N.B. this assumes data_addr will be used for word-sized I/O only */
+- ap->ioaddr.data_addr = base + 0 + 0 * 4;
+- ap->ioaddr.error_addr = base + 1 + 1 * 4;
+- ap->ioaddr.feature_addr = base + 1 + 1 * 4;
+- ap->ioaddr.nsect_addr = base + 1 + 2 * 4;
+- ap->ioaddr.lbal_addr = base + 1 + 3 * 4;
+- ap->ioaddr.lbam_addr = base + 1 + 4 * 4;
+- ap->ioaddr.lbah_addr = base + 1 + 5 * 4;
+- ap->ioaddr.device_addr = base + 1 + 6 * 4;
+- ap->ioaddr.status_addr = base + 1 + 7 * 4;
+- ap->ioaddr.command_addr = base + 1 + 7 * 4;
+-
+- base = (void __iomem *)ctl_mem_res->start;
+- ap->ioaddr.altstatus_addr = base + 1;
+- ap->ioaddr.ctl_addr = base + 1;
+-
+- ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
+- (unsigned long)base_mem_res->start,
+- (unsigned long)ctl_mem_res->start);
++ ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start;
++
++ if (base_res) { /* only Q40 has IO resources */
++ io_offset = 0x10000;
++ reg_shift = 0;
++ base = (void __iomem *)base_res->start;
++ ctl_base = (void __iomem *)ctl_res->start;
++ } else {
++ base = (void __iomem *)base_mem_res->start;
++ ctl_base = (void __iomem *)ctl_mem_res->start;
++ }
++
++ ap->ioaddr.error_addr = base + io_offset + (1 << reg_shift);
++ ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift);
++ ap->ioaddr.nsect_addr = base + io_offset + (2 << reg_shift);
++ ap->ioaddr.lbal_addr = base + io_offset + (3 << reg_shift);
++ ap->ioaddr.lbam_addr = base + io_offset + (4 << reg_shift);
++ ap->ioaddr.lbah_addr = base + io_offset + (5 << reg_shift);
++ ap->ioaddr.device_addr = base + io_offset + (6 << reg_shift);
++ ap->ioaddr.status_addr = base + io_offset + (7 << reg_shift);
++ ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift);
++
++ ap->ioaddr.altstatus_addr = ctl_base + io_offset;
++ ap->ioaddr.ctl_addr = ctl_base + io_offset;
++
++ ata_port_desc(ap, "cmd %px ctl %px data %px",
++ base, ctl_base, ap->ioaddr.data_addr);
+
+ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (irq_res && irq_res->start > 0) {
--- /dev/null
+From 7274eef5729037300f29d14edeb334a47a098f65 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Thu, 24 Aug 2023 07:41:59 +0900
+Subject: ata: pata_ftide010: Add missing MODULE_DESCRIPTION
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 7274eef5729037300f29d14edeb334a47a098f65 upstream.
+
+Add the missing MODULE_DESCRIPTION() to avoid warnings such as:
+
+WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/ata/pata_ftide010.o
+
+when compiling with W=1.
+
+Fixes: be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/pata_ftide010.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/ata/pata_ftide010.c
++++ b/drivers/ata/pata_ftide010.c
+@@ -567,6 +567,7 @@ static struct platform_driver pata_ftide
+ };
+ module_platform_driver(pata_ftide010_driver);
+
++MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010");
+ MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("platform:" DRV_NAME);
--- /dev/null
+From 8566572bf3b4d6e416a4bf2110dbb4817d11ba59 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Thu, 24 Aug 2023 07:43:18 +0900
+Subject: ata: sata_gemini: Add missing MODULE_DESCRIPTION
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 8566572bf3b4d6e416a4bf2110dbb4817d11ba59 upstream.
+
+Add the missing MODULE_DESCRIPTION() to avoid warnings such as:
+
+WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/ata/sata_gemini.o
+
+when compiling with W=1.
+
+Fixes: be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/sata_gemini.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/ata/sata_gemini.c
++++ b/drivers/ata/sata_gemini.c
+@@ -428,6 +428,7 @@ static struct platform_driver gemini_sat
+ };
+ module_platform_driver(gemini_sata_driver);
+
++MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge");
+ MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("platform:" DRV_NAME);
--- /dev/null
+From 4490e803e1fe9fab8db5025e44e23b55df54078b Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 26 Jul 2023 16:56:57 +0100
+Subject: btrfs: don't start transaction when joining with TRANS_JOIN_NOSTART
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 4490e803e1fe9fab8db5025e44e23b55df54078b upstream.
+
+When joining a transaction with TRANS_JOIN_NOSTART, if we don't find a
+running transaction we end up creating one. This goes against the purpose
+of TRANS_JOIN_NOSTART which is to join a running transaction if its state
+is at or below the state TRANS_STATE_COMMIT_START, otherwise return an
+-ENOENT error and don't start a new transaction. So fix this to not create
+a new transaction if there's no running transaction at or below that
+state.
+
+CC: stable@vger.kernel.org # 4.14+
+Fixes: a6d155d2e363 ("Btrfs: fix deadlock between fiemap and transaction commits")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/transaction.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -279,10 +279,11 @@ loop:
+ spin_unlock(&fs_info->trans_lock);
+
+ /*
+- * If we are ATTACH, we just want to catch the current transaction,
+- * and commit it. If there is no transaction, just return ENOENT.
++ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
++ * current transaction, and commit it. If there is no transaction, just
++ * return ENOENT.
+ */
+- if (type == TRANS_ATTACH)
++ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
+ return -ENOENT;
+
+ /*
--- /dev/null
+From a6496849671a5bc9218ecec25a983253b34351b1 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 21 Jul 2023 09:02:07 -0700
+Subject: btrfs: fix start transaction qgroup rsv double free
+
+From: Boris Burkov <boris@bur.io>
+
+commit a6496849671a5bc9218ecec25a983253b34351b1 upstream.
+
+btrfs_start_transaction reserves metadata space of the PERTRANS type
+before it identifies a transaction to start/join. This allows flushing
+when reserving that space without a deadlock. However, it results in a
+race which temporarily breaks qgroup rsv accounting.
+
+T1 T2
+start_transaction
+do_stuff
+ start_transaction
+ qgroup_reserve_meta_pertrans
+commit_transaction
+ qgroup_free_meta_all_pertrans
+ hit an error starting txn
+ goto reserve_fail
+ qgroup_free_meta_pertrans (already freed!)
+
+The basic issue is that there is nothing preventing another commit from
+committing before start_transaction finishes (in fact sometimes we
+intentionally wait for it) so any error path that frees the reserve is
+at risk of this race.
+
+While this exact space was getting freed anyway, and it's not a huge
+deal to double free it (just a warning, the free code catches this), it
+can result in incorrectly freeing some other pertrans reservation in
+this same reservation, which could then lead to spuriously granting
+reservations we might not have the space for. Therefore, I do believe it
+is worth fixing.
+
+To fix it, use the existing prealloc->pertrans conversion mechanism.
+When we first reserve the space, we reserve prealloc space and only when
+we are sure we have a transaction do we convert it to pertrans. This way
+any racing commits do not blow away our reservation, but we still get a
+pertrans reservation that is freed when _this_ transaction gets committed.
+
+This issue can be reproduced by running generic/269 with either qgroups
+or squotas enabled via mkfs on the scratch device.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+CC: stable@vger.kernel.org # 5.10+
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/transaction.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -580,8 +580,13 @@ start_transaction(struct btrfs_root *roo
+ u64 delayed_refs_bytes = 0;
+
+ qgroup_reserved = num_items * fs_info->nodesize;
+- ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
+- enforce_qgroups);
++ /*
++ * Use prealloc for now, as there might be a currently running
++ * transaction that could free this reserved space prematurely
++ * by committing.
++ */
++ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved,
++ enforce_qgroups, false);
+ if (ret)
+ return ERR_PTR(ret);
+
+@@ -693,6 +698,14 @@ again:
+ h->reloc_reserved = reloc_reserved;
+ }
+
++ /*
++ * Now that we have found a transaction to be a part of, convert the
++ * qgroup reservation from prealloc to pertrans. A different transaction
++ * can't race in and free our pertrans out from under us.
++ */
++ if (qgroup_reserved)
++ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
++
+ got_it:
+ if (!current->journal_info)
+ current->journal_info = h;
+@@ -740,7 +753,7 @@ alloc_fail:
+ btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
+ num_bytes, NULL);
+ reserve_fail:
+- btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
++ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
+ return ERR_PTR(ret);
+ }
+
--- /dev/null
+From e28b02118b94e42be3355458a2406c6861e2dd32 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 21 Jul 2023 09:02:06 -0700
+Subject: btrfs: free qgroup rsv on io failure
+
+From: Boris Burkov <boris@bur.io>
+
+commit e28b02118b94e42be3355458a2406c6861e2dd32 upstream.
+
+If we do a write whose bio suffers an error, we will never reclaim the
+qgroup reserved space for it. We allocate the space in the write_iter
+codepath, then release the reservation as we allocate the ordered
+extent, but we only create a delayed ref if the ordered extent finishes.
+If it has an error, we simply leak the rsv. This is apparent in running
+any error injecting (dmerror) fstests like btrfs/146 or btrfs/160. Such
+tests fail due to dmesg on umount complaining about the leaked qgroup
+data space.
+
+When we clean up other aspects of space on failed ordered_extents, also
+free the qgroup rsv.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+CC: stable@vger.kernel.org # 5.10+
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3393,6 +3393,13 @@ out:
+ btrfs_free_reserved_extent(fs_info,
+ ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes, 1);
++ /*
++ * Actually free the qgroup rsv which was released when
++ * the ordered extent was created.
++ */
++ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
++ ordered_extent->qgroup_rsv,
++ BTRFS_QGROUP_RSV_DATA);
+ }
+ }
+
--- /dev/null
+From 332581bde2a419d5f12a93a1cdc2856af649a3cc Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 21 Jul 2023 16:42:14 +0900
+Subject: btrfs: zoned: do not zone finish data relocation block group
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 332581bde2a419d5f12a93a1cdc2856af649a3cc upstream.
+
+When multiple writes happen at once, we may need to sacrifice a currently
+active block group to be zone finished for a new allocation. We choose a
+block group with the least free space left, and zone finish it.
+
+To do the finishing, we need to send IOs for the already allocated region
+and wait for them and any on-going IOs. Otherwise, these IOs fail because
+the zone is already finished by the time the IOs reach the device.
+
+However, if a block group dedicated to data relocation is zone
+finished, there is a chance of finishing it before an ongoing write IO
+reaches the device. That is because there is a timing gap between when an
+allocation is done (block_group->reservations == 0, as pre-allocation is
+done) and when an ordered extent is created as the relocation IO starts.
+Thus, if we finish the zone between them, we can fail the IOs.
+
+We cannot simply use "fs_info->data_reloc_bg == block_group->start" to
+avoid the zone finishing, because the data_reloc_bg may have already
+switched to a new block group while there are still ongoing write IOs to
+the old data_reloc_bg.
+
+So, this patch reworks the BLOCK_GROUP_FLAG_ZONED_DATA_RELOC bit to
+indicate there is a data relocation allocation and/or ongoing write to the
+block group. The bit is set on allocation and cleared in end_io function of
+the last IO for the currently allocated region.
+
+Changing the timing of the bit setting also solves the issue of the bit
+being left set even after there is no IO going on. With the current code,
+if the data_reloc_bg switches after the last IO to the current
+data_reloc_bg, the bit is set at that point and nothing ever clears it. As
+a result, that block group is kept unallocatable for anything.
+
+Fixes: 343d8a30851c ("btrfs: zoned: prevent allocation from previous data relocation BG")
+Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 43 +++++++++++++++++++++++--------------------
+ fs/btrfs/zoned.c | 16 +++++++++++++---
+ 2 files changed, 36 insertions(+), 23 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3810,7 +3810,8 @@ static int do_allocation_zoned(struct bt
+ fs_info->data_reloc_bg == 0);
+
+ if (block_group->ro ||
+- test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
++ (!ffe_ctl->for_data_reloc &&
++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
+ ret = 1;
+ goto out;
+ }
+@@ -3853,8 +3854,26 @@ static int do_allocation_zoned(struct bt
+ if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
+ fs_info->treelog_bg = block_group->start;
+
+- if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
+- fs_info->data_reloc_bg = block_group->start;
++ if (ffe_ctl->for_data_reloc) {
++ if (!fs_info->data_reloc_bg)
++ fs_info->data_reloc_bg = block_group->start;
++ /*
++ * Do not allow allocations from this block group, unless it is
++ * for data relocation. Compared to increasing the ->ro, setting
++ * the ->zoned_data_reloc_ongoing flag still allows nocow
++ * writers to come in. See btrfs_inc_nocow_writers().
++ *
++ * We need to disable an allocation to avoid an allocation of
++ * regular (non-relocation data) extent. With mix of relocation
++ * extents and regular extents, we can dispatch WRITE commands
++ * (for relocation extents) and ZONE APPEND commands (for
++ * regular extents) at the same time to the same zone, which
++ * easily break the write pointer.
++ *
++ * Also, this flag avoids this block group to be zone finished.
++ */
++ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
++ }
+
+ ffe_ctl->found_offset = start + block_group->alloc_offset;
+ block_group->alloc_offset += num_bytes;
+@@ -3872,24 +3891,8 @@ static int do_allocation_zoned(struct bt
+ out:
+ if (ret && ffe_ctl->for_treelog)
+ fs_info->treelog_bg = 0;
+- if (ret && ffe_ctl->for_data_reloc &&
+- fs_info->data_reloc_bg == block_group->start) {
+- /*
+- * Do not allow further allocations from this block group.
+- * Compared to increasing the ->ro, setting the
+- * ->zoned_data_reloc_ongoing flag still allows nocow
+- * writers to come in. See btrfs_inc_nocow_writers().
+- *
+- * We need to disable an allocation to avoid an allocation of
+- * regular (non-relocation data) extent. With mix of relocation
+- * extents and regular extents, we can dispatch WRITE commands
+- * (for relocation extents) and ZONE APPEND commands (for
+- * regular extents) at the same time to the same zone, which
+- * easily break the write pointer.
+- */
+- set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
++ if (ret && ffe_ctl->for_data_reloc)
+ fs_info->data_reloc_bg = 0;
+- }
+ spin_unlock(&fs_info->relocation_bg_lock);
+ spin_unlock(&fs_info->treelog_bg_lock);
+ spin_unlock(&block_group->lock);
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2009,6 +2009,10 @@ static int do_zone_finish(struct btrfs_b
+ * and block_group->meta_write_pointer for metadata.
+ */
+ if (!fully_written) {
++ if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
++ spin_unlock(&block_group->lock);
++ return -EAGAIN;
++ }
+ spin_unlock(&block_group->lock);
+
+ ret = btrfs_inc_block_group_ro(block_group, false);
+@@ -2037,7 +2041,9 @@ static int do_zone_finish(struct btrfs_b
+ return 0;
+ }
+
+- if (block_group->reserved) {
++ if (block_group->reserved ||
++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
++ &block_group->runtime_flags)) {
+ spin_unlock(&block_group->lock);
+ btrfs_dec_block_group_ro(block_group);
+ return -EAGAIN;
+@@ -2268,7 +2274,10 @@ void btrfs_zoned_release_data_reloc_bg(s
+
+ /* All relocation extents are written. */
+ if (block_group->start + block_group->alloc_offset == logical + length) {
+- /* Now, release this block group for further allocations. */
++ /*
++ * Now, release this block group for further allocations and
++ * zone finish.
++ */
+ clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+ &block_group->runtime_flags);
+ }
+@@ -2292,7 +2301,8 @@ int btrfs_zone_finish_one_bg(struct btrf
+
+ spin_lock(&block_group->lock);
+ if (block_group->reserved || block_group->alloc_offset == 0 ||
+- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
++ (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+ spin_unlock(&block_group->lock);
+ continue;
+ }
--- /dev/null
+From e7d65e40ab5a5940785c5922f317602d0268caaf Mon Sep 17 00:00:00 2001
+From: Walter Chang <walter.chang@mediatek.com>
+Date: Mon, 17 Jul 2023 17:07:34 +0800
+Subject: clocksource/drivers/arm_arch_timer: Disable timer before programming CVAL
+
+From: Walter Chang <walter.chang@mediatek.com>
+
+commit e7d65e40ab5a5940785c5922f317602d0268caaf upstream.
+
+Due to the fact that the use of `writeq_relaxed()` to program CVAL is
+not guaranteed to be atomic, it is necessary to disable the timer before
+programming CVAL.
+
+However, if the MMIO timer is already enabled and has not yet expired,
+there is a possibility of unexpected behavior occurring: when the CPU
+enters the idle state during this period, and if the CPU's local event
+is earlier than the broadcast event, the following process occurs:
+
+tick_broadcast_enter()
+ tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER)
+ __tick_broadcast_oneshot_control()
+ ___tick_broadcast_oneshot_control()
+ tick_broadcast_set_event()
+ clockevents_program_event()
+ set_next_event_mem()
+
+During this process, the MMIO timer remains enabled while programming
+CVAL. To prevent such behavior, disable timer explicitly prior to
+programming CVAL.
+
+Fixes: 8b82c4f883a7 ("clocksource/drivers/arm_arch_timer: Move MMIO timer programming over to CVAL")
+Cc: stable@vger.kernel.org
+Signed-off-by: Walter Chang <walter.chang@mediatek.com>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
+Link: https://lore.kernel.org/r/20230717090735.19370-1-walter.chang@mediatek.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/clocksource/arm_arch_timer.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/clocksource/arm_arch_timer.c
++++ b/drivers/clocksource/arm_arch_timer.c
+@@ -773,6 +773,13 @@ static __always_inline void set_next_eve
+ u64 cnt;
+
+ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
++
++ /* Timer must be disabled before programming CVAL */
++ if (ctrl & ARCH_TIMER_CTRL_ENABLE) {
++ ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
++ }
++
+ ctrl |= ARCH_TIMER_CTRL_ENABLE;
+ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
+
--- /dev/null
+From c6ec8c83a29fb3aec3efa6fabbf5344498f57c7f Mon Sep 17 00:00:00 2001
+From: Hien Huynh <hien.huynh.px@renesas.com>
+Date: Thu, 6 Jul 2023 12:21:50 +0100
+Subject: dmaengine: sh: rz-dmac: Fix destination and source data size setting
+
+From: Hien Huynh <hien.huynh.px@renesas.com>
+
+commit c6ec8c83a29fb3aec3efa6fabbf5344498f57c7f upstream.
+
+Before setting the DDS and SDS values, we need to clear the old values
+first; otherwise, we get incorrect results when we change/update the DMA
+bus width several times, due to the 'OR' expression.
+
+Fixes: 5000d37042a6 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC")
+Cc: stable@kernel.org
+Signed-off-by: Hien Huynh <hien.huynh.px@renesas.com>
+Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Link: https://lore.kernel.org/r/20230706112150.198941-3-biju.das.jz@bp.renesas.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/sh/rz-dmac.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/dma/sh/rz-dmac.c
++++ b/drivers/dma/sh/rz-dmac.c
+@@ -9,6 +9,7 @@
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ */
+
++#include <linux/bitfield.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/dmaengine.h>
+ #include <linux/interrupt.h>
+@@ -145,8 +146,8 @@ struct rz_dmac {
+ #define CHCFG_REQD BIT(3)
+ #define CHCFG_SEL(bits) ((bits) & 0x07)
+ #define CHCFG_MEM_COPY (0x80400008)
+-#define CHCFG_FILL_DDS(a) (((a) << 16) & GENMASK(19, 16))
+-#define CHCFG_FILL_SDS(a) (((a) << 12) & GENMASK(15, 12))
++#define CHCFG_FILL_DDS_MASK GENMASK(19, 16)
++#define CHCFG_FILL_SDS_MASK GENMASK(15, 12)
+ #define CHCFG_FILL_TM(a) (((a) & BIT(5)) << 22)
+ #define CHCFG_FILL_AM(a) (((a) & GENMASK(4, 2)) << 6)
+ #define CHCFG_FILL_LVL(a) (((a) & BIT(1)) << 5)
+@@ -609,13 +610,15 @@ static int rz_dmac_config(struct dma_cha
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+- channel->chcfg |= CHCFG_FILL_DDS(val);
++ channel->chcfg &= ~CHCFG_FILL_DDS_MASK;
++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val);
+
+ val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+- channel->chcfg |= CHCFG_FILL_SDS(val);
++ channel->chcfg &= ~CHCFG_FILL_SDS_MASK;
++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val);
+
+ return 0;
+ }
--- /dev/null
+From 68228da51c9a436872a4ef4b5a7692e29f7e5bc7 Mon Sep 17 00:00:00 2001
+From: Wang Jianjian <wangjianjian0@foxmail.com>
+Date: Thu, 3 Aug 2023 00:28:39 +0800
+Subject: ext4: add correct group descriptors and reserved GDT blocks to system zone
+
+From: Wang Jianjian <wangjianjian0@foxmail.com>
+
+commit 68228da51c9a436872a4ef4b5a7692e29f7e5bc7 upstream.
+
+When ext4_setup_system_zone() runs, flex_bg is not initialized yet, so its
+size is always 1. Use a new helper, ext4_num_base_meta_blocks(), which
+does not depend on sbi->s_log_groups_per_flex being initialized.
+
+[ Squashed two patches in the Link URL's below together into a single
+ commit, which is simpler to review/understand. Also fix checkpatch
+ warnings. --TYT ]
+
+Cc: stable@kernel.org
+Signed-off-by: Wang Jianjian <wangjianjian0@foxmail.com>
+Link: https://lore.kernel.org/r/tencent_21AF0D446A9916ED5C51492CC6C9A0A77B05@qq.com
+Link: https://lore.kernel.org/r/tencent_D744D1450CC169AEA77FCF0A64719909ED05@qq.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/balloc.c | 15 +++++++++++----
+ fs/ext4/block_validity.c | 8 ++++----
+ fs/ext4/ext4.h | 2 ++
+ 3 files changed, 17 insertions(+), 8 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -910,11 +910,11 @@ unsigned long ext4_bg_num_gdb(struct sup
+ }
+
+ /*
+- * This function returns the number of file system metadata clusters at
++ * This function returns the number of file system metadata blocks at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+- ext4_group_t block_group)
++unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
++ ext4_group_t block_group)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned num;
+@@ -932,8 +932,15 @@ static unsigned ext4_num_base_meta_clust
+ } else { /* For META_BG_BLOCK_GROUPS */
+ num += ext4_bg_num_gdb(sb, block_group);
+ }
+- return EXT4_NUM_B2C(sbi, num);
++ return num;
+ }
++
++static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
++ ext4_group_t block_group)
++{
++ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
++}
++
+ /**
+ * ext4_inode_to_goal_block - return a hint for block allocation
+ * @inode: inode for block allocation
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_
+ struct ext4_system_blocks *system_blks;
+ struct ext4_group_desc *gdp;
+ ext4_group_t i;
+- int flex_size = ext4_flex_bg_size(sbi);
+ int ret;
+
+ system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
+@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_
+ return -ENOMEM;
+
+ for (i=0; i < ngroups; i++) {
++ unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
++
+ cond_resched();
+- if (ext4_bg_has_super(sb, i) &&
+- ((i < 5) || ((i % flex_size) == 0))) {
++ if (meta_blks != 0) {
+ ret = add_system_zone(system_blks,
+ ext4_group_first_block_no(sb, i),
+- ext4_bg_num_gdb(sb, i) + 1, 0);
++ meta_blks, 0);
+ if (ret)
+ goto err;
+ }
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3096,6 +3096,8 @@ extern const char *ext4_decode_error(str
+ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+ ext4_group_t block_group,
+ unsigned int flags);
++extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
++ ext4_group_t block_group);
+
+ extern __printf(7, 8)
+ void __ext4_error(struct super_block *, const char *, unsigned int, bool,
--- /dev/null
+From 7ca4b085f430f3774c3838b3da569ceccd6a0177 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= <lhenriques@suse.de>
+Date: Thu, 3 Aug 2023 10:17:13 +0100
+Subject: ext4: fix memory leaks in ext4_fname_{setup_filename,prepare_lookup}
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luís Henriques <lhenriques@suse.de>
+
+commit 7ca4b085f430f3774c3838b3da569ceccd6a0177 upstream.
+
+If the filename casefolding fails, we'll be leaking memory from the
+fscrypt_name struct, namely from the 'crypto_buf.name' member.
+
+Make sure we free it in the error path on both ext4_fname_setup_filename()
+and ext4_fname_prepare_lookup() functions.
+
+Cc: stable@kernel.org
+Fixes: 1ae98e295fa2 ("ext4: optimize match for casefolded encrypted dirs")
+Signed-off-by: Luís Henriques <lhenriques@suse.de>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20230803091713.13239-1-lhenriques@suse.de
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/crypto.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/crypto.c
++++ b/fs/ext4/crypto.c
+@@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct ino
+
+ #if IS_ENABLED(CONFIG_UNICODE)
+ err = ext4_fname_setup_ci_filename(dir, iname, fname);
++ if (err)
++ ext4_fname_free_filename(fname);
+ #endif
+ return err;
+ }
+@@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct ino
+
+ #if IS_ENABLED(CONFIG_UNICODE)
+ err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
++ if (err)
++ ext4_fname_free_filename(fname);
+ #endif
+ return err;
+ }
--- /dev/null
+From 5c13e2388bf3426fd69a89eb46e50469e9624e56 Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Fri, 18 Aug 2023 11:34:32 -0700
+Subject: f2fs: avoid false alarm of circular locking
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit 5c13e2388bf3426fd69a89eb46e50469e9624e56 upstream.
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.5.0-rc5-syzkaller-00353-gae545c3283dc #0 Not tainted
+------------------------------------------------------
+syz-executor273/5027 is trying to acquire lock:
+ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_down_write fs/f2fs/f2fs.h:2133 [inline]
+ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644
+
+but task is already holding lock:
+ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_down_read fs/f2fs/f2fs.h:2108 [inline]
+ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_add_dentry+0x92/0x230 fs/f2fs/dir.c:783
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (&fi->i_xattr_sem){.+.+}-{3:3}:
+ down_read+0x9c/0x470 kernel/locking/rwsem.c:1520
+ f2fs_down_read fs/f2fs/f2fs.h:2108 [inline]
+ f2fs_getxattr+0xb1e/0x12c0 fs/f2fs/xattr.c:532
+ __f2fs_get_acl+0x5a/0x900 fs/f2fs/acl.c:179
+ f2fs_acl_create fs/f2fs/acl.c:377 [inline]
+ f2fs_init_acl+0x15c/0xb30 fs/f2fs/acl.c:420
+ f2fs_init_inode_metadata+0x159/0x1290 fs/f2fs/dir.c:558
+ f2fs_add_regular_entry+0x79e/0xb90 fs/f2fs/dir.c:740
+ f2fs_add_dentry+0x1de/0x230 fs/f2fs/dir.c:788
+ f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827
+ f2fs_add_link fs/f2fs/f2fs.h:3554 [inline]
+ f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781
+ vfs_mkdir+0x532/0x7e0 fs/namei.c:4117
+ do_mkdirat+0x2a9/0x330 fs/namei.c:4140
+ __do_sys_mkdir fs/namei.c:4160 [inline]
+ __se_sys_mkdir fs/namei.c:4158 [inline]
+ __x64_sys_mkdir+0xf2/0x140 fs/namei.c:4158
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+-> #0 (&fi->i_sem){+.+.}-{3:3}:
+ check_prev_add kernel/locking/lockdep.c:3142 [inline]
+ check_prevs_add kernel/locking/lockdep.c:3261 [inline]
+ validate_chain kernel/locking/lockdep.c:3876 [inline]
+ __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
+ lock_acquire kernel/locking/lockdep.c:5761 [inline]
+ lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
+ down_write+0x93/0x200 kernel/locking/rwsem.c:1573
+ f2fs_down_write fs/f2fs/f2fs.h:2133 [inline]
+ f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644
+ f2fs_add_dentry+0xa6/0x230 fs/f2fs/dir.c:784
+ f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827
+ f2fs_add_link fs/f2fs/f2fs.h:3554 [inline]
+ f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781
+ vfs_mkdir+0x532/0x7e0 fs/namei.c:4117
+ ovl_do_mkdir fs/overlayfs/overlayfs.h:196 [inline]
+ ovl_mkdir_real+0xb5/0x370 fs/overlayfs/dir.c:146
+ ovl_workdir_create+0x3de/0x820 fs/overlayfs/super.c:309
+ ovl_make_workdir fs/overlayfs/super.c:711 [inline]
+ ovl_get_workdir fs/overlayfs/super.c:864 [inline]
+ ovl_fill_super+0xdab/0x6180 fs/overlayfs/super.c:1400
+ vfs_get_super+0xf9/0x290 fs/super.c:1152
+ vfs_get_tree+0x88/0x350 fs/super.c:1519
+ do_new_mount fs/namespace.c:3335 [inline]
+ path_mount+0x1492/0x1ed0 fs/namespace.c:3662
+ do_mount fs/namespace.c:3675 [inline]
+ __do_sys_mount fs/namespace.c:3884 [inline]
+ __se_sys_mount fs/namespace.c:3861 [inline]
+ __x64_sys_mount+0x293/0x310 fs/namespace.c:3861
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ rlock(&fi->i_xattr_sem);
+ lock(&fi->i_sem);
+ lock(&fi->i_xattr_sem);
+ lock(&fi->i_sem);
+
+Cc: <stable@vger.kernel.org>
+Reported-and-tested-by: syzbot+e5600587fa9cbf8e3826@syzkaller.appspotmail.com
+Fixes: 5eda1ad1aaff "f2fs: fix deadlock in i_xattr_sem and inode page lock"
+Tested-by: Guenter Roeck <linux@roeck-us.net>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/f2fs.h | 24 +++++++++++++++---------
+ fs/f2fs/inline.c | 3 ++-
+ 2 files changed, 17 insertions(+), 10 deletions(-)
+
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -2160,15 +2160,6 @@ static inline int f2fs_down_read_trylock
+ return down_read_trylock(&sem->internal_rwsem);
+ }
+
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
+-{
+- down_read_nested(&sem->internal_rwsem, subclass);
+-}
+-#else
+-#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
+-#endif
+-
+ static inline void f2fs_up_read(struct f2fs_rwsem *sem)
+ {
+ up_read(&sem->internal_rwsem);
+@@ -2179,6 +2170,21 @@ static inline void f2fs_down_write(struc
+ down_write(&sem->internal_rwsem);
+ }
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
++{
++ down_read_nested(&sem->internal_rwsem, subclass);
++}
++
++static inline void f2fs_down_write_nested(struct f2fs_rwsem *sem, int subclass)
++{
++ down_write_nested(&sem->internal_rwsem, subclass);
++}
++#else
++#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
++#define f2fs_down_write_nested(sem, subclass) f2fs_down_write(sem)
++#endif
++
+ static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
+ {
+ return down_write_trylock(&sem->internal_rwsem);
+--- a/fs/f2fs/inline.c
++++ b/fs/f2fs/inline.c
+@@ -642,7 +642,8 @@ int f2fs_add_inline_entry(struct inode *
+ }
+
+ if (inode) {
+- f2fs_down_write(&F2FS_I(inode)->i_sem);
++ f2fs_down_write_nested(&F2FS_I(inode)->i_sem,
++ SINGLE_DEPTH_NESTING);
+ page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
--- /dev/null
+From a3ab55746612247ce3dcaac6de66f5ffc055b9df Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Fri, 7 Jul 2023 07:03:13 -0700
+Subject: f2fs: flush inode if atomic file is aborted
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit a3ab55746612247ce3dcaac6de66f5ffc055b9df upstream.
+
+Let's flush the inode whose atomic operation is being aborted, to avoid a
+stale dirty inode during eviction in this call stack:
+
+ f2fs_mark_inode_dirty_sync+0x22/0x40 [f2fs]
+ f2fs_abort_atomic_write+0xc4/0xf0 [f2fs]
+ f2fs_evict_inode+0x3f/0x690 [f2fs]
+ ? sugov_start+0x140/0x140
+ evict+0xc3/0x1c0
+ evict_inodes+0x17b/0x210
+ generic_shutdown_super+0x32/0x120
+ kill_block_super+0x21/0x50
+ deactivate_locked_super+0x31/0x90
+ cleanup_mnt+0x100/0x160
+ task_work_run+0x59/0x90
+ do_exit+0x33b/0xa50
+ do_group_exit+0x2d/0x80
+ __x64_sys_exit_group+0x14/0x20
+ do_syscall_64+0x3b/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+This triggers f2fs_bug_on() in f2fs_evict_inode:
+ f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
+
+This fixes the syzbot report:
+
+loop0: detected capacity change from 0 to 131072
+F2FS-fs (loop0): invalid crc value
+F2FS-fs (loop0): Found nat_bits in checkpoint
+F2FS-fs (loop0): Mounted with checkpoint version = 48b305e4
+------------[ cut here ]------------
+kernel BUG at fs/f2fs/inode.c:869!
+invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+CPU: 0 PID: 5014 Comm: syz-executor220 Not tainted 6.4.0-syzkaller-11479-g6cd06ab12d1a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023
+RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869
+Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc
+RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
+RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007
+RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000
+R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50
+FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0
+Call Trace:
+ <TASK>
+ evict+0x2ed/0x6b0 fs/inode.c:665
+ dispose_list+0x117/0x1e0 fs/inode.c:698
+ evict_inodes+0x345/0x440 fs/inode.c:748
+ generic_shutdown_super+0xaf/0x480 fs/super.c:478
+ kill_block_super+0x64/0xb0 fs/super.c:1417
+ kill_f2fs_super+0x2af/0x3c0 fs/f2fs/super.c:4704
+ deactivate_locked_super+0x98/0x160 fs/super.c:330
+ deactivate_super+0xb1/0xd0 fs/super.c:361
+ cleanup_mnt+0x2ae/0x3d0 fs/namespace.c:1254
+ task_work_run+0x16f/0x270 kernel/task_work.c:179
+ exit_task_work include/linux/task_work.h:38 [inline]
+ do_exit+0xa9a/0x29a0 kernel/exit.c:874
+ do_group_exit+0xd4/0x2a0 kernel/exit.c:1024
+ __do_sys_exit_group kernel/exit.c:1035 [inline]
+ __se_sys_exit_group kernel/exit.c:1033 [inline]
+ __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1033
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f309be71a09
+Code: Unable to access opcode bytes at 0x7f309be719df.
+RSP: 002b:00007fff171df518 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
+RAX: ffffffffffffffda RBX: 00007f309bef7330 RCX: 00007f309be71a09
+RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001
+RBP: 0000000000000001 R08: ffffffffffffffc0 R09: 00007f309bef1e40
+R10: 0000000000010600 R11: 0000000000000246 R12: 00007f309bef7330
+R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001
+ </TASK>
+Modules linked in:
+---[ end trace 0000000000000000 ]---
+RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869
+Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc
+RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
+RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007
+RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000
+R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50
+FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0
+
+Cc: <stable@vger.kernel.org>
+Reported-and-tested-by: syzbot+e1246909d526a9d470fa@syzkaller.appspotmail.com
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/segment.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -204,6 +204,8 @@ void f2fs_abort_atomic_write(struct inod
+ f2fs_i_size_write(inode, fi->original_i_size);
+ fi->original_i_size = 0;
+ }
++ /* avoid stale dirty inode during eviction */
++ sync_inode_metadata(inode, 0);
+ }
+
+ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
--- /dev/null
+From b8bd342d50cbf606666488488f9fea374aceb2d5 Mon Sep 17 00:00:00 2001
+From: ruanmeisi <ruan.meisi@zte.com.cn>
+Date: Tue, 25 Apr 2023 19:13:54 +0800
+Subject: fuse: nlookup missing decrement in fuse_direntplus_link
+
+From: ruanmeisi <ruan.meisi@zte.com.cn>
+
+commit b8bd342d50cbf606666488488f9fea374aceb2d5 upstream.
+
+During our debugging of glusterfs, we found an Assertion failed error:
+inode_lookup >= nlookup, which was caused by the nlookup value in the
+kernel being greater than that in the FUSE file system.
+
+The issue was introduced by fuse_direntplus_link, where in the function,
+fuse_iget increments nlookup, and if d_splice_alias returns failure,
+fuse_direntplus_link returns failure without decrementing nlookup
+https://github.com/gluster/glusterfs/pull/4081
+
+Signed-off-by: ruanmeisi <ruan.meisi@zte.com.cn>
+Fixes: 0b05b18381ee ("fuse: implement NFS-like readdirplus support")
+Cc: <stable@vger.kernel.org> # v3.9
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/readdir.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/fuse/readdir.c
++++ b/fs/fuse/readdir.c
+@@ -243,8 +243,16 @@ retry:
+ dput(dentry);
+ dentry = alias;
+ }
+- if (IS_ERR(dentry))
++ if (IS_ERR(dentry)) {
++ if (!IS_ERR(inode)) {
++ struct fuse_inode *fi = get_fuse_inode(inode);
++
++ spin_lock(&fi->lock);
++ fi->nlookup--;
++ spin_unlock(&fi->lock);
++ }
+ return PTR_ERR(dentry);
++ }
+ }
+ if (fc->readdirplus_auto)
+ set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
--- /dev/null
+From 23316be8a9d450f33a21f1efe7d89570becbec58 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Sun, 16 Jul 2023 04:28:04 +0200
+Subject: hwspinlock: qcom: add missing regmap config for SFPB MMIO implementation
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit 23316be8a9d450f33a21f1efe7d89570becbec58 upstream.
+
+Commit 5d4753f741d8 ("hwspinlock: qcom: add support for MMIO on older
+SoCs") introduced and made regmap_config mandatory in the of_data struct
+but didn't add the regmap_config for sfpb based devices.
+
+SFPB based devices can be probed using either the legacy syscon way or
+the new MMIO way, and currently devices that use the MMIO way are broken
+as they lack the definition of the now required regmap_config and always
+return -EINVAL (which indirectly makes probing fail for everything that
+depends on it: smem, nandc with smem-parser...).
+
+Fix this by correctly adding the missing regmap_config and restore
+function of hwspinlock on SFPB based devices with MMIO implementation.
+
+Cc: stable@vger.kernel.org
+Fixes: 5d4753f741d8 ("hwspinlock: qcom: add support for MMIO on older SoCs")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Link: https://lore.kernel.org/r/20230716022804.21239-1-ansuelsmth@gmail.com
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwspinlock/qcom_hwspinlock.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/hwspinlock/qcom_hwspinlock.c
++++ b/drivers/hwspinlock/qcom_hwspinlock.c
+@@ -69,9 +69,18 @@ static const struct hwspinlock_ops qcom_
+ .unlock = qcom_hwspinlock_unlock,
+ };
+
++static const struct regmap_config sfpb_mutex_config = {
++ .reg_bits = 32,
++ .reg_stride = 4,
++ .val_bits = 32,
++ .max_register = 0x100,
++ .fast_io = true,
++};
++
+ static const struct qcom_hwspinlock_of_data of_sfpb_mutex = {
+ .offset = 0x4,
+ .stride = 0x4,
++ .regmap_config = &sfpb_mutex_config,
+ };
+
+ static const struct regmap_config tcsr_msm8226_mutex_config = {
--- /dev/null
+From 590a809ff743e7bd890ba5fb36bc38e20a36de53 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Fri, 14 Jul 2023 10:55:27 +0800
+Subject: jbd2: check 'jh->b_transaction' before removing it from checkpoint
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 590a809ff743e7bd890ba5fb36bc38e20a36de53 upstream.
+
+Following process will corrupt ext4 image:
+Step 1:
+jbd2_journal_commit_transaction
+ __jbd2_journal_insert_checkpoint(jh, commit_transaction)
+ // Put jh into trans1->t_checkpoint_list
+ journal->j_checkpoint_transactions = commit_transaction
+ // Put trans1 into journal->j_checkpoint_transactions
+
+Step 2:
+do_get_write_access
+ test_clear_buffer_dirty(bh) // clear buffer dirty,set jbd dirty
+ __jbd2_journal_file_buffer(jh, transaction) // jh belongs to trans2
+
+Step 3:
+drop_cache
+ journal_shrink_one_cp_list
+ jbd2_journal_try_remove_checkpoint
+ if (!trylock_buffer(bh)) // lock bh, true
+ if (buffer_dirty(bh)) // buffer is not dirty
+ __jbd2_journal_remove_checkpoint(jh)
+ // remove jh from trans1->t_checkpoint_list
+
+Step 4:
+jbd2_log_do_checkpoint
+ trans1 = journal->j_checkpoint_transactions
+ // jh is not in trans1->t_checkpoint_list
+ jbd2_cleanup_journal_tail(journal) // trans1 is done
+
+Step 5: Power cut, trans2 is not committed, jh is lost in next mounting.
+
+Fix it by checking 'jh->b_transaction' before removing it from checkpoint.
+
+Cc: stable@kernel.org
+Fixes: 46f881b5b175 ("jbd2: fix a race when checking checkpoint buffer busy")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230714025528.564988-3-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/checkpoint.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -639,6 +639,8 @@ int jbd2_journal_try_remove_checkpoint(s
+ {
+ struct buffer_head *bh = jh2bh(jh);
+
++ if (jh->b_transaction)
++ return -EBUSY;
+ if (!trylock_buffer(bh))
+ return -EBUSY;
+ if (buffer_dirty(bh)) {
--- /dev/null
+From 2dfba3bb40ad8536b9fa802364f2d40da31aa88e Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 26 Jun 2023 15:33:22 +0800
+Subject: jbd2: correct the end of the journal recovery scan range
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 2dfba3bb40ad8536b9fa802364f2d40da31aa88e upstream.
+
+We got a filesystem inconsistency issue below while running generic/475
+I/O failure pressure test with fast_commit feature enabled.
+
+ Symlink /p3/d3/d1c/d6c/dd6/dce/l101 (inode #132605) is invalid.
+
+If fast_commit feature is enabled, a special fast_commit journal area is
+appended to the end of the normal journal area. The journal->j_last
+points to the first unused block behind the normal journal area instead
+of the whole log area, and the journal->j_fc_last points to the first
+unused block behind the fast_commit journal area. While doing journal
+recovery, do_one_pass(PASS_SCAN) should first scan the normal journal
+area and turn around to the first block once it meets journal->j_last,
+but the wrap() macro misuses the journal->j_fc_last, so the recovery
+could not read the next magic block (commit block perhaps) and would end
+early mistakenly, missing tN and every transaction after it in the
+following example. Finally, it could lead to filesystem inconsistency.
+
+ | normal journal area | fast commit area |
+ +-------------------------------------------------+------------------+
+ | tN(rere) | tN+1 |~| tN-x |...| tN-1 | tN(front) | .... |
+ +-------------------------------------------------+------------------+
+ / / /
+ start journal->j_last journal->j_fc_last
+
+This patch fixes it by using the correct ending journal->j_last.
+
+Fixes: 5b849b5f96b4 ("jbd2: fast commit recovery path")
+Cc: stable@kernel.org
+Reported-by: Theodore Ts'o <tytso@mit.edu>
+Link: https://lore.kernel.org/linux-ext4/20230613043120.GB1584772@mit.edu/
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230626073322.3956567-1-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/recovery.c | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/fs/jbd2/recovery.c
++++ b/fs/jbd2/recovery.c
+@@ -229,12 +229,8 @@ static int count_tags(journal_t *journal
+ /* Make sure we wrap around the log correctly! */
+ #define wrap(journal, var) \
+ do { \
+- unsigned long _wrap_last = \
+- jbd2_has_feature_fast_commit(journal) ? \
+- (journal)->j_fc_last : (journal)->j_last; \
+- \
+- if (var >= _wrap_last) \
+- var -= (_wrap_last - (journal)->j_first); \
++ if (var >= (journal)->j_last) \
++ var -= ((journal)->j_last - (journal)->j_first); \
+ } while (0)
+
+ static int fc_do_one_pass(journal_t *journal,
+@@ -517,9 +513,7 @@ static int do_one_pass(journal_t *journa
+ break;
+
+ jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
+- next_commit_ID, next_log_block,
+- jbd2_has_feature_fast_commit(journal) ?
+- journal->j_fc_last : journal->j_last);
++ next_commit_ID, next_log_block, journal->j_last);
+
+ /* Skip over each chunk of the transaction looking
+ * either the next descriptor block or the final commit
--- /dev/null
+From 373ac521799d9e97061515aca6ec6621789036bb Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Fri, 14 Jul 2023 10:55:26 +0800
+Subject: jbd2: fix checkpoint cleanup performance regression
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 373ac521799d9e97061515aca6ec6621789036bb upstream.
+
+journal_clean_one_cp_list() has been merged into
+journal_shrink_one_cp_list(), but doing checkpoint buffer cleanup from
+the committing process is just a best effort; it should stop scanning
+once it meets a busy buffer, or else it will cause a lot of invalid
+buffer scans and checks. We caught a performance regression when doing
+the fs_mark tests below.
+
+Test cmd:
+ ./fs_mark -d scratch -s 1024 -n 10000 -t 1 -D 100 -N 100
+
+Before merging checkpoint buffer cleanup:
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 8304.9 49033
+
+After merging checkpoint buffer cleanup:
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 7649.0 50012
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 2107.1 50871
+
+After merging checkpoint buffer cleanup, the total loop count in
+journal_shrink_one_cp_list() could be up to 6,261,600+ (50,000+ ~
+100,000+ in general), most of them are invalid. This patch fixes it
+by passing 'shrink_type' into journal_shrink_one_cp_list() and adding
+a new 'SHRINK_BUSY_STOP' to indicate it should stop once it meets a busy
+buffer. After the fix, the loop count drops back to 10,000+.
+
+After this fix:
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 8558.4 49109
+
+Cc: stable@kernel.org
+Fixes: b98dba273a0e ("jbd2: remove journal_clean_one_cp_list()")
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230714025528.564988-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/checkpoint.c | 20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -349,6 +349,8 @@ int jbd2_cleanup_journal_tail(journal_t
+
+ /* Checkpoint list management */
+
++enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
++
+ /*
+ * journal_shrink_one_cp_list
+ *
+@@ -360,7 +362,8 @@ int jbd2_cleanup_journal_tail(journal_t
+ * Called with j_list_lock held.
+ */
+ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+- bool destroy, bool *released)
++ enum shrink_type type,
++ bool *released)
+ {
+ struct journal_head *last_jh;
+ struct journal_head *next_jh = jh;
+@@ -376,12 +379,15 @@ static unsigned long journal_shrink_one_
+ jh = next_jh;
+ next_jh = jh->b_cpnext;
+
+- if (destroy) {
++ if (type == SHRINK_DESTROY) {
+ ret = __jbd2_journal_remove_checkpoint(jh);
+ } else {
+ ret = jbd2_journal_try_remove_checkpoint(jh);
+- if (ret < 0)
+- continue;
++ if (ret < 0) {
++ if (type == SHRINK_BUSY_SKIP)
++ continue;
++ break;
++ }
+ }
+
+ nr_freed++;
+@@ -445,7 +451,7 @@ again:
+ tid = transaction->t_tid;
+
+ freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+- false, &released);
++ SHRINK_BUSY_SKIP, &released);
+ nr_freed += freed;
+ (*nr_to_scan) -= min(*nr_to_scan, freed);
+ if (*nr_to_scan == 0)
+@@ -485,19 +491,21 @@ out:
+ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+ {
+ transaction_t *transaction, *last_transaction, *next_transaction;
++ enum shrink_type type;
+ bool released;
+
+ transaction = journal->j_checkpoint_transactions;
+ if (!transaction)
+ return;
+
++ type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
+ last_transaction = transaction->t_cpprev;
+ next_transaction = transaction;
+ do {
+ transaction = next_transaction;
+ next_transaction = transaction->t_cpnext;
+ journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+- destroy, &released);
++ type, &released);
+ /*
+ * This function only frees up some memory if possible so we
+ * dont have an obligation to finish processing. Bail out if
--- /dev/null
+From 92382d744176f230101d54f5c017bccd62770f01 Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Mon, 7 Aug 2023 08:36:28 -0700
+Subject: lib: test_scanf: Add explicit type cast to result initialization in test_number_prefix()
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 92382d744176f230101d54f5c017bccd62770f01 upstream.
+
+A recent change in clang allows it to consider more expressions as
+compile time constants, which causes it to point out an implicit
+conversion in the scanf tests:
+
+ lib/test_scanf.c:661:2: warning: implicit conversion from 'int' to 'unsigned char' changes value from -168 to 88 [-Wconstant-conversion]
+ 661 | test_number_prefix(unsigned char, "0xA7", "%2hhx%hhx", 0, 0xa7, 2, check_uchar);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ lib/test_scanf.c:609:29: note: expanded from macro 'test_number_prefix'
+ 609 | T result[2] = {~expect[0], ~expect[1]}; \
+ | ~ ^~~~~~~~~~
+ 1 warning generated.
+
+The result of the bitwise negation is the type of the operand after
+going through the integer promotion rules, so this truncation is
+expected but harmless, as the initial values in the result array get
+overwritten by _test() anyways. Add an explicit cast to the expected
+type in test_number_prefix() to silence the warning. There is no
+functional change, as all the tests still pass with GCC 13.1.0 and clang
+18.0.0.
+
+Cc: stable@vger.kernel.org
+Link: https://github.com/ClangBuiltLinux/linux/issues/1899
+Link: https://github.com/llvm/llvm-project/commit/610ec954e1f81c0e8fcadedcd25afe643f5a094e
+Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Reviewed-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Link: https://lore.kernel.org/r/20230807-test_scanf-wconstant-conversion-v2-1-839ca39083e1@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/test_scanf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/test_scanf.c
++++ b/lib/test_scanf.c
+@@ -606,7 +606,7 @@ static void __init numbers_slice(void)
+ #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn) \
+ do { \
+ const T expect[2] = { expect0, expect1 }; \
+- T result[2] = {~expect[0], ~expect[1]}; \
++ T result[2] = { (T)~expect[0], (T)~expect[1] }; \
+ \
+ _test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]); \
+ } while (0)
sh-boards-fix-ceu-buffer-size-passed-to-dma_declare_.patch
multi-gen-lru-avoid-race-in-inc_min_seq.patch
net-mlx5-free-irq-rmap-and-notifier-on-kernel-shutdown.patch
+arc-atomics-add-compiler-barrier-to-atomic-operations.patch
+clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch
+dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch
+jbd2-fix-checkpoint-cleanup-performance-regression.patch
+jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch
+jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch
+ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch
+ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch
+f2fs-flush-inode-if-atomic-file-is-aborted.patch
+f2fs-avoid-false-alarm-of-circular-locking.patch
+lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch
+hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch
+ata-ahci-add-elkhart-lake-ahci-controller.patch
+ata-pata_falcon-fix-io-base-selection-for-q40.patch
+ata-sata_gemini-add-missing-module_description.patch
+ata-pata_ftide010-add-missing-module_description.patch
+fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch
+btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch
+btrfs-fix-start-transaction-qgroup-rsv-double-free.patch
+btrfs-free-qgroup-rsv-on-io-failure.patch
+btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch