--- /dev/null
+From 42f51fb24fd39cc547c086ab3d8a314cc603a91c Mon Sep 17 00:00:00 2001
+From: Pavel Kozlov <pavel.kozlov@synopsys.com>
+Date: Tue, 15 Aug 2023 19:11:36 +0400
+Subject: ARC: atomics: Add compiler barrier to atomic operations...
+
+From: Pavel Kozlov <pavel.kozlov@synopsys.com>
+
+commit 42f51fb24fd39cc547c086ab3d8a314cc603a91c upstream.
+
+... to avoid unwanted gcc optimizations
+
+SMP kernels fail to boot with commit 596ff4a09b89
+("cpumask: re-introduce constant-sized cpumask optimizations").
+
+|
+| percpu: BUG: failure at mm/percpu.c:2981/pcpu_build_alloc_info()!
+|
+
+The write operation performed by the SCOND instruction in the atomic
+inline asm code is not properly passed to the compiler. The compiler
+cannot correctly optimize a nested loop that runs through the cpumask
+in the pcpu_build_alloc_info() function.
+
+Fix this by adding a compiler barrier (memory clobber in inline asm).
+
+Apparently atomic ops used to have memory clobber implicitly via
+surrounding smp_mb(). However commit b64be6836993c431e
+("ARC: atomics: implement relaxed variants") removed the smp_mb() for
+the relaxed variants, but failed to add the explicit compiler barrier.
+
+Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/135
+Cc: <stable@vger.kernel.org> # v6.3+
+Fixes: b64be6836993c43 ("ARC: atomics: implement relaxed variants")
+Signed-off-by: Pavel Kozlov <pavel.kozlov@synopsys.com>
+Signed-off-by: Vineet Gupta <vgupta@kernel.org>
+[vgupta: tweaked the changelog and added Fixes tag]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arc/include/asm/atomic-llsc.h | 6 +++---
+ arch/arc/include/asm/atomic64-arcv2.h | 6 +++---
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/arc/include/asm/atomic-llsc.h
++++ b/arch/arc/include/asm/atomic-llsc.h
+@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int
+ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
+ : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ } \
+
+ #define ATOMIC_OP_RETURN(op, asm_op) \
+@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_ret
+ : [val] "=&r" (val) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ \
+ return val; \
+ }
+@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+- : "cc"); \
++ : "cc", "memory"); \
+ \
+ return orig; \
+ }
+--- a/arch/arc/include/asm/atomic64-arcv2.h
++++ b/arch/arc/include/asm/atomic64-arcv2.h
+@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s6
+ " bnz 1b \n" \
+ : "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); \
++ : "cc", "memory"); \
+ } \
+
+ #define ATOMIC64_OP_RETURN(op, op1, op2) \
+@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_r
+ " bnz 1b \n" \
+ : [val] "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); /* memory clobber comes from smp_mb() */ \
++ : "cc", "memory"); \
+ \
+ return val; \
+ }
+@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##
+ " bnz 1b \n" \
+ : "=&r"(orig), "=&r"(val) \
+ : "r"(&v->counter), "ir"(a) \
+- : "cc"); /* memory clobber comes from smp_mb() */ \
++ : "cc", "memory"); \
+ \
+ return orig; \
+ }
--- /dev/null
+From 2a2df98ec592667927b5c1351afa6493ea125c9f Mon Sep 17 00:00:00 2001
+From: Werner Fischer <devlists@wefi.net>
+Date: Tue, 29 Aug 2023 13:33:58 +0200
+Subject: ata: ahci: Add Elkhart Lake AHCI controller
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Werner Fischer <devlists@wefi.net>
+
+commit 2a2df98ec592667927b5c1351afa6493ea125c9f upstream.
+
+Elkhart Lake is the successor of Apollo Lake and Gemini Lake. These
+CPUs and their PCHs are used in mobile and embedded environments.
+
+With this patch I suggest that Elkhart Lake SATA controllers [1] should
+use the default LPM policy for mobile chipsets.
+The disadvantage of missing hot-plug support with this setting should
+not be an issue, as those CPUs are used in embedded environments and
+not in servers with hot-plug backplanes.
+
+We discovered that the Elkhart Lake SATA controllers have been missing
+in ahci.c after a customer reported the throttling of his SATA SSD
+after a short period of higher I/O. We determined the high temperature
+of the SSD controller in idle mode as the root cause for that.
+
+Depending on the used SSD, we have seen up to 1.8 Watt lower system
+idle power usage and up to 30°C lower SSD controller temperatures in
+our tests, when we set med_power_with_dipm manually. I have provided a
+table showing seven different SATA SSDs from ATP, Intel/Solidigm and
+Samsung [2].
+
+Intel lists a total of 3 SATA controller IDs (4B60, 4B62, 4B63) in [1]
+for those mobile PCHs.
+This commit just adds 0x4b63 as I do not have test systems with 0x4b60
+and 0x4b62 SATA controllers.
+I have tested this patch with a system which uses 0x4b63 as SATA
+controller.
+
+[1] https://sata-io.org/product/8803
+[2] https://www.thomas-krenn.com/en/wiki/SATA_Link_Power_Management#Example_LES_v4
+
+Signed-off-by: Werner Fischer <devlists@wefi.net>
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/ahci.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -421,6 +421,8 @@ static const struct pci_device_id ahci_p
+ { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
+ { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
+ { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
++ /* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */
++ { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */
+
+ /* JMicron 360/1/3/5/6, match class to avoid IDE function */
+ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
--- /dev/null
+From 8a1f00b753ecfdb117dc1a07e68c46d80e7923ea Mon Sep 17 00:00:00 2001
+From: Michael Schmitz <schmitzmic@gmail.com>
+Date: Sun, 27 Aug 2023 16:13:47 +1200
+Subject: ata: pata_falcon: fix IO base selection for Q40
+
+From: Michael Schmitz <schmitzmic@gmail.com>
+
+commit 8a1f00b753ecfdb117dc1a07e68c46d80e7923ea upstream.
+
+With commit 44b1fbc0f5f3 ("m68k/q40: Replace q40ide driver
+with pata_falcon and falconide"), the Q40 IDE driver was
+replaced by pata_falcon.c.
+
+Both IO and memory resources were defined for the Q40 IDE
+platform device, but definition of the IDE register addresses
+was modeled after the Falcon case, both in use of the memory
+resources and in including register shift and byte vs. word
+offset in the address.
+
+This was correct for the Falcon case, which does not apply
+any address translation to the register addresses. In the
+Q40 case, all of device base address, byte access offset
+and register shift is included in the platform specific
+ISA access translation (in asm/mm_io.h).
+
+As a consequence, such address translation gets applied
+twice, and register addresses are mangled.
+
+Use the device base address from the platform IO resource
+for Q40 (the IO address translation will then add the correct
+ISA window base address and byte access offset), with register
+shift 1. Use MMIO base address and register shift 2 as before
+for Falcon.
+
+Encode PIO_OFFSET into IO port addresses for all registers
+for Q40 except the data transfer register. Encode the MMIO
+offset there (pata_falcon_data_xfer() directly uses raw IO
+with no address translation).
+
+Reported-by: William R Sowerbutts <will@sowerbutts.com>
+Closes: https://lore.kernel.org/r/CAMuHMdUU62jjunJh9cqSqHT87B0H0A4udOOPs=WN7WZKpcagVA@mail.gmail.com
+Link: https://lore.kernel.org/r/CAMuHMdUU62jjunJh9cqSqHT87B0H0A4udOOPs=WN7WZKpcagVA@mail.gmail.com
+Fixes: 44b1fbc0f5f3 ("m68k/q40: Replace q40ide driver with pata_falcon and falconide")
+Cc: stable@vger.kernel.org
+Cc: Finn Thain <fthain@linux-m68k.org>
+Cc: Geert Uytterhoeven <geert@linux-m68k.org>
+Tested-by: William R Sowerbutts <will@sowerbutts.com>
+Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
+Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/pata_falcon.c | 50 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 29 insertions(+), 21 deletions(-)
+
+--- a/drivers/ata/pata_falcon.c
++++ b/drivers/ata/pata_falcon.c
+@@ -123,8 +123,8 @@ static int __init pata_falcon_init_one(s
+ struct resource *base_res, *ctl_res, *irq_res;
+ struct ata_host *host;
+ struct ata_port *ap;
+- void __iomem *base;
+- int irq = 0;
++ void __iomem *base, *ctl_base;
++ int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */
+
+ dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
+
+@@ -165,26 +165,34 @@ static int __init pata_falcon_init_one(s
+ ap->pio_mask = ATA_PIO4;
+ ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
+
+- base = (void __iomem *)base_mem_res->start;
+ /* N.B. this assumes data_addr will be used for word-sized I/O only */
+- ap->ioaddr.data_addr = base + 0 + 0 * 4;
+- ap->ioaddr.error_addr = base + 1 + 1 * 4;
+- ap->ioaddr.feature_addr = base + 1 + 1 * 4;
+- ap->ioaddr.nsect_addr = base + 1 + 2 * 4;
+- ap->ioaddr.lbal_addr = base + 1 + 3 * 4;
+- ap->ioaddr.lbam_addr = base + 1 + 4 * 4;
+- ap->ioaddr.lbah_addr = base + 1 + 5 * 4;
+- ap->ioaddr.device_addr = base + 1 + 6 * 4;
+- ap->ioaddr.status_addr = base + 1 + 7 * 4;
+- ap->ioaddr.command_addr = base + 1 + 7 * 4;
+-
+- base = (void __iomem *)ctl_mem_res->start;
+- ap->ioaddr.altstatus_addr = base + 1;
+- ap->ioaddr.ctl_addr = base + 1;
+-
+- ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
+- (unsigned long)base_mem_res->start,
+- (unsigned long)ctl_mem_res->start);
++ ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start;
++
++ if (base_res) { /* only Q40 has IO resources */
++ io_offset = 0x10000;
++ reg_shift = 0;
++ base = (void __iomem *)base_res->start;
++ ctl_base = (void __iomem *)ctl_res->start;
++ } else {
++ base = (void __iomem *)base_mem_res->start;
++ ctl_base = (void __iomem *)ctl_mem_res->start;
++ }
++
++ ap->ioaddr.error_addr = base + io_offset + (1 << reg_shift);
++ ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift);
++ ap->ioaddr.nsect_addr = base + io_offset + (2 << reg_shift);
++ ap->ioaddr.lbal_addr = base + io_offset + (3 << reg_shift);
++ ap->ioaddr.lbam_addr = base + io_offset + (4 << reg_shift);
++ ap->ioaddr.lbah_addr = base + io_offset + (5 << reg_shift);
++ ap->ioaddr.device_addr = base + io_offset + (6 << reg_shift);
++ ap->ioaddr.status_addr = base + io_offset + (7 << reg_shift);
++ ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift);
++
++ ap->ioaddr.altstatus_addr = ctl_base + io_offset;
++ ap->ioaddr.ctl_addr = ctl_base + io_offset;
++
++ ata_port_desc(ap, "cmd %px ctl %px data %px",
++ base, ctl_base, ap->ioaddr.data_addr);
+
+ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (irq_res && irq_res->start > 0) {
--- /dev/null
+From 7274eef5729037300f29d14edeb334a47a098f65 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Thu, 24 Aug 2023 07:41:59 +0900
+Subject: ata: pata_ftide010: Add missing MODULE_DESCRIPTION
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 7274eef5729037300f29d14edeb334a47a098f65 upstream.
+
+Add the missing MODULE_DESCRIPTION() to avoid warnings such as:
+
+WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/ata/pata_ftide010.o
+
+when compiling with W=1.
+
+Fixes: be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/pata_ftide010.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/ata/pata_ftide010.c
++++ b/drivers/ata/pata_ftide010.c
+@@ -567,6 +567,7 @@ static struct platform_driver pata_ftide
+ };
+ module_platform_driver(pata_ftide010_driver);
+
++MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010");
+ MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("platform:" DRV_NAME);
--- /dev/null
+From 8566572bf3b4d6e416a4bf2110dbb4817d11ba59 Mon Sep 17 00:00:00 2001
+From: Damien Le Moal <dlemoal@kernel.org>
+Date: Thu, 24 Aug 2023 07:43:18 +0900
+Subject: ata: sata_gemini: Add missing MODULE_DESCRIPTION
+
+From: Damien Le Moal <dlemoal@kernel.org>
+
+commit 8566572bf3b4d6e416a4bf2110dbb4817d11ba59 upstream.
+
+Add the missing MODULE_DESCRIPTION() to avoid warnings such as:
+
+WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/ata/sata_gemini.o
+
+when compiling with W=1.
+
+Fixes: be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/ata/sata_gemini.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/ata/sata_gemini.c
++++ b/drivers/ata/sata_gemini.c
+@@ -428,6 +428,7 @@ static struct platform_driver gemini_sat
+ };
+ module_platform_driver(gemini_sata_driver);
+
++MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge");
+ MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+ MODULE_LICENSE("GPL");
+ MODULE_ALIAS("platform:" DRV_NAME);
--- /dev/null
+From 4490e803e1fe9fab8db5025e44e23b55df54078b Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 26 Jul 2023 16:56:57 +0100
+Subject: btrfs: don't start transaction when joining with TRANS_JOIN_NOSTART
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 4490e803e1fe9fab8db5025e44e23b55df54078b upstream.
+
+When joining a transaction with TRANS_JOIN_NOSTART, if we don't find a
+running transaction we end up creating one. This goes against the purpose
+of TRANS_JOIN_NOSTART which is to join a running transaction if its state
+is at or below the state TRANS_STATE_COMMIT_START, otherwise return an
+-ENOENT error and don't start a new transaction. So fix this to not create
+a new transaction if there's no running transaction at or below that
+state.
+
+CC: stable@vger.kernel.org # 4.14+
+Fixes: a6d155d2e363 ("Btrfs: fix deadlock between fiemap and transaction commits")
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/transaction.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -292,10 +292,11 @@ loop:
+ spin_unlock(&fs_info->trans_lock);
+
+ /*
+- * If we are ATTACH, we just want to catch the current transaction,
+- * and commit it. If there is no transaction, just return ENOENT.
++ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
++ * current transaction, and commit it. If there is no transaction, just
++ * return ENOENT.
+ */
+- if (type == TRANS_ATTACH)
++ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
+ return -ENOENT;
+
+ /*
--- /dev/null
+From a6496849671a5bc9218ecec25a983253b34351b1 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 21 Jul 2023 09:02:07 -0700
+Subject: btrfs: fix start transaction qgroup rsv double free
+
+From: Boris Burkov <boris@bur.io>
+
+commit a6496849671a5bc9218ecec25a983253b34351b1 upstream.
+
+btrfs_start_transaction reserves metadata space of the PERTRANS type
+before it identifies a transaction to start/join. This allows flushing
+when reserving that space without a deadlock. However, it results in a
+race which temporarily breaks qgroup rsv accounting.
+
+T1 T2
+start_transaction
+do_stuff
+ start_transaction
+ qgroup_reserve_meta_pertrans
+commit_transaction
+ qgroup_free_meta_all_pertrans
+ hit an error starting txn
+ goto reserve_fail
+ qgroup_free_meta_pertrans (already freed!)
+
+The basic issue is that there is nothing preventing another commit from
+committing before start_transaction finishes (in fact sometimes we
+intentionally wait for it) so any error path that frees the reserve is
+at risk of this race.
+
+While this exact space was getting freed anyway, and it's not a huge
+deal to double free it (just a warning, the free code catches this), it
+can result in incorrectly freeing some other pertrans reservation in
+this same reservation, which could then lead to spuriously granting
+reservations we might not have the space for. Therefore, I do believe it
+is worth fixing.
+
+To fix it, use the existing prealloc->pertrans conversion mechanism.
+When we first reserve the space, we reserve prealloc space and only when
+we are sure we have a transaction do we convert it to pertrans. This way
+any racing commits do not blow away our reservation, but we still get a
+pertrans reservation that is freed when _this_ transaction gets committed.
+
+This issue can be reproduced by running generic/269 with either qgroups
+or squotas enabled via mkfs on the scratch device.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+CC: stable@vger.kernel.org # 5.10+
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/transaction.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -591,8 +591,13 @@ start_transaction(struct btrfs_root *roo
+ u64 delayed_refs_bytes = 0;
+
+ qgroup_reserved = num_items * fs_info->nodesize;
+- ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
+- enforce_qgroups);
++ /*
++ * Use prealloc for now, as there might be a currently running
++ * transaction that could free this reserved space prematurely
++ * by committing.
++ */
++ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved,
++ enforce_qgroups, false);
+ if (ret)
+ return ERR_PTR(ret);
+
+@@ -705,6 +710,14 @@ again:
+ h->reloc_reserved = reloc_reserved;
+ }
+
++ /*
++ * Now that we have found a transaction to be a part of, convert the
++ * qgroup reservation from prealloc to pertrans. A different transaction
++ * can't race in and free our pertrans out from under us.
++ */
++ if (qgroup_reserved)
++ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
++
+ got_it:
+ if (!current->journal_info)
+ current->journal_info = h;
+@@ -752,7 +765,7 @@ alloc_fail:
+ btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
+ num_bytes, NULL);
+ reserve_fail:
+- btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
++ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
+ return ERR_PTR(ret);
+ }
+
--- /dev/null
+From e28b02118b94e42be3355458a2406c6861e2dd32 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 21 Jul 2023 09:02:06 -0700
+Subject: btrfs: free qgroup rsv on io failure
+
+From: Boris Burkov <boris@bur.io>
+
+commit e28b02118b94e42be3355458a2406c6861e2dd32 upstream.
+
+If we do a write whose bio suffers an error, we will never reclaim the
+qgroup reserved space for it. We allocate the space in the write_iter
+codepath, then release the reservation as we allocate the ordered
+extent, but we only create a delayed ref if the ordered extent finishes.
+If it has an error, we simply leak the rsv. This is apparent in running
+any error injecting (dmerror) fstests like btrfs/146 or btrfs/160. Such
+tests fail due to dmesg on umount complaining about the leaked qgroup
+data space.
+
+When we clean up other aspects of space on failed ordered_extents, also
+free the qgroup rsv.
+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+CC: stable@vger.kernel.org # 5.10+
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -3359,6 +3359,13 @@ out:
+ btrfs_free_reserved_extent(fs_info,
+ ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes, 1);
++ /*
++ * Actually free the qgroup rsv which was released when
++ * the ordered extent was created.
++ */
++ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
++ ordered_extent->qgroup_rsv,
++ BTRFS_QGROUP_RSV_DATA);
+ }
+ }
+
--- /dev/null
+From e7f1326cc24e22b38afc3acd328480a1183f9e79 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Mon, 31 Jul 2023 11:13:00 -0400
+Subject: btrfs: set page extent mapped after read_folio in relocate_one_page
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit e7f1326cc24e22b38afc3acd328480a1183f9e79 upstream.
+
+One of the CI runs triggered the following panic
+
+ assertion failed: PagePrivate(page) && page->private, in fs/btrfs/subpage.c:229
+ ------------[ cut here ]------------
+ kernel BUG at fs/btrfs/subpage.c:229!
+ Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
+ CPU: 0 PID: 923660 Comm: btrfs Not tainted 6.5.0-rc3+ #1
+ pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+ pc : btrfs_subpage_assert+0xbc/0xf0
+ lr : btrfs_subpage_assert+0xbc/0xf0
+ sp : ffff800093213720
+ x29: ffff800093213720 x28: ffff8000932138b4 x27: 000000000c280000
+ x26: 00000001b5d00000 x25: 000000000c281000 x24: 000000000c281fff
+ x23: 0000000000001000 x22: 0000000000000000 x21: ffffff42b95bf880
+ x20: ffff42b9528e0000 x19: 0000000000001000 x18: ffffffffffffffff
+ x17: 667274622f736620 x16: 6e69202c65746176 x15: 0000000000000028
+ x14: 0000000000000003 x13: 00000000002672d7 x12: 0000000000000000
+ x11: ffffcd3f0ccd9204 x10: ffffcd3f0554ae50 x9 : ffffcd3f0379528c
+ x8 : ffff800093213428 x7 : 0000000000000000 x6 : ffffcd3f091771e8
+ x5 : ffff42b97f333948 x4 : 0000000000000000 x3 : 0000000000000000
+ x2 : 0000000000000000 x1 : ffff42b9556cde80 x0 : 000000000000004f
+ Call trace:
+ btrfs_subpage_assert+0xbc/0xf0
+ btrfs_subpage_set_dirty+0x38/0xa0
+ btrfs_page_set_dirty+0x58/0x88
+ relocate_one_page+0x204/0x5f0
+ relocate_file_extent_cluster+0x11c/0x180
+ relocate_data_extent+0xd0/0xf8
+ relocate_block_group+0x3d0/0x4e8
+ btrfs_relocate_block_group+0x2d8/0x490
+ btrfs_relocate_chunk+0x54/0x1a8
+ btrfs_balance+0x7f4/0x1150
+ btrfs_ioctl+0x10f0/0x20b8
+ __arm64_sys_ioctl+0x120/0x11d8
+ invoke_syscall.constprop.0+0x80/0xd8
+ do_el0_svc+0x6c/0x158
+ el0_svc+0x50/0x1b0
+ el0t_64_sync_handler+0x120/0x130
+ el0t_64_sync+0x194/0x198
+ Code: 91098021 b0007fa0 91346000 97e9c6d2 (d4210000)
+
+This is the same problem outlined in 17b17fcd6d44 ("btrfs:
+set_page_extent_mapped after read_folio in btrfs_cont_expand") , and the
+fix is the same. I originally looked for the same pattern elsewhere in
+our code, but mistakenly skipped over this code because I saw the page
+cache readahead before we set_page_extent_mapped, not realizing that
+this was only in the !page case, that we can still end up with a
+!uptodate page and then do the btrfs_read_folio further down.
+
+The fix here is the same as the above mentioned patch, move the
+set_page_extent_mapped call to after the btrfs_read_folio() block to
+make sure that we have the subpage blocksize stuff setup properly before
+using the page.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/relocation.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -3006,9 +3006,6 @@ static int relocate_one_page(struct inod
+ if (!page)
+ return -ENOMEM;
+ }
+- ret = set_page_extent_mapped(page);
+- if (ret < 0)
+- goto release_page;
+
+ if (PageReadahead(page))
+ page_cache_async_readahead(inode->i_mapping, ra, NULL,
+@@ -3024,6 +3021,15 @@ static int relocate_one_page(struct inod
+ }
+ }
+
++ /*
++ * We could have lost page private when we dropped the lock to read the
++ * page above, make sure we set_page_extent_mapped here so we have any
++ * of the subpage blocksize stuff we need in place.
++ */
++ ret = set_page_extent_mapped(page);
++ if (ret < 0)
++ goto release_page;
++
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_SIZE - 1;
+
--- /dev/null
+From 332581bde2a419d5f12a93a1cdc2856af649a3cc Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 21 Jul 2023 16:42:14 +0900
+Subject: btrfs: zoned: do not zone finish data relocation block group
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 332581bde2a419d5f12a93a1cdc2856af649a3cc upstream.
+
+When multiple writes happen at once, we may need to sacrifice a currently
+active block group to be zone finished for a new allocation. We choose a
+block group with the least free space left, and zone finish it.
+
+To do the finishing, we need to send IOs for already allocated region
+and wait for them and on-going IOs. Otherwise, these IOs fail because the
+zone is already finished at the time the IO reach a device.
+
+However, if a block group dedicated to the data relocation is zone
+finished, there is a chance of finishing it before an ongoing write IO
+reaches the device. That is because there is a timing gap between an
+allocation is done (block_group->reservations == 0, as pre-allocation is
+done) and an ordered extent is created when the relocation IO starts.
+Thus, if we finish the zone between them, we can fail the IOs.
+
+We cannot simply use "fs_info->data_reloc_bg == block_group->start" to
+avoid the zone finishing. Because, the data_reloc_bg may already switch to
+a new block group, while there are still ongoing write IOs to the old
+data_reloc_bg.
+
+So, this patch reworks the BLOCK_GROUP_FLAG_ZONED_DATA_RELOC bit to
+indicate there is a data relocation allocation and/or ongoing write to the
+block group. The bit is set on allocation and cleared in end_io function of
+the last IO for the currently allocated region.
+
+Changing the timing of the bit setting also solves the issue of the bit
+being left set even after there is no IO going on. With the current code, if
+the data_reloc_bg switches after the last IO to the current data_reloc_bg,
+the bit is set at this timing and there is no one clearing that bit. As a
+result, that block group is kept unallocatable for anything.
+
+Fixes: 343d8a30851c ("btrfs: zoned: prevent allocation from previous data relocation BG")
+Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 43 +++++++++++++++++++++++--------------------
+ fs/btrfs/zoned.c | 16 +++++++++++++---
+ 2 files changed, 36 insertions(+), 23 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3709,7 +3709,8 @@ static int do_allocation_zoned(struct bt
+ fs_info->data_reloc_bg == 0);
+
+ if (block_group->ro ||
+- test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
++ (!ffe_ctl->for_data_reloc &&
++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
+ ret = 1;
+ goto out;
+ }
+@@ -3752,8 +3753,26 @@ static int do_allocation_zoned(struct bt
+ if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
+ fs_info->treelog_bg = block_group->start;
+
+- if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
+- fs_info->data_reloc_bg = block_group->start;
++ if (ffe_ctl->for_data_reloc) {
++ if (!fs_info->data_reloc_bg)
++ fs_info->data_reloc_bg = block_group->start;
++ /*
++ * Do not allow allocations from this block group, unless it is
++ * for data relocation. Compared to increasing the ->ro, setting
++ * the ->zoned_data_reloc_ongoing flag still allows nocow
++ * writers to come in. See btrfs_inc_nocow_writers().
++ *
++ * We need to disable an allocation to avoid an allocation of
++ * regular (non-relocation data) extent. With mix of relocation
++ * extents and regular extents, we can dispatch WRITE commands
++ * (for relocation extents) and ZONE APPEND commands (for
++ * regular extents) at the same time to the same zone, which
++ * easily break the write pointer.
++ *
++ * Also, this flag avoids this block group to be zone finished.
++ */
++ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
++ }
+
+ ffe_ctl->found_offset = start + block_group->alloc_offset;
+ block_group->alloc_offset += num_bytes;
+@@ -3771,24 +3790,8 @@ static int do_allocation_zoned(struct bt
+ out:
+ if (ret && ffe_ctl->for_treelog)
+ fs_info->treelog_bg = 0;
+- if (ret && ffe_ctl->for_data_reloc &&
+- fs_info->data_reloc_bg == block_group->start) {
+- /*
+- * Do not allow further allocations from this block group.
+- * Compared to increasing the ->ro, setting the
+- * ->zoned_data_reloc_ongoing flag still allows nocow
+- * writers to come in. See btrfs_inc_nocow_writers().
+- *
+- * We need to disable an allocation to avoid an allocation of
+- * regular (non-relocation data) extent. With mix of relocation
+- * extents and regular extents, we can dispatch WRITE commands
+- * (for relocation extents) and ZONE APPEND commands (for
+- * regular extents) at the same time to the same zone, which
+- * easily break the write pointer.
+- */
+- set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
++ if (ret && ffe_ctl->for_data_reloc)
+ fs_info->data_reloc_bg = 0;
+- }
+ spin_unlock(&fs_info->relocation_bg_lock);
+ spin_unlock(&fs_info->treelog_bg_lock);
+ spin_unlock(&block_group->lock);
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2017,6 +2017,10 @@ static int do_zone_finish(struct btrfs_b
+ * and block_group->meta_write_pointer for metadata.
+ */
+ if (!fully_written) {
++ if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
++ spin_unlock(&block_group->lock);
++ return -EAGAIN;
++ }
+ spin_unlock(&block_group->lock);
+
+ ret = btrfs_inc_block_group_ro(block_group, false);
+@@ -2045,7 +2049,9 @@ static int do_zone_finish(struct btrfs_b
+ return 0;
+ }
+
+- if (block_group->reserved) {
++ if (block_group->reserved ||
++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
++ &block_group->runtime_flags)) {
+ spin_unlock(&block_group->lock);
+ btrfs_dec_block_group_ro(block_group);
+ return -EAGAIN;
+@@ -2276,7 +2282,10 @@ void btrfs_zoned_release_data_reloc_bg(s
+
+ /* All relocation extents are written. */
+ if (block_group->start + block_group->alloc_offset == logical + length) {
+- /* Now, release this block group for further allocations. */
++ /*
++ * Now, release this block group for further allocations and
++ * zone finish.
++ */
+ clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+ &block_group->runtime_flags);
+ }
+@@ -2300,7 +2309,8 @@ int btrfs_zone_finish_one_bg(struct btrf
+
+ spin_lock(&block_group->lock);
+ if (block_group->reserved || block_group->alloc_offset == 0 ||
+- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
++ (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
++ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+ spin_unlock(&block_group->lock);
+ continue;
+ }
--- /dev/null
+From 5b135b382a360f4c87cf8896d1465b0b07f10cb0 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Tue, 8 Aug 2023 01:12:40 +0900
+Subject: btrfs: zoned: re-enable metadata over-commit for zoned mode
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 5b135b382a360f4c87cf8896d1465b0b07f10cb0 upstream.
+
+Now that, we can re-enable metadata over-commit. As we moved the activation
+from the reservation time to the write time, we no longer need to ensure
+all the reserved bytes is properly activated.
+
+Without the metadata over-commit, it suffers from lower performance because
+it needs to flush the delalloc items more often and allocate more block
+groups. Re-enabling metadata over-commit will solve the issue.
+
+Fixes: 79417d040f4f ("btrfs: zoned: disable metadata overcommit for zoned")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/space-info.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 356638f54fef..d7e8cd4f140c 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+ return 0;
+
+ used = btrfs_space_info_used(space_info, true);
+- if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
+- (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
+- avail = 0;
+- else
+- avail = calc_available_free_space(fs_info, space_info, flush);
++ avail = calc_available_free_space(fs_info, space_info, flush);
+
+ if (used + bytes < space_info->total_bytes + avail)
+ return 1;
+--
+2.42.0
+
--- /dev/null
+From e7d65e40ab5a5940785c5922f317602d0268caaf Mon Sep 17 00:00:00 2001
+From: Walter Chang <walter.chang@mediatek.com>
+Date: Mon, 17 Jul 2023 17:07:34 +0800
+Subject: clocksource/drivers/arm_arch_timer: Disable timer before programming CVAL
+
+From: Walter Chang <walter.chang@mediatek.com>
+
+commit e7d65e40ab5a5940785c5922f317602d0268caaf upstream.
+
+Due to the fact that the use of `writeq_relaxed()` to program CVAL is
+not guaranteed to be atomic, it is necessary to disable the timer before
+programming CVAL.
+
+However, if the MMIO timer is already enabled and has not yet expired,
+there is a possibility of unexpected behavior occurring: when the CPU
+enters the idle state during this period, and if the CPU's local event
+is earlier than the broadcast event, the following process occurs:
+
+tick_broadcast_enter()
+ tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER)
+ __tick_broadcast_oneshot_control()
+ ___tick_broadcast_oneshot_control()
+ tick_broadcast_set_event()
+ clockevents_program_event()
+ set_next_event_mem()
+
+During this process, the MMIO timer remains enabled while programming
+CVAL. To prevent such behavior, disable timer explicitly prior to
+programming CVAL.
+
+Fixes: 8b82c4f883a7 ("clocksource/drivers/arm_arch_timer: Move MMIO timer programming over to CVAL")
+Cc: stable@vger.kernel.org
+Signed-off-by: Walter Chang <walter.chang@mediatek.com>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
+Link: https://lore.kernel.org/r/20230717090735.19370-1-walter.chang@mediatek.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/clocksource/arm_arch_timer.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/clocksource/arm_arch_timer.c
++++ b/drivers/clocksource/arm_arch_timer.c
+@@ -792,6 +792,13 @@ static __always_inline void set_next_eve
+ u64 cnt;
+
+ ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
++
++ /* Timer must be disabled before programming CVAL */
++ if (ctrl & ARCH_TIMER_CTRL_ENABLE) {
++ ctrl &= ~ARCH_TIMER_CTRL_ENABLE;
++ arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
++ }
++
+ ctrl |= ARCH_TIMER_CTRL_ENABLE;
+ ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
+
--- /dev/null
+From c6ec8c83a29fb3aec3efa6fabbf5344498f57c7f Mon Sep 17 00:00:00 2001
+From: Hien Huynh <hien.huynh.px@renesas.com>
+Date: Thu, 6 Jul 2023 12:21:50 +0100
+Subject: dmaengine: sh: rz-dmac: Fix destination and source data size setting
+
+From: Hien Huynh <hien.huynh.px@renesas.com>
+
+commit c6ec8c83a29fb3aec3efa6fabbf5344498f57c7f upstream.
+
+Before setting DDS and SDS values, we need to clear its value first
+otherwise, we get incorrect results when we change/update the DMA bus
+width several times due to the 'OR' expression.
+
+Fixes: 5000d37042a6 ("dmaengine: sh: Add DMAC driver for RZ/G2L SoC")
+Cc: stable@kernel.org
+Signed-off-by: Hien Huynh <hien.huynh.px@renesas.com>
+Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Link: https://lore.kernel.org/r/20230706112150.198941-3-biju.das.jz@bp.renesas.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma/sh/rz-dmac.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/dma/sh/rz-dmac.c
++++ b/drivers/dma/sh/rz-dmac.c
+@@ -9,6 +9,7 @@
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ */
+
++#include <linux/bitfield.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/dmaengine.h>
+ #include <linux/interrupt.h>
+@@ -145,8 +146,8 @@ struct rz_dmac {
+ #define CHCFG_REQD BIT(3)
+ #define CHCFG_SEL(bits) ((bits) & 0x07)
+ #define CHCFG_MEM_COPY (0x80400008)
+-#define CHCFG_FILL_DDS(a) (((a) << 16) & GENMASK(19, 16))
+-#define CHCFG_FILL_SDS(a) (((a) << 12) & GENMASK(15, 12))
++#define CHCFG_FILL_DDS_MASK GENMASK(19, 16)
++#define CHCFG_FILL_SDS_MASK GENMASK(15, 12)
+ #define CHCFG_FILL_TM(a) (((a) & BIT(5)) << 22)
+ #define CHCFG_FILL_AM(a) (((a) & GENMASK(4, 2)) << 6)
+ #define CHCFG_FILL_LVL(a) (((a) & BIT(1)) << 5)
+@@ -607,13 +608,15 @@ static int rz_dmac_config(struct dma_cha
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+- channel->chcfg |= CHCFG_FILL_DDS(val);
++ channel->chcfg &= ~CHCFG_FILL_DDS_MASK;
++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val);
+
+ val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+- channel->chcfg |= CHCFG_FILL_SDS(val);
++ channel->chcfg &= ~CHCFG_FILL_SDS_MASK;
++ channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val);
+
+ return 0;
+ }
--- /dev/null
+From 68228da51c9a436872a4ef4b5a7692e29f7e5bc7 Mon Sep 17 00:00:00 2001
+From: Wang Jianjian <wangjianjian0@foxmail.com>
+Date: Thu, 3 Aug 2023 00:28:39 +0800
+Subject: ext4: add correct group descriptors and reserved GDT blocks to system zone
+
+From: Wang Jianjian <wangjianjian0@foxmail.com>
+
+commit 68228da51c9a436872a4ef4b5a7692e29f7e5bc7 upstream.
+
+When setup_system_zone, flex_bg is not initialized so it is always 1.
+Use a new helper function, ext4_num_base_meta_blocks() which does not
+depend on sbi->s_log_groups_per_flex being initialized.
+
+[ Squashed two patches in the Link URL's below together into a single
+ commit, which is simpler to review/understand. Also fix checkpatch
+ warnings. --TYT ]
+
+Cc: stable@kernel.org
+Signed-off-by: Wang Jianjian <wangjianjian0@foxmail.com>
+Link: https://lore.kernel.org/r/tencent_21AF0D446A9916ED5C51492CC6C9A0A77B05@qq.com
+Link: https://lore.kernel.org/r/tencent_D744D1450CC169AEA77FCF0A64719909ED05@qq.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/balloc.c | 15 +++++++++++----
+ fs/ext4/block_validity.c | 8 ++++----
+ fs/ext4/ext4.h | 2 ++
+ 3 files changed, 17 insertions(+), 8 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -913,11 +913,11 @@ unsigned long ext4_bg_num_gdb(struct sup
+ }
+
+ /*
+- * This function returns the number of file system metadata clusters at
++ * This function returns the number of file system metadata blocks at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+-static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+- ext4_group_t block_group)
++unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
++ ext4_group_t block_group)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned num;
+@@ -935,8 +935,15 @@ static unsigned ext4_num_base_meta_clust
+ } else { /* For META_BG_BLOCK_GROUPS */
+ num += ext4_bg_num_gdb_meta(sb, block_group);
+ }
+- return EXT4_NUM_B2C(sbi, num);
++ return num;
+ }
++
++static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
++ ext4_group_t block_group)
++{
++ return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
++}
++
+ /**
+ * ext4_inode_to_goal_block - return a hint for block allocation
+ * @inode: inode for block allocation
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_
+ struct ext4_system_blocks *system_blks;
+ struct ext4_group_desc *gdp;
+ ext4_group_t i;
+- int flex_size = ext4_flex_bg_size(sbi);
+ int ret;
+
+ system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
+@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_
+ return -ENOMEM;
+
+ for (i=0; i < ngroups; i++) {
++ unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
++
+ cond_resched();
+- if (ext4_bg_has_super(sb, i) &&
+- ((i < 5) || ((i % flex_size) == 0))) {
++ if (meta_blks != 0) {
+ ret = add_system_zone(system_blks,
+ ext4_group_first_block_no(sb, i),
+- ext4_bg_num_gdb(sb, i) + 1, 0);
++ meta_blks, 0);
+ if (ret)
+ goto err;
+ }
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3084,6 +3084,8 @@ extern const char *ext4_decode_error(str
+ extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+ ext4_group_t block_group,
+ unsigned int flags);
++extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
++ ext4_group_t block_group);
+
+ extern __printf(7, 8)
+ void __ext4_error(struct super_block *, const char *, unsigned int, bool,
--- /dev/null
+From 194505b55dd7899da114a4d47825204eefc0fff5 Mon Sep 17 00:00:00 2001
+From: Brian Foster <bfoster@redhat.com>
+Date: Thu, 10 Aug 2023 12:55:59 -0400
+Subject: ext4: drop dio overwrite only flag and associated warning
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 194505b55dd7899da114a4d47825204eefc0fff5 upstream.
+
+The commit referenced below opened up concurrent unaligned dio under
+shared locking for pure overwrites. In doing so, it enabled use of
+the IOMAP_DIO_OVERWRITE_ONLY flag and added a warning on unexpected
+-EAGAIN returns as an extra precaution, since ext4 does not retry
+writes in such cases. The flag itself is advisory in this case since
+ext4 checks for unaligned I/Os and uses appropriate locking up
+front, rather than on a retry in response to -EAGAIN.
+
+As it turns out, the warning check is susceptible to false positives
+because there are scenarios where -EAGAIN can be expected from lower
+layers without necessarily having IOCB_NOWAIT set on the iocb. For
+example, one instance of the warning has been seen where io_uring
+sets IOCB_HIPRI, which in turn results in REQ_POLLED|REQ_NOWAIT on
+the bio. This results in -EAGAIN if the block layer is unable to
+allocate a request, etc. [Note that there is an outstanding patch to
+untangle REQ_POLLED and REQ_NOWAIT such that the latter relies on
+IOCB_NOWAIT, which would also address this instance of the warning.]
+
+Another instance of the warning has been reproduced by syzbot. A dio
+write is interrupted down in __get_user_pages_locked() waiting on
+the mm lock and returns -EAGAIN up the stack. If the iomap dio
+iteration layer has made no progress on the write to this point,
+-EAGAIN returns up to the filesystem and triggers the warning.
+
+This use of the overwrite flag in ext4 is precautionary and
+half-baked. I.e., ext4 doesn't actually implement overwrite checking
+in the iomap callbacks when the flag is set, so the only extra
+verification it provides are i_size checks in the generic iomap dio
+layer. Combined with the tendency for false positives, the added
+verification is not worth the extra trouble. Remove the flag,
+associated warning, and update the comments to document when
+concurrent unaligned dio writes are allowed and why said flag is not
+used.
+
+Cc: stable@kernel.org
+Reported-by: syzbot+5050ad0fb47527b1808a@syzkaller.appspotmail.com
+Reported-by: Pengfei Xu <pengfei.xu@intel.com>
+Fixes: 310ee0902b8d ("ext4: allow concurrent unaligned dio overwrites")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230810165559.946222-1-bfoster@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/file.c | 25 ++++++++++---------------
+ 1 file changed, 10 insertions(+), 15 deletions(-)
+
+diff --git a/fs/ext4/file.c b/fs/ext4/file.c
+index 2071b1e4322c..e99cc17b6bd2 100644
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -476,6 +476,11 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
+ * required to change security info in file_modified(), for extending
+ * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
+ * extents (as partial block zeroing may be required).
++ *
++ * Note that unaligned writes are allowed under shared lock so long as
++ * they are pure overwrites. Otherwise, concurrent unaligned writes risk
++ * data corruption due to partial block zeroing in the dio layer, and so
++ * the I/O must occur exclusively.
+ */
+ if (*ilock_shared &&
+ ((!IS_NOSEC(inode) || *extend || !overwrite ||
+@@ -492,21 +497,12 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
+
+ /*
+ * Now that locking is settled, determine dio flags and exclusivity
+- * requirements. Unaligned writes are allowed under shared lock so long
+- * as they are pure overwrites. Set the iomap overwrite only flag as an
+- * added precaution in this case. Even though this is unnecessary, we
+- * can detect and warn on unexpected -EAGAIN if an unsafe unaligned
+- * write is ever submitted.
+- *
+- * Otherwise, concurrent unaligned writes risk data corruption due to
+- * partial block zeroing in the dio layer, and so the I/O must occur
+- * exclusively. The inode lock is already held exclusive if the write is
+- * non-overwrite or extending, so drain all outstanding dio and set the
+- * force wait dio flag.
++ * requirements. We don't use DIO_OVERWRITE_ONLY because we enforce
++ * behavior already. The inode lock is already held exclusive if the
++ * write is non-overwrite or extending, so drain all outstanding dio and
++ * set the force wait dio flag.
+ */
+- if (*ilock_shared && unaligned_io) {
+- *dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
+- } else if (!*ilock_shared && (unaligned_io || *extend)) {
++ if (!*ilock_shared && (unaligned_io || *extend)) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+@@ -608,7 +604,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ iomap_ops = &ext4_iomap_overwrite_ops;
+ ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
+ dio_flags, NULL, 0);
+- WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
+ if (ret == -ENOTBLK)
+ ret = 0;
+
+--
+2.42.0
+
--- /dev/null
+From bc056e7163ac7db945366de219745cf94f32a3e6 Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Mon, 24 Jul 2023 20:10:58 +0800
+Subject: ext4: fix BUG in ext4_mb_new_inode_pa() due to overflow
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit bc056e7163ac7db945366de219745cf94f32a3e6 upstream.
+
+When we calculate the end position of ext4_free_extent, this position may
+be exactly where ext4_lblk_t (i.e. uint) overflows. For example, if
+ac_g_ex.fe_logical is 4294965248 and ac_orig_goal_len is 2048, then the
+computed end is 0x100000000, which is 0. If ac->ac_o_ex.fe_logical is not
+the first case of adjusting the best extent, that is, new_bex_end > 0, the
+following BUG_ON will be triggered:
+
+=========================================================
+kernel BUG at fs/ext4/mballoc.c:5116!
+invalid opcode: 0000 [#1] PREEMPT SMP PTI
+CPU: 3 PID: 673 Comm: xfs_io Tainted: G E 6.5.0-rc1+ #279
+RIP: 0010:ext4_mb_new_inode_pa+0xc5/0x430
+Call Trace:
+ <TASK>
+ ext4_mb_use_best_found+0x203/0x2f0
+ ext4_mb_try_best_found+0x163/0x240
+ ext4_mb_regular_allocator+0x158/0x1550
+ ext4_mb_new_blocks+0x86a/0xe10
+ ext4_ext_map_blocks+0xb0c/0x13a0
+ ext4_map_blocks+0x2cd/0x8f0
+ ext4_iomap_begin+0x27b/0x400
+ iomap_iter+0x222/0x3d0
+ __iomap_dio_rw+0x243/0xcb0
+ iomap_dio_rw+0x16/0x80
+=========================================================
+
+A simple reproducer demonstrating the problem:
+
+ mkfs.ext4 -F /dev/sda -b 4096 100M
+ mount /dev/sda /tmp/test
+ fallocate -l1M /tmp/test/tmp
+ fallocate -l10M /tmp/test/file
+ fallocate -i -o 1M -l16777203M /tmp/test/file
+ fsstress -d /tmp/test -l 0 -n 100000 -p 8 &
+ sleep 10 && killall -9 fsstress
+ rm -f /tmp/test/tmp
+ xfs_io -c "open -ad /tmp/test/file" -c "pwrite -S 0xff 0 8192"
+
+We simply refactor the logic for adjusting the best extent by adding
+a temporary ext4_free_extent ex and use extent_logical_end() to avoid
+overflow, which also simplifies the code.
+
+Cc: stable@kernel.org # 6.4
+Fixes: 93cdf49f6eca ("ext4: Fix best extent lstart adjustment logic in ext4_mb_new_inode_pa()")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20230724121059.11834-3-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 31 ++++++++++++++-----------------
+ 1 file changed, 14 insertions(+), 17 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -5181,8 +5181,11 @@ ext4_mb_new_inode_pa(struct ext4_allocat
+ pa = ac->ac_pa;
+
+ if (ac->ac_b_ex.fe_len < ac->ac_orig_goal_len) {
+- int new_bex_start;
+- int new_bex_end;
++ struct ext4_free_extent ex = {
++ .fe_logical = ac->ac_g_ex.fe_logical,
++ .fe_len = ac->ac_orig_goal_len,
++ };
++ loff_t orig_goal_end = extent_logical_end(sbi, &ex);
+
+ /* we can't allocate as much as normalizer wants.
+ * so, found space must get proper lstart
+@@ -5201,29 +5204,23 @@ ext4_mb_new_inode_pa(struct ext4_allocat
+ * still cover original start
+ * 3. Else, keep the best ex at start of original request.
+ */
+- new_bex_end = ac->ac_g_ex.fe_logical +
+- EXT4_C2B(sbi, ac->ac_orig_goal_len);
+- new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+- if (ac->ac_o_ex.fe_logical >= new_bex_start)
+- goto adjust_bex;
++ ex.fe_len = ac->ac_b_ex.fe_len;
+
+- new_bex_start = ac->ac_g_ex.fe_logical;
+- new_bex_end =
+- new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+- if (ac->ac_o_ex.fe_logical < new_bex_end)
++ ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len);
++ if (ac->ac_o_ex.fe_logical >= ex.fe_logical)
+ goto adjust_bex;
+
+- new_bex_start = ac->ac_o_ex.fe_logical;
+- new_bex_end =
+- new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
++ ex.fe_logical = ac->ac_g_ex.fe_logical;
++ if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
++ goto adjust_bex;
+
++ ex.fe_logical = ac->ac_o_ex.fe_logical;
+ adjust_bex:
+- ac->ac_b_ex.fe_logical = new_bex_start;
++ ac->ac_b_ex.fe_logical = ex.fe_logical;
+
+ BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
+ BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
+- BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical +
+- EXT4_C2B(sbi, ac->ac_orig_goal_len)));
++ BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
+ }
+
+ pa->pa_lstart = ac->ac_b_ex.fe_logical;
--- /dev/null
+From 7ca4b085f430f3774c3838b3da569ceccd6a0177 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= <lhenriques@suse.de>
+Date: Thu, 3 Aug 2023 10:17:13 +0100
+Subject: ext4: fix memory leaks in ext4_fname_{setup_filename,prepare_lookup}
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luís Henriques <lhenriques@suse.de>
+
+commit 7ca4b085f430f3774c3838b3da569ceccd6a0177 upstream.
+
+If the filename casefolding fails, we'll be leaking memory from the
+fscrypt_name struct, namely from the 'crypto_buf.name' member.
+
+Make sure we free it in the error path on both ext4_fname_setup_filename()
+and ext4_fname_prepare_lookup() functions.
+
+Cc: stable@kernel.org
+Fixes: 1ae98e295fa2 ("ext4: optimize match for casefolded encrypted dirs")
+Signed-off-by: Luís Henriques <lhenriques@suse.de>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20230803091713.13239-1-lhenriques@suse.de
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/crypto.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/crypto.c
++++ b/fs/ext4/crypto.c
+@@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct ino
+
+ #if IS_ENABLED(CONFIG_UNICODE)
+ err = ext4_fname_setup_ci_filename(dir, iname, fname);
++ if (err)
++ ext4_fname_free_filename(fname);
+ #endif
+ return err;
+ }
+@@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct ino
+
+ #if IS_ENABLED(CONFIG_UNICODE)
+ err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
++ if (err)
++ ext4_fname_free_filename(fname);
+ #endif
+ return err;
+ }
--- /dev/null
+From 768d612f79822d30a1e7d132a4d4b05337ce42ec Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Tue, 15 Aug 2023 15:08:08 +0800
+Subject: ext4: fix slab-use-after-free in ext4_es_insert_extent()
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 768d612f79822d30a1e7d132a4d4b05337ce42ec upstream.
+
+Yikebaer reported an issue:
+==================================================================
+BUG: KASAN: slab-use-after-free in ext4_es_insert_extent+0xc68/0xcb0
+fs/ext4/extents_status.c:894
+Read of size 4 at addr ffff888112ecc1a4 by task syz-executor/8438
+
+CPU: 1 PID: 8438 Comm: syz-executor Not tainted 6.5.0-rc5 #1
+Call Trace:
+ [...]
+ kasan_report+0xba/0xf0 mm/kasan/report.c:588
+ ext4_es_insert_extent+0xc68/0xcb0 fs/ext4/extents_status.c:894
+ ext4_map_blocks+0x92a/0x16f0 fs/ext4/inode.c:680
+ ext4_alloc_file_blocks.isra.0+0x2df/0xb70 fs/ext4/extents.c:4462
+ ext4_zero_range fs/ext4/extents.c:4622 [inline]
+ ext4_fallocate+0x251c/0x3ce0 fs/ext4/extents.c:4721
+ [...]
+
+Allocated by task 8438:
+ [...]
+ kmem_cache_zalloc include/linux/slab.h:693 [inline]
+ __es_alloc_extent fs/ext4/extents_status.c:469 [inline]
+ ext4_es_insert_extent+0x672/0xcb0 fs/ext4/extents_status.c:873
+ ext4_map_blocks+0x92a/0x16f0 fs/ext4/inode.c:680
+ ext4_alloc_file_blocks.isra.0+0x2df/0xb70 fs/ext4/extents.c:4462
+ ext4_zero_range fs/ext4/extents.c:4622 [inline]
+ ext4_fallocate+0x251c/0x3ce0 fs/ext4/extents.c:4721
+ [...]
+
+Freed by task 8438:
+ [...]
+ kmem_cache_free+0xec/0x490 mm/slub.c:3823
+ ext4_es_try_to_merge_right fs/ext4/extents_status.c:593 [inline]
+ __es_insert_extent+0x9f4/0x1440 fs/ext4/extents_status.c:802
+ ext4_es_insert_extent+0x2ca/0xcb0 fs/ext4/extents_status.c:882
+ ext4_map_blocks+0x92a/0x16f0 fs/ext4/inode.c:680
+ ext4_alloc_file_blocks.isra.0+0x2df/0xb70 fs/ext4/extents.c:4462
+ ext4_zero_range fs/ext4/extents.c:4622 [inline]
+ ext4_fallocate+0x251c/0x3ce0 fs/ext4/extents.c:4721
+ [...]
+==================================================================
+
+The flow of issue triggering is as follows:
+1. remove es
+ raw es es removed es1
+|-------------------| -> |----|.......|------|
+
+2. insert es
+ es insert es1 merge with es es1 merge with es and free es1
+|----|.......|------| -> |------------|------| -> |-------------------|
+
+es merges with newes, then merges with es1, frees es1, then determines
+if es1->es_len is 0 and triggers a UAF.
+
+The code flow is as follows:
+ext4_es_insert_extent
+ es1 = __es_alloc_extent(true);
+ es2 = __es_alloc_extent(true);
+ __es_remove_extent(inode, lblk, end, NULL, es1)
+ __es_insert_extent(inode, &newes, es1) ---> insert es1 to es tree
+ __es_insert_extent(inode, &newes, es2)
+ ext4_es_try_to_merge_right
+ ext4_es_free_extent(inode, es1) ---> es1 is freed
+ if (es1 && !es1->es_len)
+ // Trigger UAF by determining if es1 is used.
+
+We determine whether es1 or es2 is used immediately after calling
+__es_remove_extent() or __es_insert_extent() to avoid triggering a
+UAF if es1 or es2 is freed.
+
+Reported-by: Yikebaer Aizezi <yikebaer61@gmail.com>
+Closes: https://lore.kernel.org/lkml/CALcu4raD4h9coiyEBL4Bm0zjDwxC2CyPiTwsP3zFuhot6y9Beg@mail.gmail.com
+Fixes: 2a69c450083d ("ext4: using nofail preallocation in ext4_es_insert_extent()")
+Cc: stable@kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230815070808.3377171-1-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/extents_status.c | 44 +++++++++++++++++++++++++++-------------
+ 1 file changed, 30 insertions(+), 14 deletions(-)
+
+diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
+index 9b5b8951afb4..6f7de14c0fa8 100644
+--- a/fs/ext4/extents_status.c
++++ b/fs/ext4/extents_status.c
+@@ -878,23 +878,29 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+ err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
+ if (err1 != 0)
+ goto error;
++ /* Free preallocated extent if it didn't get used. */
++ if (es1) {
++ if (!es1->es_len)
++ __es_free_extent(es1);
++ es1 = NULL;
++ }
+
+ err2 = __es_insert_extent(inode, &newes, es2);
+ if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
+ err2 = 0;
+ if (err2 != 0)
+ goto error;
++ /* Free preallocated extent if it didn't get used. */
++ if (es2) {
++ if (!es2->es_len)
++ __es_free_extent(es2);
++ es2 = NULL;
++ }
+
+ if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
+ (status & EXTENT_STATUS_WRITTEN ||
+ status & EXTENT_STATUS_UNWRITTEN))
+ __revise_pending(inode, lblk, len);
+-
+- /* es is pre-allocated but not used, free it. */
+- if (es1 && !es1->es_len)
+- __es_free_extent(es1);
+- if (es2 && !es2->es_len)
+- __es_free_extent(es2);
+ error:
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err1 || err2)
+@@ -1491,8 +1497,12 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
+ */
+ write_lock(&EXT4_I(inode)->i_es_lock);
+ err = __es_remove_extent(inode, lblk, end, &reserved, es);
+- if (es && !es->es_len)
+- __es_free_extent(es);
++ /* Free preallocated extent if it didn't get used. */
++ if (es) {
++ if (!es->es_len)
++ __es_free_extent(es);
++ es = NULL;
++ }
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err)
+ goto retry;
+@@ -2047,19 +2057,25 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
+ err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
+ if (err1 != 0)
+ goto error;
++ /* Free preallocated extent if it didn't get used. */
++ if (es1) {
++ if (!es1->es_len)
++ __es_free_extent(es1);
++ es1 = NULL;
++ }
+
+ err2 = __es_insert_extent(inode, &newes, es2);
+ if (err2 != 0)
+ goto error;
++ /* Free preallocated extent if it didn't get used. */
++ if (es2) {
++ if (!es2->es_len)
++ __es_free_extent(es2);
++ es2 = NULL;
++ }
+
+ if (allocated)
+ __insert_pending(inode, lblk);
+-
+- /* es is pre-allocated but not used, free it. */
+- if (es1 && !es1->es_len)
+- __es_free_extent(es1);
+- if (es2 && !es2->es_len)
+- __es_free_extent(es2);
+ error:
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err1 || err2)
+--
+2.42.0
+
--- /dev/null
+From 5c13e2388bf3426fd69a89eb46e50469e9624e56 Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Fri, 18 Aug 2023 11:34:32 -0700
+Subject: f2fs: avoid false alarm of circular locking
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit 5c13e2388bf3426fd69a89eb46e50469e9624e56 upstream.
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.5.0-rc5-syzkaller-00353-gae545c3283dc #0 Not tainted
+------------------------------------------------------
+syz-executor273/5027 is trying to acquire lock:
+ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_down_write fs/f2fs/f2fs.h:2133 [inline]
+ffff888077fe1fb0 (&fi->i_sem){+.+.}-{3:3}, at: f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644
+
+but task is already holding lock:
+ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_down_read fs/f2fs/f2fs.h:2108 [inline]
+ffff888077fe07c8 (&fi->i_xattr_sem){.+.+}-{3:3}, at: f2fs_add_dentry+0x92/0x230 fs/f2fs/dir.c:783
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (&fi->i_xattr_sem){.+.+}-{3:3}:
+ down_read+0x9c/0x470 kernel/locking/rwsem.c:1520
+ f2fs_down_read fs/f2fs/f2fs.h:2108 [inline]
+ f2fs_getxattr+0xb1e/0x12c0 fs/f2fs/xattr.c:532
+ __f2fs_get_acl+0x5a/0x900 fs/f2fs/acl.c:179
+ f2fs_acl_create fs/f2fs/acl.c:377 [inline]
+ f2fs_init_acl+0x15c/0xb30 fs/f2fs/acl.c:420
+ f2fs_init_inode_metadata+0x159/0x1290 fs/f2fs/dir.c:558
+ f2fs_add_regular_entry+0x79e/0xb90 fs/f2fs/dir.c:740
+ f2fs_add_dentry+0x1de/0x230 fs/f2fs/dir.c:788
+ f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827
+ f2fs_add_link fs/f2fs/f2fs.h:3554 [inline]
+ f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781
+ vfs_mkdir+0x532/0x7e0 fs/namei.c:4117
+ do_mkdirat+0x2a9/0x330 fs/namei.c:4140
+ __do_sys_mkdir fs/namei.c:4160 [inline]
+ __se_sys_mkdir fs/namei.c:4158 [inline]
+ __x64_sys_mkdir+0xf2/0x140 fs/namei.c:4158
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+-> #0 (&fi->i_sem){+.+.}-{3:3}:
+ check_prev_add kernel/locking/lockdep.c:3142 [inline]
+ check_prevs_add kernel/locking/lockdep.c:3261 [inline]
+ validate_chain kernel/locking/lockdep.c:3876 [inline]
+ __lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144
+ lock_acquire kernel/locking/lockdep.c:5761 [inline]
+ lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726
+ down_write+0x93/0x200 kernel/locking/rwsem.c:1573
+ f2fs_down_write fs/f2fs/f2fs.h:2133 [inline]
+ f2fs_add_inline_entry+0x300/0x6f0 fs/f2fs/inline.c:644
+ f2fs_add_dentry+0xa6/0x230 fs/f2fs/dir.c:784
+ f2fs_do_add_link+0x190/0x280 fs/f2fs/dir.c:827
+ f2fs_add_link fs/f2fs/f2fs.h:3554 [inline]
+ f2fs_mkdir+0x377/0x620 fs/f2fs/namei.c:781
+ vfs_mkdir+0x532/0x7e0 fs/namei.c:4117
+ ovl_do_mkdir fs/overlayfs/overlayfs.h:196 [inline]
+ ovl_mkdir_real+0xb5/0x370 fs/overlayfs/dir.c:146
+ ovl_workdir_create+0x3de/0x820 fs/overlayfs/super.c:309
+ ovl_make_workdir fs/overlayfs/super.c:711 [inline]
+ ovl_get_workdir fs/overlayfs/super.c:864 [inline]
+ ovl_fill_super+0xdab/0x6180 fs/overlayfs/super.c:1400
+ vfs_get_super+0xf9/0x290 fs/super.c:1152
+ vfs_get_tree+0x88/0x350 fs/super.c:1519
+ do_new_mount fs/namespace.c:3335 [inline]
+ path_mount+0x1492/0x1ed0 fs/namespace.c:3662
+ do_mount fs/namespace.c:3675 [inline]
+ __do_sys_mount fs/namespace.c:3884 [inline]
+ __se_sys_mount fs/namespace.c:3861 [inline]
+ __x64_sys_mount+0x293/0x310 fs/namespace.c:3861
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ rlock(&fi->i_xattr_sem);
+ lock(&fi->i_sem);
+ lock(&fi->i_xattr_sem);
+ lock(&fi->i_sem);
+
+Cc: <stable@vger.kernel.org>
+Reported-and-tested-by: syzbot+e5600587fa9cbf8e3826@syzkaller.appspotmail.com
+Fixes: 5eda1ad1aaff "f2fs: fix deadlock in i_xattr_sem and inode page lock"
+Tested-by: Guenter Roeck <linux@roeck-us.net>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/f2fs.h | 24 +++++++++++++++---------
+ fs/f2fs/inline.c | 3 ++-
+ 2 files changed, 17 insertions(+), 10 deletions(-)
+
+--- a/fs/f2fs/f2fs.h
++++ b/fs/f2fs/f2fs.h
+@@ -2122,15 +2122,6 @@ static inline int f2fs_down_read_trylock
+ return down_read_trylock(&sem->internal_rwsem);
+ }
+
+-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+-static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
+-{
+- down_read_nested(&sem->internal_rwsem, subclass);
+-}
+-#else
+-#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
+-#endif
+-
+ static inline void f2fs_up_read(struct f2fs_rwsem *sem)
+ {
+ up_read(&sem->internal_rwsem);
+@@ -2141,6 +2132,21 @@ static inline void f2fs_down_write(struc
+ down_write(&sem->internal_rwsem);
+ }
+
++#ifdef CONFIG_DEBUG_LOCK_ALLOC
++static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass)
++{
++ down_read_nested(&sem->internal_rwsem, subclass);
++}
++
++static inline void f2fs_down_write_nested(struct f2fs_rwsem *sem, int subclass)
++{
++ down_write_nested(&sem->internal_rwsem, subclass);
++}
++#else
++#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem)
++#define f2fs_down_write_nested(sem, subclass) f2fs_down_write(sem)
++#endif
++
+ static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem)
+ {
+ return down_write_trylock(&sem->internal_rwsem);
+--- a/fs/f2fs/inline.c
++++ b/fs/f2fs/inline.c
+@@ -641,7 +641,8 @@ int f2fs_add_inline_entry(struct inode *
+ }
+
+ if (inode) {
+- f2fs_down_write(&F2FS_I(inode)->i_sem);
++ f2fs_down_write_nested(&F2FS_I(inode)->i_sem,
++ SINGLE_DEPTH_NESTING);
+ page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
--- /dev/null
+From a3ab55746612247ce3dcaac6de66f5ffc055b9df Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Fri, 7 Jul 2023 07:03:13 -0700
+Subject: f2fs: flush inode if atomic file is aborted
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit a3ab55746612247ce3dcaac6de66f5ffc055b9df upstream.
+
+Let's flush the inode being aborted atomic operation to avoid stale dirty
+inode during eviction in this call stack:
+
+ f2fs_mark_inode_dirty_sync+0x22/0x40 [f2fs]
+ f2fs_abort_atomic_write+0xc4/0xf0 [f2fs]
+ f2fs_evict_inode+0x3f/0x690 [f2fs]
+ ? sugov_start+0x140/0x140
+ evict+0xc3/0x1c0
+ evict_inodes+0x17b/0x210
+ generic_shutdown_super+0x32/0x120
+ kill_block_super+0x21/0x50
+ deactivate_locked_super+0x31/0x90
+ cleanup_mnt+0x100/0x160
+ task_work_run+0x59/0x90
+ do_exit+0x33b/0xa50
+ do_group_exit+0x2d/0x80
+ __x64_sys_exit_group+0x14/0x20
+ do_syscall_64+0x3b/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+This triggers f2fs_bug_on() in f2fs_evict_inode:
+ f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
+
+This fixes the syzbot report:
+
+loop0: detected capacity change from 0 to 131072
+F2FS-fs (loop0): invalid crc value
+F2FS-fs (loop0): Found nat_bits in checkpoint
+F2FS-fs (loop0): Mounted with checkpoint version = 48b305e4
+------------[ cut here ]------------
+kernel BUG at fs/f2fs/inode.c:869!
+invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+CPU: 0 PID: 5014 Comm: syz-executor220 Not tainted 6.4.0-syzkaller-11479-g6cd06ab12d1a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023
+RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869
+Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc
+RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
+RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007
+RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000
+R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50
+FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0
+Call Trace:
+ <TASK>
+ evict+0x2ed/0x6b0 fs/inode.c:665
+ dispose_list+0x117/0x1e0 fs/inode.c:698
+ evict_inodes+0x345/0x440 fs/inode.c:748
+ generic_shutdown_super+0xaf/0x480 fs/super.c:478
+ kill_block_super+0x64/0xb0 fs/super.c:1417
+ kill_f2fs_super+0x2af/0x3c0 fs/f2fs/super.c:4704
+ deactivate_locked_super+0x98/0x160 fs/super.c:330
+ deactivate_super+0xb1/0xd0 fs/super.c:361
+ cleanup_mnt+0x2ae/0x3d0 fs/namespace.c:1254
+ task_work_run+0x16f/0x270 kernel/task_work.c:179
+ exit_task_work include/linux/task_work.h:38 [inline]
+ do_exit+0xa9a/0x29a0 kernel/exit.c:874
+ do_group_exit+0xd4/0x2a0 kernel/exit.c:1024
+ __do_sys_exit_group kernel/exit.c:1035 [inline]
+ __se_sys_exit_group kernel/exit.c:1033 [inline]
+ __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:1033
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f309be71a09
+Code: Unable to access opcode bytes at 0x7f309be719df.
+RSP: 002b:00007fff171df518 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
+RAX: ffffffffffffffda RBX: 00007f309bef7330 RCX: 00007f309be71a09
+RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000001
+RBP: 0000000000000001 R08: ffffffffffffffc0 R09: 00007f309bef1e40
+R10: 0000000000010600 R11: 0000000000000246 R12: 00007f309bef7330
+R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001
+ </TASK>
+Modules linked in:
+---[ end trace 0000000000000000 ]---
+RIP: 0010:f2fs_evict_inode+0x172d/0x1e00 fs/f2fs/inode.c:869
+Code: ff df 48 c1 ea 03 80 3c 02 00 0f 85 6a 06 00 00 8b 75 40 ba 01 00 00 00 4c 89 e7 e8 6d ce 06 00 e9 aa fc ff ff e8 63 22 e2 fd <0f> 0b e8 5c 22 e2 fd 48 c7 c0 a8 3a 18 8d 48 ba 00 00 00 00 00 fc
+RSP: 0018:ffffc90003a6fa00 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
+RDX: ffff8880273b8000 RSI: ffffffff83a2bd0d RDI: 0000000000000007
+RBP: ffff888077db91b0 R08: 0000000000000007 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000001 R12: ffff888029a3c000
+R13: ffff888077db9660 R14: ffff888029a3c0b8 R15: ffff888077db9c50
+FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f1909bb9000 CR3: 00000000276a9000 CR4: 0000000000350ef0
+
+Cc: <stable@vger.kernel.org>
+Reported-and-tested-by: syzbot+e1246909d526a9d470fa@syzkaller.appspotmail.com
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/segment.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/f2fs/segment.c
++++ b/fs/f2fs/segment.c
+@@ -205,6 +205,8 @@ void f2fs_abort_atomic_write(struct inod
+ f2fs_i_size_write(inode, fi->original_i_size);
+ fi->original_i_size = 0;
+ }
++ /* avoid stale dirty inode during eviction */
++ sync_inode_metadata(inode, 0);
+ }
+
+ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
--- /dev/null
+From d2d9bb3b6d2fbccb5b33d3a85a2830971625a4ea Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Thu, 19 Jan 2023 10:47:00 -0800
+Subject: f2fs: get out of a repeat loop when getting a locked data page
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit d2d9bb3b6d2fbccb5b33d3a85a2830971625a4ea upstream.
+
+https://bugzilla.kernel.org/show_bug.cgi?id=216050
+
+Somehow we're getting a page which has a different mapping.
+Let's avoid the infinite loop.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/data.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/fs/f2fs/data.c
++++ b/fs/f2fs/data.c
+@@ -1389,18 +1389,14 @@ struct page *f2fs_get_lock_data_page(str
+ {
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+-repeat:
++
+ page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
+ if (IS_ERR(page))
+ return page;
+
+ /* wait for read completion */
+ lock_page(page);
+- if (unlikely(page->mapping != mapping)) {
+- f2fs_put_page(page, 1);
+- goto repeat;
+- }
+- if (unlikely(!PageUptodate(page))) {
++ if (unlikely(page->mapping != mapping || !PageUptodate(page))) {
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
+ }
--- /dev/null
+From b8bd342d50cbf606666488488f9fea374aceb2d5 Mon Sep 17 00:00:00 2001
+From: ruanmeisi <ruan.meisi@zte.com.cn>
+Date: Tue, 25 Apr 2023 19:13:54 +0800
+Subject: fuse: nlookup missing decrement in fuse_direntplus_link
+
+From: ruanmeisi <ruan.meisi@zte.com.cn>
+
+commit b8bd342d50cbf606666488488f9fea374aceb2d5 upstream.
+
+During our debugging of glusterfs, we found an Assertion failed error:
+inode_lookup >= nlookup, which was caused by the nlookup value in the
+kernel being greater than that in the FUSE file system.
+
+The issue was introduced by fuse_direntplus_link, where in the function,
+fuse_iget increments nlookup, and if d_splice_alias returns failure,
+fuse_direntplus_link returns failure without decrementing nlookup
+https://github.com/gluster/glusterfs/pull/4081
+
+Signed-off-by: ruanmeisi <ruan.meisi@zte.com.cn>
+Fixes: 0b05b18381ee ("fuse: implement NFS-like readdirplus support")
+Cc: <stable@vger.kernel.org> # v3.9
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/readdir.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/fuse/readdir.c
++++ b/fs/fuse/readdir.c
+@@ -243,8 +243,16 @@ retry:
+ dput(dentry);
+ dentry = alias;
+ }
+- if (IS_ERR(dentry))
++ if (IS_ERR(dentry)) {
++ if (!IS_ERR(inode)) {
++ struct fuse_inode *fi = get_fuse_inode(inode);
++
++ spin_lock(&fi->lock);
++ fi->nlookup--;
++ spin_unlock(&fi->lock);
++ }
+ return PTR_ERR(dentry);
++ }
+ }
+ if (fc->readdirplus_auto)
+ set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
--- /dev/null
+From 23316be8a9d450f33a21f1efe7d89570becbec58 Mon Sep 17 00:00:00 2001
+From: Christian Marangi <ansuelsmth@gmail.com>
+Date: Sun, 16 Jul 2023 04:28:04 +0200
+Subject: hwspinlock: qcom: add missing regmap config for SFPB MMIO implementation
+
+From: Christian Marangi <ansuelsmth@gmail.com>
+
+commit 23316be8a9d450f33a21f1efe7d89570becbec58 upstream.
+
+Commit 5d4753f741d8 ("hwspinlock: qcom: add support for MMIO on older
+SoCs") introduced and made regmap_config mandatory in the of_data struct
+but didn't add the regmap_config for sfpb based devices.
+
+SFPB based devices can both use the legacy syscon way to probe or the
+new MMIO way and currently device that use the MMIO way are broken as
+they lack the definition of the now required regmap_config and always
+return -EINVAL (and indirectly makes fail probing everything that
+depends on it, smem, nandc with smem-parser...)
+
+Fix this by correctly adding the missing regmap_config and restore
+function of hwspinlock on SFPB based devices with MMIO implementation.
+
+Cc: stable@vger.kernel.org
+Fixes: 5d4753f741d8 ("hwspinlock: qcom: add support for MMIO on older SoCs")
+Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
+Link: https://lore.kernel.org/r/20230716022804.21239-1-ansuelsmth@gmail.com
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwspinlock/qcom_hwspinlock.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/drivers/hwspinlock/qcom_hwspinlock.c
++++ b/drivers/hwspinlock/qcom_hwspinlock.c
+@@ -69,9 +69,18 @@ static const struct hwspinlock_ops qcom_
+ .unlock = qcom_hwspinlock_unlock,
+ };
+
++static const struct regmap_config sfpb_mutex_config = {
++ .reg_bits = 32,
++ .reg_stride = 4,
++ .val_bits = 32,
++ .max_register = 0x100,
++ .fast_io = true,
++};
++
+ static const struct qcom_hwspinlock_of_data of_sfpb_mutex = {
+ .offset = 0x4,
+ .stride = 0x4,
++ .regmap_config = &sfpb_mutex_config,
+ };
+
+ static const struct regmap_config tcsr_msm8226_mutex_config = {
--- /dev/null
+From 590a809ff743e7bd890ba5fb36bc38e20a36de53 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Fri, 14 Jul 2023 10:55:27 +0800
+Subject: jbd2: check 'jh->b_transaction' before removing it from checkpoint
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 590a809ff743e7bd890ba5fb36bc38e20a36de53 upstream.
+
+Following process will corrupt ext4 image:
+Step 1:
+jbd2_journal_commit_transaction
+ __jbd2_journal_insert_checkpoint(jh, commit_transaction)
+ // Put jh into trans1->t_checkpoint_list
+ journal->j_checkpoint_transactions = commit_transaction
+ // Put trans1 into journal->j_checkpoint_transactions
+
+Step 2:
+do_get_write_access
+ test_clear_buffer_dirty(bh) // clear buffer dirty,set jbd dirty
+ __jbd2_journal_file_buffer(jh, transaction) // jh belongs to trans2
+
+Step 3:
+drop_cache
+ journal_shrink_one_cp_list
+ jbd2_journal_try_remove_checkpoint
+ if (!trylock_buffer(bh)) // lock bh, true
+ if (buffer_dirty(bh)) // buffer is not dirty
+ __jbd2_journal_remove_checkpoint(jh)
+ // remove jh from trans1->t_checkpoint_list
+
+Step 4:
+jbd2_log_do_checkpoint
+ trans1 = journal->j_checkpoint_transactions
+ // jh is not in trans1->t_checkpoint_list
+ jbd2_cleanup_journal_tail(journal) // trans1 is done
+
+Step 5: Power cut, trans2 is not committed, jh is lost in next mounting.
+
+Fix it by checking 'jh->b_transaction' before remove it from checkpoint.
+
+Cc: stable@kernel.org
+Fixes: 46f881b5b175 ("jbd2: fix a race when checking checkpoint buffer busy")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230714025528.564988-3-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/checkpoint.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -639,6 +639,8 @@ int jbd2_journal_try_remove_checkpoint(s
+ {
+ struct buffer_head *bh = jh2bh(jh);
+
++ if (jh->b_transaction)
++ return -EBUSY;
+ if (!trylock_buffer(bh))
+ return -EBUSY;
+ if (buffer_dirty(bh)) {
--- /dev/null
+From 2dfba3bb40ad8536b9fa802364f2d40da31aa88e Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Mon, 26 Jun 2023 15:33:22 +0800
+Subject: jbd2: correct the end of the journal recovery scan range
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 2dfba3bb40ad8536b9fa802364f2d40da31aa88e upstream.
+
+We got a filesystem inconsistency issue below while running generic/475
+I/O failure pressure test with fast_commit feature enabled.
+
+ Symlink /p3/d3/d1c/d6c/dd6/dce/l101 (inode #132605) is invalid.
+
+If fast_commit feature is enabled, a special fast_commit journal area is
+appended to the end of the normal journal area. The journal->j_last
+point to the first unused block behind the normal journal area instead
+of the whole log area, and the journal->j_fc_last point to the first
+unused block behind the fast_commit journal area. While doing journal
+recovery, do_one_pass(PASS_SCAN) should first scan the normal journal
+area and turn around to the first block once it meet journal->j_last,
+but the wrap() macro misuse the journal->j_fc_last, so the recovering
+could not read the next magic block (commit block perhaps) and would end
+early mistakenly and missing tN and every transaction after it in the
+following example. Finally, it could lead to filesystem inconsistency.
+
+ | normal journal area | fast commit area |
+ +-------------------------------------------------+------------------+
+ | tN(rere) | tN+1 |~| tN-x |...| tN-1 | tN(front) | .... |
+ +-------------------------------------------------+------------------+
+ / / /
+ start journal->j_last journal->j_fc_last
+
+This patch fix it by use the correct ending journal->j_last.
+
+Fixes: 5b849b5f96b4 ("jbd2: fast commit recovery path")
+Cc: stable@kernel.org
+Reported-by: Theodore Ts'o <tytso@mit.edu>
+Link: https://lore.kernel.org/linux-ext4/20230613043120.GB1584772@mit.edu/
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230626073322.3956567-1-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/recovery.c | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+--- a/fs/jbd2/recovery.c
++++ b/fs/jbd2/recovery.c
+@@ -230,12 +230,8 @@ static int count_tags(journal_t *journal
+ /* Make sure we wrap around the log correctly! */
+ #define wrap(journal, var) \
+ do { \
+- unsigned long _wrap_last = \
+- jbd2_has_feature_fast_commit(journal) ? \
+- (journal)->j_fc_last : (journal)->j_last; \
+- \
+- if (var >= _wrap_last) \
+- var -= (_wrap_last - (journal)->j_first); \
++ if (var >= (journal)->j_last) \
++ var -= ((journal)->j_last - (journal)->j_first); \
+ } while (0)
+
+ static int fc_do_one_pass(journal_t *journal,
+@@ -524,9 +520,7 @@ static int do_one_pass(journal_t *journa
+ break;
+
+ jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
+- next_commit_ID, next_log_block,
+- jbd2_has_feature_fast_commit(journal) ?
+- journal->j_fc_last : journal->j_last);
++ next_commit_ID, next_log_block, journal->j_last);
+
+ /* Skip over each chunk of the transaction looking
+ * either the next descriptor block or the final commit
--- /dev/null
+From 373ac521799d9e97061515aca6ec6621789036bb Mon Sep 17 00:00:00 2001
+From: Zhang Yi <yi.zhang@huawei.com>
+Date: Fri, 14 Jul 2023 10:55:26 +0800
+Subject: jbd2: fix checkpoint cleanup performance regression
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+commit 373ac521799d9e97061515aca6ec6621789036bb upstream.
+
+journal_clean_one_cp_list() has been merged into
+journal_shrink_one_cp_list(), but do chekpoint buffer cleanup from the
+committing process is just a best effort, it should stop scan once it
+meet a busy buffer, or else it will cause a lot of invalid buffer scan
+and checks. We catch a performance regression when doing fs_mark tests
+below.
+
+Test cmd:
+ ./fs_mark -d scratch -s 1024 -n 10000 -t 1 -D 100 -N 100
+
+Before merging checkpoint buffer cleanup:
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 8304.9 49033
+
+After merging checkpoint buffer cleanup:
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 7649.0 50012
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 2107.1 50871
+
+After merging checkpoint buffer cleanup, the total loop count in
+journal_shrink_one_cp_list() could be up to 6,261,600+ (50,000+ ~
+100,000+ in general), most of them are invalid. This patch fix it
+through passing 'shrink_type' into journal_shrink_one_cp_list() and add
+a new 'SHRINK_BUSY_STOP' to indicate it should stop once meet a busy
+buffer. After fix, the loop count descending back to 10,000+.
+
+After this fix:
+ FSUse% Count Size Files/sec App Overhead
+ 95 10000 1024 8558.4 49109
+
+Cc: stable@kernel.org
+Fixes: b98dba273a0e ("jbd2: remove journal_clean_one_cp_list()")
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230714025528.564988-2-yi.zhang@huaweicloud.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/checkpoint.c | 20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -349,6 +349,8 @@ int jbd2_cleanup_journal_tail(journal_t
+
+ /* Checkpoint list management */
+
++enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
++
+ /*
+ * journal_shrink_one_cp_list
+ *
+@@ -360,7 +362,8 @@ int jbd2_cleanup_journal_tail(journal_t
+ * Called with j_list_lock held.
+ */
+ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
+- bool destroy, bool *released)
++ enum shrink_type type,
++ bool *released)
+ {
+ struct journal_head *last_jh;
+ struct journal_head *next_jh = jh;
+@@ -376,12 +379,15 @@ static unsigned long journal_shrink_one_
+ jh = next_jh;
+ next_jh = jh->b_cpnext;
+
+- if (destroy) {
++ if (type == SHRINK_DESTROY) {
+ ret = __jbd2_journal_remove_checkpoint(jh);
+ } else {
+ ret = jbd2_journal_try_remove_checkpoint(jh);
+- if (ret < 0)
+- continue;
++ if (ret < 0) {
++ if (type == SHRINK_BUSY_SKIP)
++ continue;
++ break;
++ }
+ }
+
+ nr_freed++;
+@@ -445,7 +451,7 @@ again:
+ tid = transaction->t_tid;
+
+ freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+- false, &released);
++ SHRINK_BUSY_SKIP, &released);
+ nr_freed += freed;
+ (*nr_to_scan) -= min(*nr_to_scan, freed);
+ if (*nr_to_scan == 0)
+@@ -485,19 +491,21 @@ out:
+ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+ {
+ transaction_t *transaction, *last_transaction, *next_transaction;
++ enum shrink_type type;
+ bool released;
+
+ transaction = journal->j_checkpoint_transactions;
+ if (!transaction)
+ return;
+
++ type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
+ last_transaction = transaction->t_cpprev;
+ next_transaction = transaction;
+ do {
+ transaction = next_transaction;
+ next_transaction = transaction->t_cpnext;
+ journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+- destroy, &released);
++ type, &released);
+ /*
+ * This function only frees up some memory if possible so we
+ * dont have an obligation to finish processing. Bail out if
--- /dev/null
+From 92382d744176f230101d54f5c017bccd62770f01 Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Mon, 7 Aug 2023 08:36:28 -0700
+Subject: lib: test_scanf: Add explicit type cast to result initialization in test_number_prefix()
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 92382d744176f230101d54f5c017bccd62770f01 upstream.
+
+A recent change in clang allows it to consider more expressions as
+compile time constants, which causes it to point out an implicit
+conversion in the scanf tests:
+
+ lib/test_scanf.c:661:2: warning: implicit conversion from 'int' to 'unsigned char' changes value from -168 to 88 [-Wconstant-conversion]
+ 661 | test_number_prefix(unsigned char, "0xA7", "%2hhx%hhx", 0, 0xa7, 2, check_uchar);
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ lib/test_scanf.c:609:29: note: expanded from macro 'test_number_prefix'
+ 609 | T result[2] = {~expect[0], ~expect[1]}; \
+ | ~ ^~~~~~~~~~
+ 1 warning generated.
+
+The result of the bitwise negation is the type of the operand after
+going through the integer promotion rules, so this truncation is
+expected but harmless, as the initial values in the result array get
+overwritten by _test() anyways. Add an explicit cast to the expected
+type in test_number_prefix() to silence the warning. There is no
+functional change, as all the tests still pass with GCC 13.1.0 and clang
+18.0.0.
+
+Cc: stable@vger.kernel.org
+Link: https://github.com/ClangBuiltLinux/linux/issues/1899
+Link: https://github.com/llvm/llvm-project/commit/610ec954e1f81c0e8fcadedcd25afe643f5a094e
+Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Reviewed-by: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Link: https://lore.kernel.org/r/20230807-test_scanf-wconstant-conversion-v2-1-839ca39083e1@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/test_scanf.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/lib/test_scanf.c
++++ b/lib/test_scanf.c
+@@ -606,7 +606,7 @@ static void __init numbers_slice(void)
+ #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn) \
+ do { \
+ const T expect[2] = { expect0, expect1 }; \
+- T result[2] = {~expect[0], ~expect[1]}; \
++ T result[2] = { (T)~expect[0], (T)~expect[1] }; \
+ \
+ _test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]); \
+ } while (0)
--- /dev/null
+From 6f0df8e16eb543167f2929cb756e695709a3551d Mon Sep 17 00:00:00 2001
+From: Johannes Weiner <hannes@cmpxchg.org>
+Date: Wed, 23 Aug 2023 15:54:30 -0700
+Subject: memcontrol: ensure memcg acquired by id is properly set up
+
+From: Johannes Weiner <hannes@cmpxchg.org>
+
+commit 6f0df8e16eb543167f2929cb756e695709a3551d upstream.
+
+In the eviction recency check, we attempt to retrieve the memcg to which
+the folio belonged when it was evicted, by the memcg id stored in the
+shadow entry. However, there is a chance that the retrieved memcg is not
+the original memcg that has been killed, but a new one which happens to
+have the same id.
+
+This is a somewhat unfortunate, but acceptable and rare inaccuracy in the
+heuristics. However, if we retrieve this new memcg between its allocation
+and when it is properly attached to the memcg hierarchy, we could run into
+the following NULL pointer exception during the memcg hierarchy traversal
+done in mem_cgroup_get_nr_swap_pages():
+
+[ 155757.793456] BUG: kernel NULL pointer dereference, address: 00000000000000c0
+[ 155757.807568] #PF: supervisor read access in kernel mode
+[ 155757.818024] #PF: error_code(0x0000) - not-present page
+[ 155757.828482] PGD 401f77067 P4D 401f77067 PUD 401f76067 PMD 0
+[ 155757.839985] Oops: 0000 [#1] SMP
+[ 155757.887870] RIP: 0010:mem_cgroup_get_nr_swap_pages+0x3d/0xb0
+[ 155757.899377] Code: 29 19 4a 02 48 39 f9 74 63 48 8b 97 c0 00 00 00 48 8b b7 58 02 00 00 48 2b b7 c0 01 00 00 48 39 f0 48 0f 4d c6 48 39 d1 74 42 <48> 8b b2 c0 00 00 00 48 8b ba 58 02 00 00 48 2b ba c0 01 00 00 48
+[ 155757.937125] RSP: 0018:ffffc9002ecdfbc8 EFLAGS: 00010286
+[ 155757.947755] RAX: 00000000003a3b1c RBX: 000007ffffffffff RCX: ffff888280183000
+[ 155757.962202] RDX: 0000000000000000 RSI: 0007ffffffffffff RDI: ffff888bbc2d1000
+[ 155757.976648] RBP: 0000000000000001 R08: 000000000000000b R09: ffff888ad9cedba0
+[ 155757.991094] R10: ffffea0039c07900 R11: 0000000000000010 R12: ffff888b23a7b000
+[ 155758.005540] R13: 0000000000000000 R14: ffff888bbc2d1000 R15: 000007ffffc71354
+[ 155758.019991] FS: 00007f6234c68640(0000) GS:ffff88903f9c0000(0000) knlGS:0000000000000000
+[ 155758.036356] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 155758.048023] CR2: 00000000000000c0 CR3: 0000000a83eb8004 CR4: 00000000007706e0
+[ 155758.062473] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 155758.076924] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 155758.091376] PKRU: 55555554
+[ 155758.096957] Call Trace:
+[ 155758.102016] <TASK>
+[ 155758.106502] ? __die+0x78/0xc0
+[ 155758.112793] ? page_fault_oops+0x286/0x380
+[ 155758.121175] ? exc_page_fault+0x5d/0x110
+[ 155758.129209] ? asm_exc_page_fault+0x22/0x30
+[ 155758.137763] ? mem_cgroup_get_nr_swap_pages+0x3d/0xb0
+[ 155758.148060] workingset_test_recent+0xda/0x1b0
+[ 155758.157133] workingset_refault+0xca/0x1e0
+[ 155758.165508] filemap_add_folio+0x4d/0x70
+[ 155758.173538] page_cache_ra_unbounded+0xed/0x190
+[ 155758.182919] page_cache_sync_ra+0xd6/0x1e0
+[ 155758.191738] filemap_read+0x68d/0xdf0
+[ 155758.199495] ? mlx5e_napi_poll+0x123/0x940
+[ 155758.207981] ? __napi_schedule+0x55/0x90
+[ 155758.216095] __x64_sys_pread64+0x1d6/0x2c0
+[ 155758.224601] do_syscall_64+0x3d/0x80
+[ 155758.232058] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+[ 155758.242473] RIP: 0033:0x7f62c29153b5
+[ 155758.249938] Code: e8 48 89 75 f0 89 7d f8 48 89 4d e0 e8 b4 e6 f7 ff 41 89 c0 4c 8b 55 e0 48 8b 55 e8 48 8b 75 f0 8b 7d f8 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 45 f8 e8 e7 e6 f7 ff 48 8b
+[ 155758.288005] RSP: 002b:00007f6234c5ffd0 EFLAGS: 00000293 ORIG_RAX: 0000000000000011
+[ 155758.303474] RAX: ffffffffffffffda RBX: 00007f628c4e70c0 RCX: 00007f62c29153b5
+[ 155758.318075] RDX: 000000000003c041 RSI: 00007f61d2986000 RDI: 0000000000000076
+[ 155758.332678] RBP: 00007f6234c5fff0 R08: 0000000000000000 R09: 0000000064d5230c
+[ 155758.347452] R10: 000000000027d450 R11: 0000000000000293 R12: 000000000003c041
+[ 155758.362044] R13: 00007f61d2986000 R14: 00007f629e11b060 R15: 000000000027d450
+[ 155758.376661] </TASK>
+
+This patch fixes the issue by moving the memcg's id publication from the
+alloc stage to online stage, ensuring that any memcg acquired via id must
+be connected to the memcg tree.
+
+Link: https://lkml.kernel.org/r/20230823225430.166925-1-nphamcs@gmail.com
+Fixes: f78dfc7b77d5 ("workingset: fix confusion around eviction vs refault container")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Co-developed-by: Nhat Pham <nphamcs@gmail.com>
+Signed-off-by: Nhat Pham <nphamcs@gmail.com>
+Acked-by: Shakeel Butt <shakeelb@google.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 22 +++++++++++++++++-----
+ 1 file changed, 17 insertions(+), 5 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -5329,7 +5329,6 @@ static struct mem_cgroup *mem_cgroup_all
+ INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+ memcg->deferred_split_queue.split_queue_len = 0;
+ #endif
+- idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+ lru_gen_init_memcg(memcg);
+ return memcg;
+ fail:
+@@ -5401,14 +5400,27 @@ static int mem_cgroup_css_online(struct
+ if (alloc_shrinker_info(memcg))
+ goto offline_kmem;
+
+- /* Online state pins memcg ID, memcg ID pins CSS */
+- refcount_set(&memcg->id.ref, 1);
+- css_get(css);
+-
+ if (unlikely(mem_cgroup_is_root(memcg)))
+ queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
+ FLUSH_TIME);
+ lru_gen_online_memcg(memcg);
++
++ /* Online state pins memcg ID, memcg ID pins CSS */
++ refcount_set(&memcg->id.ref, 1);
++ css_get(css);
++
++ /*
++ * Ensure mem_cgroup_from_id() works once we're fully online.
++ *
++ * We could do this earlier and require callers to filter with
++ * css_tryget_online(). But right now there are no users that
++ * need earlier access, and the workingset code relies on the
++ * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So
++ * publish it here at the end of onlining. This matches the
++ * regular ID destruction during offlining.
++ */
++ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
++
+ return 0;
+ offline_kmem:
+ memcg_offline_kmem(memcg);
--- /dev/null
+From a2cb9cd6a3949a3804ad9fd7da234892ce6719ec Mon Sep 17 00:00:00 2001
+From: Ekansh Gupta <quic_ekangupt@quicinc.com>
+Date: Fri, 11 Aug 2023 12:56:42 +0100
+Subject: misc: fastrpc: Fix incorrect DMA mapping unmap request
+
+From: Ekansh Gupta <quic_ekangupt@quicinc.com>
+
+commit a2cb9cd6a3949a3804ad9fd7da234892ce6719ec upstream.
+
+Scatterlist table is obtained during map create request and the same
+table is used for DMA mapping unmap. In case there is any failure
+while getting the sg_table, ERR_PTR is returned instead of sg_table.
+
+When the map is getting freed, there is only a non-NULL check of
+sg_table which will also be true in case failure was returned instead
+of sg_table. This would result in improper unmap request. Add proper
+check before setting map table to avoid bad unmap request.
+
+Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Ekansh Gupta <quic_ekangupt@quicinc.com>
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230811115643.38578-3-srinivas.kandagatla@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/misc/fastrpc.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/misc/fastrpc.c
++++ b/drivers/misc/fastrpc.c
+@@ -756,6 +756,7 @@ static int fastrpc_map_create(struct fas
+ {
+ struct fastrpc_session_ctx *sess = fl->sctx;
+ struct fastrpc_map *map = NULL;
++ struct sg_table *table;
+ int err = 0;
+
+ if (!fastrpc_map_lookup(fl, fd, ppmap, true))
+@@ -783,11 +784,12 @@ static int fastrpc_map_create(struct fas
+ goto attach_err;
+ }
+
+- map->table = dma_buf_map_attachment_unlocked(map->attach, DMA_BIDIRECTIONAL);
+- if (IS_ERR(map->table)) {
+- err = PTR_ERR(map->table);
++ table = dma_buf_map_attachment_unlocked(map->attach, DMA_BIDIRECTIONAL);
++ if (IS_ERR(table)) {
++ err = PTR_ERR(table);
+ goto map_err;
+ }
++ map->table = table;
+
+ if (attr & FASTRPC_ATTR_SECUREMAP) {
+ map->phys = sg_phys(map->table->sgl);
--- /dev/null
+From ada6c2d99aedd1eac2f633d03c652e070bc2ea74 Mon Sep 17 00:00:00 2001
+From: Ekansh Gupta <quic_ekangupt@quicinc.com>
+Date: Fri, 11 Aug 2023 12:56:41 +0100
+Subject: misc: fastrpc: Fix remote heap allocation request
+
+From: Ekansh Gupta <quic_ekangupt@quicinc.com>
+
+commit ada6c2d99aedd1eac2f633d03c652e070bc2ea74 upstream.
+
+Remote heap is used by DSP audioPD on need basis. This memory is
+allocated from reserved CMA memory region and is then shared with
+audioPD to use it for its functionality.
+
+Current implementation of remote heap is not allocating the memory
+from CMA region, instead it is allocating the memory from SMMU
+context bank. The arguments passed to the scm call for the reassignment
+of ownership are also not correct. Added changes to allocate CMA
+memory and have a proper ownership reassignment.
+
+Fixes: 532ad70c6d44 ("misc: fastrpc: Add mmap request assigning for static PD pool")
+Cc: stable <stable@kernel.org>
+Tested-by: Ekansh Gupta <quic_ekangupt@quicinc.com>
+Signed-off-by: Ekansh Gupta <quic_ekangupt@quicinc.com>
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230811115643.38578-2-srinivas.kandagatla@linaro.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/misc/fastrpc.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/misc/fastrpc.c
++++ b/drivers/misc/fastrpc.c
+@@ -1871,7 +1871,11 @@ static int fastrpc_req_mmap(struct fastr
+ return -EINVAL;
+ }
+
+- err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, &buf);
++ if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR)
++ err = fastrpc_remote_heap_alloc(fl, dev, req.size, &buf);
++ else
++ err = fastrpc_buf_alloc(fl, dev, req.size, &buf);
++
+ if (err) {
+ dev_err(dev, "failed to allocate buffer\n");
+ return err;
+@@ -1910,12 +1914,8 @@ static int fastrpc_req_mmap(struct fastr
+
+ /* Add memory to static PD pool, protection thru hypervisor */
+ if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) {
+- struct qcom_scm_vmperm perm;
+-
+- perm.vmid = QCOM_SCM_VMID_HLOS;
+- perm.perm = QCOM_SCM_PERM_RWX;
+- err = qcom_scm_assign_mem(buf->phys, buf->size,
+- &fl->cctx->perms, &perm, 1);
++ err = qcom_scm_assign_mem(buf->phys, (u64)buf->size,
++ &fl->cctx->perms, fl->cctx->vmperms, fl->cctx->vmcount);
+ if (err) {
+ dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
+ buf->phys, buf->size, err);
linux-export-fix-reference-to-exported-functions-for-parisc64.patch
watchdog-advantech_ec_wdt-fix-kconfig-dependencies.patch
drm-amd-display-temporary-disable-mst-dp-colorspace-property.patch
+arc-atomics-add-compiler-barrier-to-atomic-operations.patch
+clocksource-drivers-arm_arch_timer-disable-timer-before-programming-cval.patch
+dmaengine-sh-rz-dmac-fix-destination-and-source-data-size-setting.patch
+misc-fastrpc-fix-remote-heap-allocation-request.patch
+misc-fastrpc-fix-incorrect-dma-mapping-unmap-request.patch
+jbd2-fix-checkpoint-cleanup-performance-regression.patch
+jbd2-check-jh-b_transaction-before-removing-it-from-checkpoint.patch
+jbd2-correct-the-end-of-the-journal-recovery-scan-range.patch
+ext4-fix-slab-use-after-free-in-ext4_es_insert_extent.patch
+ext4-add-correct-group-descriptors-and-reserved-gdt-blocks-to-system-zone.patch
+ext4-fix-memory-leaks-in-ext4_fname_-setup_filename-prepare_lookup.patch
+ext4-drop-dio-overwrite-only-flag-and-associated-warning.patch
+ext4-fix-bug-in-ext4_mb_new_inode_pa-due-to-overflow.patch
+f2fs-get-out-of-a-repeat-loop-when-getting-a-locked-data-page.patch
+f2fs-flush-inode-if-atomic-file-is-aborted.patch
+f2fs-avoid-false-alarm-of-circular-locking.patch
+lib-test_scanf-add-explicit-type-cast-to-result-initialization-in-test_number_prefix.patch
+hwspinlock-qcom-add-missing-regmap-config-for-sfpb-mmio-implementation.patch
+memcontrol-ensure-memcg-acquired-by-id-is-properly-set-up.patch
+ata-ahci-add-elkhart-lake-ahci-controller.patch
+ata-pata_falcon-fix-io-base-selection-for-q40.patch
+ata-sata_gemini-add-missing-module_description.patch
+ata-pata_ftide010-add-missing-module_description.patch
+fuse-nlookup-missing-decrement-in-fuse_direntplus_link.patch
+btrfs-zoned-do-not-zone-finish-data-relocation-block-group.patch
+btrfs-fix-start-transaction-qgroup-rsv-double-free.patch
+btrfs-free-qgroup-rsv-on-io-failure.patch
+btrfs-don-t-start-transaction-when-joining-with-trans_join_nostart.patch
+btrfs-set-page-extent-mapped-after-read_folio-in-relocate_one_page.patch
+btrfs-zoned-re-enable-metadata-over-commit-for-zoned-mode.patch