--- /dev/null
+From stable+bounces-216826-greg=kroah.com@vger.kernel.org Tue Feb 17 14:35:05 2026
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 17 Feb 2026 13:34:07 +0000
+Subject: arm64: mm: Batch dsb and isb when populating pgtables
+To: stable@vger.kernel.org
+Cc: Ryan Roberts <ryan.roberts@arm.com>, catalin.marinas@arm.com, will@kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Jack Aboutboul <jaboutboul@microsoft.com>, Sharath George John <sgeorgejohn@microsoft.com>, Noah Meyerhans <nmeyerhans@microsoft.com>, Jim Perrin <Jim.Perrin@microsoft.com>, Itaru Kitayama <itaru.kitayama@fujitsu.com>, Eric Chanudet <echanude@redhat.com>, Mark Rutland <mark.rutland@arm.com>, Ard Biesheuvel <ardb@kernel.org>
+Message-ID: <20260217133411.2881311-3-ryan.roberts@arm.com>
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+[ Upstream commit 1fcb7cea8a5f7747e02230f816c2c80b060d9517 ]
+
+After removing unnecessary TLBIs, the next bottleneck when creating the
+page tables for the linear map is DSB and ISB, which were previously
+issued per-pte in __set_pte(). Since we are writing multiple ptes in a
+given pte table, we can elide these barriers and insert them once we
+have finished writing to the table.
+
+Execution time of map_mem(), which creates the kernel linear map page
+tables, was measured on different machines with different RAM configs:
+
+ | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
+ | VM, 16G | VM, 64G | VM, 256G | Metal, 512G
+---------------|-------------|-------------|-------------|-------------
+ | ms (%) | ms (%) | ms (%) | ms (%)
+---------------|-------------|-------------|-------------|-------------
+before | 78 (0%) | 435 (0%) | 1723 (0%) | 3779 (0%)
+after | 11 (-86%) | 161 (-63%) | 656 (-62%) | 1654 (-56%)
+
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
+Tested-by: Eric Chanudet <echanude@redhat.com>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240412131908.433043-3-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Ryan: Trivial backport ]
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/pgtable.h | 7 ++++++-
+ arch/arm64/mm/mmu.c | 11 ++++++++++-
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -262,9 +262,14 @@ static inline pte_t pte_mkdevmap(pte_t p
+ return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
+ }
+
+-static inline void set_pte(pte_t *ptep, pte_t pte)
++static inline void set_pte_nosync(pte_t *ptep, pte_t pte)
+ {
+ WRITE_ONCE(*ptep, pte);
++}
++
++static inline void set_pte(pte_t *ptep, pte_t pte)
++{
++ set_pte_nosync(ptep, pte);
+
+ /*
+ * Only if the new pte is valid and kernel, otherwise TLB maintenance
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -175,7 +175,11 @@ static void init_pte(pte_t *ptep, unsign
+ do {
+ pte_t old_pte = READ_ONCE(*ptep);
+
+- set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
++ /*
++ * Required barriers to make this visible to the table walker
++ * are deferred to the end of alloc_init_cont_pte().
++ */
++ set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+
+ /*
+ * After the PTE entry has been populated once, we
+@@ -229,6 +233,11 @@ static void alloc_init_cont_pte(pmd_t *p
+ phys += next - addr;
+ } while (addr = next, addr != end);
+
++ /*
++ * Note: barriers and maintenance necessary to clear the fixmap slot
++ * ensure that all previous pgtable writes are visible to the table
++ * walker.
++ */
+ pte_clear_fixmap();
+ }
+
--- /dev/null
+From stable+bounces-216827-greg=kroah.com@vger.kernel.org Tue Feb 17 14:34:55 2026
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 17 Feb 2026 13:34:08 +0000
+Subject: arm64: mm: Don't remap pgtables for allocate vs populate
+To: stable@vger.kernel.org
+Cc: Ryan Roberts <ryan.roberts@arm.com>, catalin.marinas@arm.com, will@kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Jack Aboutboul <jaboutboul@microsoft.com>, Sharath George John <sgeorgejohn@microsoft.com>, Noah Meyerhans <nmeyerhans@microsoft.com>, Jim Perrin <Jim.Perrin@microsoft.com>, Mark Rutland <mark.rutland@arm.com>, Itaru Kitayama <itaru.kitayama@fujitsu.com>, Eric Chanudet <echanude@redhat.com>, Ard Biesheuvel <ardb@kernel.org>
+Message-ID: <20260217133411.2881311-4-ryan.roberts@arm.com>
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+[ Upstream commit 0e9df1c905d8293d333ace86c13d147382f5caf9 ]
+
+During linear map pgtable creation, each pgtable is fixmapped /
+fixunmapped twice; once during allocation to zero the memory, and
+again during population to write the entries. This means each table has
+2 TLB invalidations issued against it. Let's fix this so that each table
+is only fixmapped/fixunmapped once, halving the number of TLBIs, and
+improving performance.
+
+Achieve this by separating allocation and initialization (zeroing) of
+the page. The allocated page is now fixmapped directly by the walker and
+initialized, before being populated and finally fixunmapped.
+
+This approach keeps the change small, but has the side effect that late
+allocations (using __get_free_page()) must also go through the generic
+memory clearing routine. So let's tell __get_free_page() not to zero the
+memory to avoid duplication.
+
+Additionally this approach means that fixmap/fixunmap is still used for
+late pgtable modifications. That's not technically needed since the
+memory is all mapped in the linear map by that point. That's left as a
+possible future optimization if found to be needed.
+
+Execution time of map_mem(), which creates the kernel linear map page
+tables, was measured on different machines with different RAM configs:
+
+ | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
+ | VM, 16G | VM, 64G | VM, 256G | Metal, 512G
+---------------|-------------|-------------|-------------|-------------
+ | ms (%) | ms (%) | ms (%) | ms (%)
+---------------|-------------|-------------|-------------|-------------
+before | 11 (0%) | 161 (0%) | 656 (0%) | 1654 (0%)
+after | 10 (-11%) | 104 (-35%) | 438 (-33%) | 1223 (-26%)
+
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Suggested-by: Mark Rutland <mark.rutland@arm.com>
+Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
+Tested-by: Eric Chanudet <echanude@redhat.com>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240412131908.433043-4-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Ryan: Trivial backport ]
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/mmu.c | 58 ++++++++++++++++++++++++++--------------------------
+ 1 file changed, 29 insertions(+), 29 deletions(-)
+
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -106,28 +106,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
+ static phys_addr_t __init early_pgtable_alloc(int shift)
+ {
+ phys_addr_t phys;
+- void *ptr;
+
+ phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
+ MEMBLOCK_ALLOC_NOLEAKTRACE);
+ if (!phys)
+ panic("Failed to allocate page table page\n");
+
+- /*
+- * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+- * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+- * any level of table.
+- */
+- ptr = pte_set_fixmap(phys);
+-
+- memset(ptr, 0, PAGE_SIZE);
+-
+- /*
+- * Implicit barriers also ensure the zeroed page is visible to the page
+- * table walker
+- */
+- pte_clear_fixmap();
+-
+ return phys;
+ }
+
+@@ -169,6 +153,14 @@ bool pgattr_change_is_safe(u64 old, u64
+ return ((old ^ new) & ~mask) == 0;
+ }
+
++static void init_clear_pgtable(void *table)
++{
++ clear_page(table);
++
++ /* Ensure the zeroing is observed by page table walks. */
++ dsb(ishst);
++}
++
+ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot)
+ {
+@@ -211,12 +203,15 @@ static void alloc_init_cont_pte(pmd_t *p
+ pmdval |= PMD_TABLE_PXN;
+ BUG_ON(!pgtable_alloc);
+ pte_phys = pgtable_alloc(PAGE_SHIFT);
++ ptep = pte_set_fixmap(pte_phys);
++ init_clear_pgtable(ptep);
++ ptep += pte_index(addr);
+ __pmd_populate(pmdp, pte_phys, pmdval);
+- pmd = READ_ONCE(*pmdp);
++ } else {
++ BUG_ON(pmd_bad(pmd));
++ ptep = pte_set_fixmap_offset(pmdp, addr);
+ }
+- BUG_ON(pmd_bad(pmd));
+
+- ptep = pte_set_fixmap_offset(pmdp, addr);
+ do {
+ pgprot_t __prot = prot;
+
+@@ -295,12 +290,15 @@ static void alloc_init_cont_pmd(pud_t *p
+ pudval |= PUD_TABLE_PXN;
+ BUG_ON(!pgtable_alloc);
+ pmd_phys = pgtable_alloc(PMD_SHIFT);
++ pmdp = pmd_set_fixmap(pmd_phys);
++ init_clear_pgtable(pmdp);
++ pmdp += pmd_index(addr);
+ __pud_populate(pudp, pmd_phys, pudval);
+- pud = READ_ONCE(*pudp);
++ } else {
++ BUG_ON(pud_bad(pud));
++ pmdp = pmd_set_fixmap_offset(pudp, addr);
+ }
+- BUG_ON(pud_bad(pud));
+
+- pmdp = pmd_set_fixmap_offset(pudp, addr);
+ do {
+ pgprot_t __prot = prot;
+
+@@ -338,12 +336,15 @@ static void alloc_init_pud(pgd_t *pgdp,
+ p4dval |= P4D_TABLE_PXN;
+ BUG_ON(!pgtable_alloc);
+ pud_phys = pgtable_alloc(PUD_SHIFT);
++ pudp = pud_set_fixmap(pud_phys);
++ init_clear_pgtable(pudp);
++ pudp += pud_index(addr);
+ __p4d_populate(p4dp, pud_phys, p4dval);
+- p4d = READ_ONCE(*p4dp);
++ } else {
++ BUG_ON(p4d_bad(p4d));
++ pudp = pud_set_fixmap_offset(p4dp, addr);
+ }
+- BUG_ON(p4d_bad(p4d));
+
+- pudp = pud_set_fixmap_offset(p4dp, addr);
+ do {
+ pud_t old_pud = READ_ONCE(*pudp);
+
+@@ -425,11 +426,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdi
+
+ static phys_addr_t __pgd_pgtable_alloc(int shift)
+ {
+- void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
+- BUG_ON(!ptr);
++ /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
++ void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
+
+- /* Ensure the zeroed page is visible to the page table walker */
+- dsb(ishst);
++ BUG_ON(!ptr);
+ return __pa(ptr);
+ }
+
--- /dev/null
+From stable+bounces-216825-greg=kroah.com@vger.kernel.org Tue Feb 17 14:34:47 2026
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 17 Feb 2026 13:34:06 +0000
+Subject: arm64: mm: Don't remap pgtables per-cont(pte|pmd) block
+To: stable@vger.kernel.org
+Cc: Ryan Roberts <ryan.roberts@arm.com>, catalin.marinas@arm.com, will@kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Jack Aboutboul <jaboutboul@microsoft.com>, Sharath George John <sgeorgejohn@microsoft.com>, Noah Meyerhans <nmeyerhans@microsoft.com>, Jim Perrin <Jim.Perrin@microsoft.com>, Itaru Kitayama <itaru.kitayama@fujitsu.com>, Eric Chanudet <echanude@redhat.com>, Mark Rutland <mark.rutland@arm.com>, Ard Biesheuvel <ardb@kernel.org>
+Message-ID: <20260217133411.2881311-2-ryan.roberts@arm.com>
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+[ Upstream commit 5c63db59c5f89925add57642be4f789d0d671ccd ]
+
+A large part of the kernel boot time is creating the kernel linear map
+page tables. When rodata=full, all memory is mapped by pte. And when
+there is lots of physical ram, there are lots of pte tables to populate.
+The primary cost associated with this is mapping and unmapping the pte
+table memory in the fixmap; at unmap time, the TLB entry must be
+invalidated and this is expensive.
+
+Previously, each pmd and pte table was fixmapped/fixunmapped for each
+cont(pte|pmd) block of mappings (16 entries with 4K granule). This means
+we ended up issuing 32 TLBIs per (pmd|pte) table during the population
+phase.
+
+Let's fix that, and fixmap/fixunmap each page once per population, for a
+saving of 31 TLBIs per (pmd|pte) table. This gives a significant boot
+speedup.
+
+Execution time of map_mem(), which creates the kernel linear map page
+tables, was measured on different machines with different RAM configs:
+
+ | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
+ | VM, 16G | VM, 64G | VM, 256G | Metal, 512G
+---------------|-------------|-------------|-------------|-------------
+ | ms (%) | ms (%) | ms (%) | ms (%)
+---------------|-------------|-------------|-------------|-------------
+before | 168 (0%) | 2198 (0%) | 8644 (0%) | 17447 (0%)
+after | 78 (-53%) | 435 (-80%) | 1723 (-80%) | 3779 (-78%)
+
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
+Tested-by: Eric Chanudet <echanude@redhat.com>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240412131908.433043-2-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Ryan: Trivial backport ]
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/mmu.c | 27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -169,12 +169,9 @@ bool pgattr_change_is_safe(u64 old, u64
+ return ((old ^ new) & ~mask) == 0;
+ }
+
+-static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
++static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot)
+ {
+- pte_t *ptep;
+-
+- ptep = pte_set_fixmap_offset(pmdp, addr);
+ do {
+ pte_t old_pte = READ_ONCE(*ptep);
+
+@@ -189,8 +186,6 @@ static void init_pte(pmd_t *pmdp, unsign
+
+ phys += PAGE_SIZE;
+ } while (ptep++, addr += PAGE_SIZE, addr != end);
+-
+- pte_clear_fixmap();
+ }
+
+ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
+@@ -201,6 +196,7 @@ static void alloc_init_cont_pte(pmd_t *p
+ {
+ unsigned long next;
+ pmd_t pmd = READ_ONCE(*pmdp);
++ pte_t *ptep;
+
+ BUG_ON(pmd_sect(pmd));
+ if (pmd_none(pmd)) {
+@@ -216,6 +212,7 @@ static void alloc_init_cont_pte(pmd_t *p
+ }
+ BUG_ON(pmd_bad(pmd));
+
++ ptep = pte_set_fixmap_offset(pmdp, addr);
+ do {
+ pgprot_t __prot = prot;
+
+@@ -226,20 +223,21 @@ static void alloc_init_cont_pte(pmd_t *p
+ (flags & NO_CONT_MAPPINGS) == 0)
+ __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+
+- init_pte(pmdp, addr, next, phys, __prot);
++ init_pte(ptep, addr, next, phys, __prot);
+
++ ptep += pte_index(next) - pte_index(addr);
+ phys += next - addr;
+ } while (addr = next, addr != end);
++
++ pte_clear_fixmap();
+ }
+
+-static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
++static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ phys_addr_t (*pgtable_alloc)(int), int flags)
+ {
+ unsigned long next;
+- pmd_t *pmdp;
+
+- pmdp = pmd_set_fixmap_offset(pudp, addr);
+ do {
+ pmd_t old_pmd = READ_ONCE(*pmdp);
+
+@@ -265,8 +263,6 @@ static void init_pmd(pud_t *pudp, unsign
+ }
+ phys += next - addr;
+ } while (pmdp++, addr = next, addr != end);
+-
+- pmd_clear_fixmap();
+ }
+
+ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
+@@ -276,6 +272,7 @@ static void alloc_init_cont_pmd(pud_t *p
+ {
+ unsigned long next;
+ pud_t pud = READ_ONCE(*pudp);
++ pmd_t *pmdp;
+
+ /*
+ * Check for initial section mappings in the pgd/pud.
+@@ -294,6 +291,7 @@ static void alloc_init_cont_pmd(pud_t *p
+ }
+ BUG_ON(pud_bad(pud));
+
++ pmdp = pmd_set_fixmap_offset(pudp, addr);
+ do {
+ pgprot_t __prot = prot;
+
+@@ -304,10 +302,13 @@ static void alloc_init_cont_pmd(pud_t *p
+ (flags & NO_CONT_MAPPINGS) == 0)
+ __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+
+- init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
++ init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
+
++ pmdp += pmd_index(next) - pmd_index(addr);
+ phys += next - addr;
+ } while (addr = next, addr != end);
++
++ pmd_clear_fixmap();
+ }
+
+ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
--- /dev/null
+From stable+bounces-217704-greg=kroah.com@vger.kernel.org Mon Feb 23 09:07:37 2026
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 23 Feb 2026 18:33:48 +1030
+Subject: btrfs: always fallback to buffered write if the inode requires checksum
+To: linux-btrfs@vger.kernel.org
+Cc: stable@vger.kernel.org, Christoph Hellwig <hch@infradead.org>, Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>
+Message-ID: <5c3a9c8f484ed1ba8fe897e67057eec24968f7bd.1771833812.git.wqu@suse.com>
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 968f19c5b1b7d5595423b0ac0020cc18dfed8cb5 upstream.
+
+[BUG]
+It is a long known bug that VM image on btrfs can lead to data csum
+mismatch, if the qemu is using direct-io for the image (this is commonly
+known as cache mode 'none').
+
+[CAUSE]
+Inside the VM, if the fs is EXT4 or XFS, or even NTFS from Windows, the
+fs is allowed to dirty/modify the folio even if the folio is under
+writeback (as long as the address space doesn't have AS_STABLE_WRITES
+flag inherited from the block device).
+
+This is a valid optimization to improve the concurrency, and since these
+filesystems have no extra checksum on data, the content change is not a
+problem at all.
+
+But the final write into the image file is handled by btrfs, which needs
+the content not to be modified during writeback, or the checksum will
+not match the data (checksum is calculated before submitting the bio).
+
+So EXT4/XFS/NTFS assume they can modify the folio under writeback, but
+btrfs requires no modification, this leads to the false csum mismatch.
+
+This is only a controlled example, there are even cases where
+multi-thread programs can submit a direct IO write, then another thread
+modifies the direct IO buffer for whatever reason.
+
+For such cases, btrfs has no sane way to detect such cases and leads to
+false data csum mismatch.
+
+[FIX]
+I have considered the following ideas to solve the problem:
+
+- Make direct IO to always skip data checksum
+ This not only requires a new incompatible flag, as it breaks the
+ current per-inode NODATASUM flag.
+ But also requires extra handling for no csum found cases.
+
+ And this also reduces our checksum protection.
+
+- Let hardware handle all the checksum
+ AKA, just nodatasum mount option.
+ That requires trust for hardware (which is not that trustful in a lot
+ of cases), and it's not generic at all.
+
+- Always fallback to buffered write if the inode requires checksum
+ This was suggested by Christoph, and is the solution utilized by this
+ patch.
+
+ The cost is obvious, the extra buffer copying into page cache, thus it
+ reduces the performance.
+ But at least it's still user configurable, if the end user still wants
+ the zero-copy performance, just set NODATASUM flag for the inode
+ (which is a common practice for VM images on btrfs).
+
+ Since we cannot trust user space programs to keep the buffer
+ consistent during direct IO, we have no choice but always falling back
+ to buffered IO. At least by this, we avoid the more deadly false data
+ checksum mismatch error.
+
+Cc: stable@vger.kernel.org # 6.6
+[ Conflicts caused by code extracted into direct-io.c ]
+Suggested-by: Christoph Hellwig <hch@infradead.org>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1514,6 +1514,22 @@ relock:
+ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
+ goto buffered;
+ }
++ /*
++ * We can't control the folios being passed in, applications can write
++ * to them while a direct IO write is in progress. This means the
++ * content might change after we calculated the data checksum.
++ * Therefore we can end up storing a checksum that doesn't match the
++ * persisted data.
++ *
++ * To be extra safe and avoid false data checksum mismatch, if the
++ * inode requires data checksum, just fallback to buffered IO.
++ * For buffered IO we have full control of page cache and can ensure
++ * no one is modifying the content during writeback.
++ */
++ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
++ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
++ goto buffered;
++ }
+
+ /*
+ * The iov_iter can be mapped to the same file range we are writing to.
--- /dev/null
+From stable+bounces-217853-greg=kroah.com@vger.kernel.org Tue Feb 24 04:37:02 2026
+From: Bin Lan <lanbincn@139.com>
+Date: Tue, 24 Feb 2026 03:32:14 +0000
+Subject: btrfs: fix NULL dereference on root when tracing inode eviction
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: "Miquel Sabaté Solà " <mssola@mssola.com>, syzbot+d991fea1b4b23b1f6bf8@syzkaller.appspotmail.com, "David Sterba" <dsterba@suse.com>, "Bin Lan" <lanbincn@139.com>
+Message-ID: <20260224033214.4976-1-lanbincn@139.com>
+
+From: Miquel Sabaté Solà <mssola@mssola.com>
+
+[ Upstream commit f157dd661339fc6f5f2b574fe2429c43bd309534 ]
+
+When evicting an inode the first thing we do is to setup tracing for it,
+which implies fetching the root's id. But in btrfs_evict_inode() the
+root might be NULL, as implied in the next check that we do in
+btrfs_evict_inode().
+
+Hence, we either should set the ->root_objectid to 0 in case the root is
+NULL, or we move tracing setup after checking that the root is not
+NULL. Setting the rootid to 0 at least gives us the possibility to trace
+this call even in the case when the root is NULL, so that's the solution
+taken here.
+
+Fixes: 1abe9b8a138c ("Btrfs: add initial tracepoint support for btrfs")
+Reported-by: syzbot+d991fea1b4b23b1f6bf8@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=d991fea1b4b23b1f6bf8
+Signed-off-by: Miquel Sabaté Solà <mssola@mssola.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[ Adjust context ]
+Signed-off-by: Bin Lan <lanbincn@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/btrfs.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/trace/events/btrfs.h
++++ b/include/trace/events/btrfs.h
+@@ -225,8 +225,8 @@ DECLARE_EVENT_CLASS(btrfs__inode,
+ __entry->generation = BTRFS_I(inode)->generation;
+ __entry->last_trans = BTRFS_I(inode)->last_trans;
+ __entry->logged_trans = BTRFS_I(inode)->logged_trans;
+- __entry->root_objectid =
+- BTRFS_I(inode)->root->root_key.objectid;
++ __entry->root_objectid = BTRFS_I(inode)->root ?
++ btrfs_root_id(BTRFS_I(inode)->root) : 0;
+ ),
+
+ TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu "
--- /dev/null
+From stable+bounces-219751-greg=kroah.com@vger.kernel.org Thu Feb 26 06:05:28 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Thu, 26 Feb 2026 13:04:18 +0800
+Subject: dm-verity: disable recursive forward error correction
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Mikulas Patocka <mpatocka@redhat.com>, Guangwu Zhang <guazhang@redhat.com>, Sami Tolvanen <samitolvanen@google.com>, Eric Biggers <ebiggers@kernel.org>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260226050418.159241-1-black.hawk@163.com>
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit d9f3e47d3fae0c101d9094bc956ed24e7a0ee801 ]
+
+There are two problems with the recursive correction:
+
+1. It may cause denial-of-service. In fec_read_bufs, there is a loop that
+has 253 iterations. For each iteration, we may call verity_hash_for_block
+recursively. There is a limit of 4 nested recursions - that means that
+there may be at most 253^4 (4 billion) iterations. Red Hat QE team
+actually created an image that pushes dm-verity to this limit - and this
+image just makes the udev-worker process get stuck in the 'D' state.
+
+2. It doesn't work. In fec_read_bufs we store data into the variable
+"fio->bufs", but fio bufs is shared between recursive invocations, if
+"verity_hash_for_block" invoked correction recursively, it would
+overwrite partially filled fio->bufs.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reported-by: Guangwu Zhang <guazhang@redhat.com>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Reviewed-by: Eric Biggers <ebiggers@kernel.org>
+[ The context change is due to the commit bdf253d580d7
+("dm-verity: remove support for asynchronous hashes")
+in v6.18 and the commit 9356fcfe0ac4
+("dm verity: set DM_TARGET_SINGLETON feature flag") in v6.9
+which are irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-verity-fec.c | 4 +---
+ drivers/md/dm-verity-fec.h | 3 ---
+ 2 files changed, 1 insertion(+), 6 deletions(-)
+
+--- a/drivers/md/dm-verity-fec.c
++++ b/drivers/md/dm-verity-fec.c
+@@ -439,10 +439,8 @@ int verity_fec_decode(struct dm_verity *
+ if (!verity_fec_is_enabled(v))
+ return -EOPNOTSUPP;
+
+- if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
+- DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
++ if (fio->level)
+ return -EIO;
+- }
+
+ fio->level++;
+
+--- a/drivers/md/dm-verity-fec.h
++++ b/drivers/md/dm-verity-fec.h
+@@ -23,9 +23,6 @@
+ #define DM_VERITY_FEC_BUF_MAX \
+ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+
+-/* maximum recursion level for verity_fec_decode */
+-#define DM_VERITY_FEC_MAX_RECURSION 4
+-
+ #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
+ #define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
+ #define DM_VERITY_OPT_FEC_START "fec_start"
--- /dev/null
+From stable+bounces-217861-greg=kroah.com@vger.kernel.org Tue Feb 24 06:50:52 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Tue, 24 Feb 2026 13:49:43 +0800
+Subject: dst: fix races in rt6_uncached_list_del() and rt_del_uncached_list()
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Eric Dumazet <edumazet@google.com>, syzbot+179fc225724092b8b2b2@syzkaller.appspotmail.com, Martin KaFai Lau <martin.lau@kernel.org>, David Ahern <dsahern@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260224054943.3324184-1-black.hawk@163.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9a6f0c4d5796ab89b5a28a890ce542344d58bd69 ]
+
+syzbot was able to crash the kernel in rt6_uncached_list_flush_dev()
+in an interesting way [1]
+
+Crash happens in list_del_init()/INIT_LIST_HEAD() while writing
+list->prev, while the prior write on list->next went well.
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ WRITE_ONCE(list->next, list); // This went well
+ WRITE_ONCE(list->prev, list); // Crash, @list has been freed.
+}
+
+Issue here is that rt6_uncached_list_del() did not attempt to lock
+ul->lock, as list_empty(&rt->dst.rt_uncached) returned
+true because the WRITE_ONCE(list->next, list) happened on the other CPU.
+
+We might use list_del_init_careful() and list_empty_careful(),
+or make sure rt6_uncached_list_del() always grabs the spinlock
+whenever rt->dst.rt_uncached_list has been set.
+
+A similar fix is needed for IPv4.
+
+[1]
+
+ BUG: KASAN: slab-use-after-free in INIT_LIST_HEAD include/linux/list.h:46 [inline]
+ BUG: KASAN: slab-use-after-free in list_del_init include/linux/list.h:296 [inline]
+ BUG: KASAN: slab-use-after-free in rt6_uncached_list_flush_dev net/ipv6/route.c:191 [inline]
+ BUG: KASAN: slab-use-after-free in rt6_disable_ip+0x633/0x730 net/ipv6/route.c:5020
+Write of size 8 at addr ffff8880294cfa78 by task kworker/u8:14/3450
+
+CPU: 0 UID: 0 PID: 3450 Comm: kworker/u8:14 Tainted: G L syzkaller #0 PREEMPT_{RT,(full)}
+Tainted: [L]=SOFTLOCKUP
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025
+Workqueue: netns cleanup_net
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120
+ print_address_description mm/kasan/report.c:378 [inline]
+ print_report+0xca/0x240 mm/kasan/report.c:482
+ kasan_report+0x118/0x150 mm/kasan/report.c:595
+ INIT_LIST_HEAD include/linux/list.h:46 [inline]
+ list_del_init include/linux/list.h:296 [inline]
+ rt6_uncached_list_flush_dev net/ipv6/route.c:191 [inline]
+ rt6_disable_ip+0x633/0x730 net/ipv6/route.c:5020
+ addrconf_ifdown+0x143/0x18a0 net/ipv6/addrconf.c:3853
+ addrconf_notify+0x1bc/0x1050 net/ipv6/addrconf.c:-1
+ notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
+ call_netdevice_notifiers_extack net/core/dev.c:2268 [inline]
+ call_netdevice_notifiers net/core/dev.c:2282 [inline]
+ netif_close_many+0x29c/0x410 net/core/dev.c:1785
+ unregister_netdevice_many_notify+0xb50/0x2330 net/core/dev.c:12353
+ ops_exit_rtnl_list net/core/net_namespace.c:187 [inline]
+ ops_undo_list+0x3dc/0x990 net/core/net_namespace.c:248
+ cleanup_net+0x4de/0x7b0 net/core/net_namespace.c:696
+ process_one_work kernel/workqueue.c:3257 [inline]
+ process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+ worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+ kthread+0x711/0x8a0 kernel/kthread.c:463
+ ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+ </TASK>
+
+Allocated by task 803:
+ kasan_save_stack mm/kasan/common.c:57 [inline]
+ kasan_save_track+0x3e/0x80 mm/kasan/common.c:78
+ unpoison_slab_object mm/kasan/common.c:340 [inline]
+ __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:366
+ kasan_slab_alloc include/linux/kasan.h:253 [inline]
+ slab_post_alloc_hook mm/slub.c:4953 [inline]
+ slab_alloc_node mm/slub.c:5263 [inline]
+ kmem_cache_alloc_noprof+0x18d/0x6c0 mm/slub.c:5270
+ dst_alloc+0x105/0x170 net/core/dst.c:89
+ ip6_dst_alloc net/ipv6/route.c:342 [inline]
+ icmp6_dst_alloc+0x75/0x460 net/ipv6/route.c:3333
+ mld_sendpack+0x683/0xe60 net/ipv6/mcast.c:1844
+ mld_send_cr net/ipv6/mcast.c:2154 [inline]
+ mld_ifc_work+0x83e/0xd60 net/ipv6/mcast.c:2693
+ process_one_work kernel/workqueue.c:3257 [inline]
+ process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+ worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+ kthread+0x711/0x8a0 kernel/kthread.c:463
+ ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+
+Freed by task 20:
+ kasan_save_stack mm/kasan/common.c:57 [inline]
+ kasan_save_track+0x3e/0x80 mm/kasan/common.c:78
+ kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584
+ poison_slab_object mm/kasan/common.c:253 [inline]
+ __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:285
+ kasan_slab_free include/linux/kasan.h:235 [inline]
+ slab_free_hook mm/slub.c:2540 [inline]
+ slab_free mm/slub.c:6670 [inline]
+ kmem_cache_free+0x18f/0x8d0 mm/slub.c:6781
+ dst_destroy+0x235/0x350 net/core/dst.c:121
+ rcu_do_batch kernel/rcu/tree.c:2605 [inline]
+ rcu_core kernel/rcu/tree.c:2857 [inline]
+ rcu_cpu_kthread+0xba5/0x1af0 kernel/rcu/tree.c:2945
+ smpboot_thread_fn+0x542/0xa60 kernel/smpboot.c:160
+ kthread+0x711/0x8a0 kernel/kthread.c:463
+ ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+
+Last potentially related work creation:
+ kasan_save_stack+0x3e/0x60 mm/kasan/common.c:57
+ kasan_record_aux_stack+0xbd/0xd0 mm/kasan/generic.c:556
+ __call_rcu_common kernel/rcu/tree.c:3119 [inline]
+ call_rcu+0xee/0x890 kernel/rcu/tree.c:3239
+ refdst_drop include/net/dst.h:266 [inline]
+ skb_dst_drop include/net/dst.h:278 [inline]
+ skb_release_head_state+0x71/0x360 net/core/skbuff.c:1156
+ skb_release_all net/core/skbuff.c:1180 [inline]
+ __kfree_skb net/core/skbuff.c:1196 [inline]
+ sk_skb_reason_drop+0xe9/0x170 net/core/skbuff.c:1234
+ kfree_skb_reason include/linux/skbuff.h:1322 [inline]
+ tcf_kfree_skb_list include/net/sch_generic.h:1127 [inline]
+ __dev_xmit_skb net/core/dev.c:4260 [inline]
+ __dev_queue_xmit+0x26aa/0x3210 net/core/dev.c:4785
+ NF_HOOK_COND include/linux/netfilter.h:307 [inline]
+ ip6_output+0x340/0x550 net/ipv6/ip6_output.c:247
+ NF_HOOK+0x9e/0x380 include/linux/netfilter.h:318
+ mld_sendpack+0x8d4/0xe60 net/ipv6/mcast.c:1855
+ mld_send_cr net/ipv6/mcast.c:2154 [inline]
+ mld_ifc_work+0x83e/0xd60 net/ipv6/mcast.c:2693
+ process_one_work kernel/workqueue.c:3257 [inline]
+ process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+ worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+ kthread+0x711/0x8a0 kernel/kthread.c:463
+ ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+
+The buggy address belongs to the object at ffff8880294cfa00
+ which belongs to the cache ip6_dst_cache of size 232
+The buggy address is located 120 bytes inside of
+ freed 232-byte region [ffff8880294cfa00, ffff8880294cfae8)
+
+The buggy address belongs to the physical page:
+page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x294cf
+memcg:ffff88803536b781
+flags: 0x80000000000000(node=0|zone=1)
+page_type: f5(slab)
+raw: 0080000000000000 ffff88802ff1c8c0 ffffea0000bf2bc0 dead000000000006
+raw: 0000000000000000 00000000800c000c 00000000f5000000 ffff88803536b781
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 0, migratetype Unmovable, gfp_mask 0x52820(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP), pid 9, tgid 9 (kworker/0:0), ts 91119585830, free_ts 91088628818
+ set_page_owner include/linux/page_owner.h:32 [inline]
+ post_alloc_hook+0x234/0x290 mm/page_alloc.c:1857
+ prep_new_page mm/page_alloc.c:1865 [inline]
+ get_page_from_freelist+0x28c0/0x2960 mm/page_alloc.c:3915
+ __alloc_frozen_pages_noprof+0x181/0x370 mm/page_alloc.c:5210
+ alloc_pages_mpol+0xd1/0x380 mm/mempolicy.c:2486
+ alloc_slab_page mm/slub.c:3075 [inline]
+ allocate_slab+0x86/0x3b0 mm/slub.c:3248
+ new_slab mm/slub.c:3302 [inline]
+ ___slab_alloc+0xb10/0x13e0 mm/slub.c:4656
+ __slab_alloc+0xc6/0x1f0 mm/slub.c:4779
+ __slab_alloc_node mm/slub.c:4855 [inline]
+ slab_alloc_node mm/slub.c:5251 [inline]
+ kmem_cache_alloc_noprof+0x101/0x6c0 mm/slub.c:5270
+ dst_alloc+0x105/0x170 net/core/dst.c:89
+ ip6_dst_alloc net/ipv6/route.c:342 [inline]
+ icmp6_dst_alloc+0x75/0x460 net/ipv6/route.c:3333
+ mld_sendpack+0x683/0xe60 net/ipv6/mcast.c:1844
+ mld_send_cr net/ipv6/mcast.c:2154 [inline]
+ mld_ifc_work+0x83e/0xd60 net/ipv6/mcast.c:2693
+ process_one_work kernel/workqueue.c:3257 [inline]
+ process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+ worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+ kthread+0x711/0x8a0 kernel/kthread.c:463
+ ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+page last free pid 5859 tgid 5859 stack trace:
+ reset_page_owner include/linux/page_owner.h:25 [inline]
+ free_pages_prepare mm/page_alloc.c:1406 [inline]
+ __free_frozen_pages+0xfe1/0x1170 mm/page_alloc.c:2943
+ discard_slab mm/slub.c:3346 [inline]
+ __put_partials+0x149/0x170 mm/slub.c:3886
+ __slab_free+0x2af/0x330 mm/slub.c:5952
+ qlink_free mm/kasan/quarantine.c:163 [inline]
+ qlist_free_all+0x97/0x100 mm/kasan/quarantine.c:179
+ kasan_quarantine_reduce+0x148/0x160 mm/kasan/quarantine.c:286
+ __kasan_slab_alloc+0x22/0x80 mm/kasan/common.c:350
+ kasan_slab_alloc include/linux/kasan.h:253 [inline]
+ slab_post_alloc_hook mm/slub.c:4953 [inline]
+ slab_alloc_node mm/slub.c:5263 [inline]
+ kmem_cache_alloc_noprof+0x18d/0x6c0 mm/slub.c:5270
+ getname_flags+0xb8/0x540 fs/namei.c:146
+ getname include/linux/fs.h:2498 [inline]
+ do_sys_openat2+0xbc/0x200 fs/open.c:1426
+ do_sys_open fs/open.c:1436 [inline]
+ __do_sys_openat fs/open.c:1452 [inline]
+ __se_sys_openat fs/open.c:1447 [inline]
+ __x64_sys_openat+0x138/0x170 fs/open.c:1447
+ do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+ do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94
+
+Fixes: 8d0b94afdca8 ("ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister")
+Fixes: 78df76a065ae ("ipv4: take rt_uncached_lock only if needed")
+Reported-by: syzbot+179fc225724092b8b2b2@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/6964cdf2.050a0220.eaf7.009d.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Martin KaFai Lau <martin.lau@kernel.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20260112103825.3810713-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dst.c | 1 +
+ net/ipv4/route.c | 4 ++--
+ net/ipv6/route.c | 4 ++--
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -68,6 +68,7 @@ void dst_init(struct dst_entry *dst, str
+ dst->lwtstate = NULL;
+ rcuref_init(&dst->__rcuref, initial_ref);
+ INIT_LIST_HEAD(&dst->rt_uncached);
++ dst->rt_uncached_list = NULL;
+ dst->__use = 0;
+ dst->lastuse = jiffies;
+ dst->flags = flags;
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1546,9 +1546,9 @@ void rt_add_uncached_list(struct rtable
+
+ void rt_del_uncached_list(struct rtable *rt)
+ {
+- if (!list_empty(&rt->dst.rt_uncached)) {
+- struct uncached_list *ul = rt->dst.rt_uncached_list;
++ struct uncached_list *ul = rt->dst.rt_uncached_list;
+
++ if (ul) {
+ spin_lock_bh(&ul->lock);
+ list_del_init(&rt->dst.rt_uncached);
+ spin_unlock_bh(&ul->lock);
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -149,9 +149,9 @@ void rt6_uncached_list_add(struct rt6_in
+
+ void rt6_uncached_list_del(struct rt6_info *rt)
+ {
+- if (!list_empty(&rt->dst.rt_uncached)) {
+- struct uncached_list *ul = rt->dst.rt_uncached_list;
++ struct uncached_list *ul = rt->dst.rt_uncached_list;
+
++ if (ul) {
+ spin_lock_bh(&ul->lock);
+ list_del_init(&rt->dst.rt_uncached);
+ spin_unlock_bh(&ul->lock);
--- /dev/null
+From stable+bounces-219193-greg=kroah.com@vger.kernel.org Wed Feb 25 07:01:18 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Wed, 25 Feb 2026 14:00:20 +0800
+Subject: eth: bnxt: always recalculate features after XDP clearing, fix null-deref
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Jakub Kicinski <kuba@kernel.org>, Michael Chan <michael.chan@broadcom.com>, Somnath Kotur <somnath.kotur@broadcom.com>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260225060020.3361855-1-black.hawk@163.com>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit f0aa6a37a3dbb40b272df5fc6db93c114688adcd ]
+
+Recalculate features when XDP is detached.
+
+Before:
+ # ip li set dev eth0 xdp obj xdp_dummy.bpf.o sec xdp
+ # ip li set dev eth0 xdp off
+ # ethtool -k eth0 | grep gro
+ rx-gro-hw: off [requested on]
+
+After:
+ # ip li set dev eth0 xdp obj xdp_dummy.bpf.o sec xdp
+ # ip li set dev eth0 xdp off
+ # ethtool -k eth0 | grep gro
+ rx-gro-hw: on
+
+The fact that HW-GRO doesn't get re-enabled automatically is just
+a minor annoyance. The real issue is that the features will randomly
+come back during another reconfiguration which just happens to invoke
+netdev_update_features(). The driver doesn't handle reconfiguring
+two things at a time very robustly.
+
+Starting with commit 98ba1d931f61 ("bnxt_en: Fix RSS logic in
+__bnxt_reserve_rings()") we only reconfigure the RSS hash table
+if the "effective" number of Rx rings has changed. If HW-GRO is
+enabled "effective" number of rings is 2x what user sees.
+So if we are in the bad state, with HW-GRO re-enablement "pending"
+after XDP off, and we lower the rings by / 2 - the HW-GRO rings
+doing 2x and the ethtool -L doing / 2 may cancel each other out,
+and the:
+
+ if (old_rx_rings != bp->hw_resc.resv_rx_rings &&
+
+condition in __bnxt_reserve_rings() will be false.
+The RSS map won't get updated, and we'll crash with:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000168
+ RIP: 0010:__bnxt_hwrm_vnic_set_rss+0x13a/0x1a0
+ bnxt_hwrm_vnic_rss_cfg_p5+0x47/0x180
+ __bnxt_setup_vnic_p5+0x58/0x110
+ bnxt_init_nic+0xb72/0xf50
+ __bnxt_open_nic+0x40d/0xab0
+ bnxt_open_nic+0x2b/0x60
+ ethtool_set_channels+0x18c/0x1d0
+
+As we try to access a freed ring.
+
+The issue is present since XDP support was added, really, but
+prior to commit 98ba1d931f61 ("bnxt_en: Fix RSS logic in
+__bnxt_reserve_rings()") it wasn't causing major issues.
+
+Fixes: 1054aee82321 ("bnxt_en: Use NETIF_F_GRO_HW.")
+Fixes: 98ba1d931f61 ("bnxt_en: Fix RSS logic in __bnxt_reserve_rings()")
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Link: https://patch.msgid.link/20250109043057.2888953-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ The context change is due to the commit 1f6e77cb9b32
+("bnxt_en: Add bnxt_l2_filter hash table.") in v6.8 and the commit
+8336a974f37d ("bnxt_en: Save user configured filters in a lookup list")
+in v6.9 which are irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++++++++++++++++++++-----
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 -------
+ 3 files changed, 21 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -3996,7 +3996,7 @@ void bnxt_set_ring_params(struct bnxt *b
+ /* Changing allocation mode of RX rings.
+ * TODO: Update when extending xdp_rxq_info to support allocation modes.
+ */
+-int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
++static void __bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
+ {
+ struct net_device *dev = bp->dev;
+
+@@ -4017,15 +4017,30 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp
+ bp->rx_skb_func = bnxt_rx_page_skb;
+ }
+ bp->rx_dir = DMA_BIDIRECTIONAL;
+- /* Disable LRO or GRO_HW */
+- netdev_update_features(dev);
+ } else {
+ dev->max_mtu = bp->max_mtu;
+ bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
+ bp->rx_dir = DMA_FROM_DEVICE;
+ bp->rx_skb_func = bnxt_rx_skb;
+ }
+- return 0;
++}
++
++void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
++{
++ __bnxt_set_rx_skb_mode(bp, page_mode);
++
++ if (!page_mode) {
++ int rx, tx;
++
++ bnxt_get_max_rings(bp, &rx, &tx, true);
++ if (rx > 1) {
++ bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
++ bp->dev->hw_features |= NETIF_F_LRO;
++ }
++ }
++
++ /* Update LRO and GRO_HW availability */
++ netdev_update_features(bp->dev);
+ }
+
+ static void bnxt_free_vnic_attributes(struct bnxt *bp)
+@@ -13773,7 +13788,7 @@ static int bnxt_init_one(struct pci_dev
+ if (rc)
+ goto init_err_pci_clean;
+
+- bnxt_set_rx_skb_mode(bp, false);
++ __bnxt_set_rx_skb_mode(bp, false);
+ bnxt_set_tpa_flags(bp);
+ bnxt_set_ring_params(bp);
+ rc = bnxt_set_dflt_rings(bp, true);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -2332,7 +2332,7 @@ void bnxt_reuse_rx_data(struct bnxt_rx_r
+ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
+ void bnxt_set_tpa_flags(struct bnxt *bp);
+ void bnxt_set_ring_params(struct bnxt *);
+-int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
++void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
+ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
+ int bmap_size, bool async_only);
+ int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -422,15 +422,8 @@ static int bnxt_xdp_set(struct bnxt *bp,
+ bnxt_set_rx_skb_mode(bp, true);
+ xdp_features_set_redirect_target(dev, true);
+ } else {
+- int rx, tx;
+-
+ xdp_features_clear_redirect_target(dev);
+ bnxt_set_rx_skb_mode(bp, false);
+- bnxt_get_max_rings(bp, &rx, &tx, true);
+- if (rx > 1) {
+- bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
+- bp->dev->hw_features |= NETIF_F_LRO;
+- }
+ }
+ bp->tx_nr_rings_xdp = tx_xdp;
+ bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
--- /dev/null
+From stable+bounces-219686-greg=kroah.com@vger.kernel.org Wed Feb 25 19:15:41 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Feb 2026 13:15:35 -0500
+Subject: ext4: always allocate blocks only from groups inode can use
+To: stable@vger.kernel.org
+Cc: Jan Kara <jack@suse.cz>, Baokun Li <libaokun1@huawei.com>, Zhang Yi <yi.zhang@huawei.com>, Pedro Falcato <pfalcato@suse.de>, stable@kernel.org, Theodore Ts'o <tytso@mit.edu>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260225181535.912817-1-sashal@kernel.org>
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 4865c768b563deff1b6a6384e74a62f143427b42 ]
+
+For filesystems with more than 2^32 blocks, inodes using the indirect block
+based format cannot use blocks beyond the 32-bit limit.
+ext4_mb_scan_groups_linear() takes care to not select these unsupported
+groups for such inodes however other functions selecting groups for
+allocation don't. So far this is harmless because the other selection
+functions are used only with mb_optimize_scan and this is currently
+disabled for inodes with indirect blocks however in the following patch
+we want to enable mb_optimize_scan regardless of inode format.
+
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Acked-by: Pedro Falcato <pfalcato@suse.de>
+Cc: stable@kernel.org
+Link: https://patch.msgid.link/20260114182836.14120-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+[ Drop a few hunks not needed in older trees ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -883,6 +883,21 @@ mb_update_avg_fragment_size(struct super
+ }
+ }
+
++static ext4_group_t ext4_get_allocation_groups_count(
++ struct ext4_allocation_context *ac)
++{
++ ext4_group_t ngroups = ext4_get_groups_count(ac->ac_sb);
++
++ /* non-extent files are limited to low blocks/groups */
++ if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
++ ngroups = EXT4_SB(ac->ac_sb)->s_blockfile_groups;
++
++ /* Pairs with smp_wmb() in ext4_update_super() */
++ smp_rmb();
++
++ return ngroups;
++}
++
+ /*
+ * Choose next group by traversing largest_free_order lists. Updates *new_cr if
+ * cr level needs an update.
+@@ -2817,10 +2832,7 @@ ext4_mb_regular_allocator(struct ext4_al
+
+ sb = ac->ac_sb;
+ sbi = EXT4_SB(sb);
+- ngroups = ext4_get_groups_count(sb);
+- /* non-extent files are limited to low blocks/groups */
+- if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
+- ngroups = sbi->s_blockfile_groups;
++ ngroups = ext4_get_allocation_groups_count(ac);
+
+ BUG_ON(ac->ac_status == AC_STATUS_FOUND);
+
--- /dev/null
+From stable+bounces-219625-greg=kroah.com@vger.kernel.org Wed Feb 25 14:52:44 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Feb 2026 08:47:44 -0500
+Subject: ext4: fix dirtyclusters double decrement on fs shutdown
+To: stable@vger.kernel.org
+Cc: Brian Foster <bfoster@redhat.com>, Baokun Li <libaokun1@huawei.com>, Theodore Ts'o <tytso@mit.edu>, stable@kernel.org, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260225134744.311174-1-sashal@kernel.org>
+
+From: Brian Foster <bfoster@redhat.com>
+
+[ Upstream commit 94a8cea54cd935c54fa2fba70354757c0fc245e3 ]
+
+fstests test generic/388 occasionally reproduces a warning in
+ext4_put_super() associated with the dirty clusters count:
+
+ WARNING: CPU: 7 PID: 76064 at fs/ext4/super.c:1324 ext4_put_super+0x48c/0x590 [ext4]
+
+Tracing the failure shows that the warning fires due to an
+s_dirtyclusters_counter value of -1. IOW, this appears to be a
+spurious decrement as opposed to some sort of leak. Further tracing
+of the dirty cluster count deltas and an LLM scan of the resulting
+output identified the cause as a double decrement in the error path
+between ext4_mb_mark_diskspace_used() and the caller
+ext4_mb_new_blocks().
+
+First, note that generic/388 is a shutdown vs. fsstress test and so
+produces a random set of operations and shutdown injections. In the
+problematic case, the shutdown triggers an error return from the
+ext4_handle_dirty_metadata() call(s) made from
+ext4_mb_mark_context(). The changed value is non-zero at this point,
+so ext4_mb_mark_diskspace_used() does not exit after the error
+bubbles up from ext4_mb_mark_context(). Instead, the former
+decrements both cluster counters and returns the error up to
+ext4_mb_new_blocks(). The latter falls into the !ar->len out path
+which decrements the dirty clusters counter a second time, creating
+the inconsistency.
+
+To avoid this problem and simplify ownership of the cluster
+reservation in this codepath, lift the counter reduction to a single
+place in the caller. This makes it more clear that
+ext4_mb_new_blocks() is responsible for acquiring cluster
+reservation (via ext4_claim_free_clusters()) in the !delalloc case
+as well as releasing it, regardless of whether it ends up consumed
+or returned due to failure.
+
+Fixes: 0087d9fb3f29 ("ext4: Fix s_dirty_blocks_counter if block allocation failed with nodelalloc")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Link: https://patch.msgid.link/20260113171905.118284-1-bfoster@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+[ Drop mballoc-test changes ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 21 +++++----------------
+ 1 file changed, 5 insertions(+), 16 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3999,8 +3999,7 @@ void ext4_exit_mballoc(void)
+ * Returns 0 if success or error code
+ */
+ static noinline_for_stack int
+-ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+- handle_t *handle, unsigned int reserv_clstrs)
++ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext4_group_desc *gdp;
+@@ -4086,13 +4085,6 @@ ext4_mb_mark_diskspace_used(struct ext4_
+
+ ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+ percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
+- /*
+- * Now reduce the dirty block count also. Should not go negative
+- */
+- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+- /* release all the reserved blocks if non delalloc */
+- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+- reserv_clstrs);
+
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi,
+@@ -6265,7 +6257,7 @@ repeat:
+ ext4_mb_pa_put_free(ac);
+ }
+ if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+- *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
++ *errp = ext4_mb_mark_diskspace_used(ac, handle);
+ if (*errp) {
+ ext4_discard_allocated_blocks(ac);
+ goto errout;
+@@ -6296,12 +6288,9 @@ errout:
+ out:
+ if (inquota && ar->len < inquota)
+ dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
+- if (!ar->len) {
+- if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
+- /* release all the reserved blocks if non delalloc */
+- percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+- reserv_clstrs);
+- }
++ /* release any reserved blocks */
++ if (reserv_clstrs)
++ percpu_counter_sub(&sbi->s_dirtyclusters_counter, reserv_clstrs);
+
+ trace_ext4_allocate_blocks(ar, (unsigned long long)block);
+
--- /dev/null
+From stable+bounces-217275-greg=kroah.com@vger.kernel.org Wed Feb 18 13:13:16 2026
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Wed, 18 Feb 2026 20:12:42 +0800
+Subject: net: stmmac: dwmac-loongson: Set clk_csr_i to 100-150MHz
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Huacai Chen <chenhuacai@kernel.org>
+Cc: Xuerui Wang <kernel@xen0n.name>, stable@vger.kernel.org, Andrew Lunn <andrew+netdev@lunn.ch>, "David S . Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Yanteng Si <si.yanteng@linux.dev>, linux-kernel@vger.kernel.org, loongarch@lists.linux.dev, netdev@vger.kernel.org, Huacai Chen <chenhuacai@loongson.cn>, Hongliang Wang <wanghongliang@loongson.cn>
+Message-ID: <20260218121242.2545128-1-chenhuacai@loongson.cn>
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit e1aa5ef892fb4fa9014a25e87b64b97347919d37 upstream.
+
+The current clk_csr_i setting of Loongson STMMAC (including LS7A1000/2000
+and LS2K1000/2000/3000) is copied & pasted from other drivers. In fact,
+Loongson STMMAC uses 125MHz clocks and needs a frequency division of 62
+to stay within 2.5MHz, meeting most PHY MDC requirements. So fix by setting clk_csr_i
+to 100-150MHz, otherwise some PHYs may link fail.
+
+Cc: stable@vger.kernel.org
+Fixes: 30bba69d7db40e7 ("stmmac: pci: Add dwmac support for Loongson")
+Signed-off-by: Hongliang Wang <wanghongliang@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+@@ -14,7 +14,7 @@
+ static int loongson_default_data(struct plat_stmmacenet_data *plat)
+
+ {
+- plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
++ plat->clk_csr = 1; /* clk_csr_i = 100-150MHz & MDC = clk_csr_i/62 */
+ plat->has_gmac = 1;
+ plat->force_sf_dma_mode = 1;
+
--- /dev/null
+From stable+bounces-217867-greg=kroah.com@vger.kernel.org Tue Feb 24 08:02:54 2026
+From: Li hongliang <1468888505@139.com>
+Date: Tue, 24 Feb 2026 15:02:37 +0800
+Subject: NFS: Fix a deadlock involving nfs_release_folio()
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org, trond.myklebust@hammerspace.com
+Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, anna@kernel.org, linux-nfs@vger.kernel.org, wangzhaolong@huaweicloud.com
+Message-ID: <20260224070237.2933965-1-1468888505@139.com>
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit cce0be6eb4971456b703aaeafd571650d314bcca ]
+
+Wang Zhaolong reports a deadlock involving NFSv4.1 state recovery
+waiting on kthreadd, which is attempting to reclaim memory by calling
+nfs_release_folio(). The latter cannot make progress due to state
+recovery being needed.
+
+It seems that the only safe thing to do here is to kick off a writeback
+of the folio, without waiting for completion, or else kicking off an
+asynchronous commit.
+
+Reported-by: Wang Zhaolong <wangzhaolong@huaweicloud.com>
+Fixes: 96780ca55e3c ("NFS: fix up nfs_release_folio() to try to release the page")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+[ Minor conflict resolved. ]
+Signed-off-by: Li hongliang <1468888505@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/file.c | 3 ++-
+ fs/nfs/nfstrace.h | 3 +++
+ fs/nfs/write.c | 33 +++++++++++++++++++++++++++++++++
+ include/linux/nfs_fs.h | 1 +
+ 4 files changed, 39 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -459,7 +459,8 @@ static bool nfs_release_folio(struct fol
+ if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
+ current_is_kswapd() || current_is_kcompactd())
+ return false;
+- if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
++ if (nfs_wb_folio_reclaim(folio_file_mapping(folio)->host, folio) < 0 ||
++ folio_test_private(folio))
+ return false;
+ }
+ return nfs_fscache_release_folio(folio, gfp);
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -1033,6 +1033,9 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+ DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
+ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
+
++DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio_reclaim);
++DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_reclaim_done);
++
+ DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio);
+ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done);
+
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -2122,6 +2122,39 @@ int nfs_wb_folio_cancel(struct inode *in
+ }
+
+ /**
++ * nfs_wb_folio_reclaim - Write back all requests on one page
++ * @inode: pointer to page
++ * @folio: pointer to folio
++ *
++ * Assumes that the folio has been locked by the caller
++ */
++int nfs_wb_folio_reclaim(struct inode *inode, struct folio *folio)
++{
++ loff_t range_start = folio_pos(folio);
++ size_t len = folio_size(folio);
++ struct writeback_control wbc = {
++ .sync_mode = WB_SYNC_ALL,
++ .nr_to_write = 0,
++ .range_start = range_start,
++ .range_end = range_start + len - 1,
++ .for_sync = 1,
++ };
++ int ret;
++
++ if (folio_test_writeback(folio))
++ return -EBUSY;
++ if (folio_clear_dirty_for_io(folio)) {
++ trace_nfs_writeback_folio_reclaim(inode, range_start, len);
++ ret = nfs_writepage_locked(folio, &wbc);
++ trace_nfs_writeback_folio_reclaim_done(inode, range_start, len,
++ ret);
++ return ret;
++ }
++ nfs_commit_inode(inode, 0);
++ return 0;
++}
++
++/**
+ * nfs_wb_folio - Write back all requests on one page
+ * @inode: pointer to page
+ * @folio: pointer to folio
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -608,6 +608,7 @@ extern int nfs_update_folio(struct file
+ extern int nfs_sync_inode(struct inode *inode);
+ extern int nfs_wb_all(struct inode *inode);
+ extern int nfs_wb_folio(struct inode *inode, struct folio *folio);
++extern int nfs_wb_folio_reclaim(struct inode *inode, struct folio *folio);
+ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio);
+ extern int nfs_commit_inode(struct inode *, int);
+ extern struct nfs_commit_data *nfs_commitdata_alloc(void);
--- /dev/null
+From stable+bounces-217866-greg=kroah.com@vger.kernel.org Tue Feb 24 08:01:20 2026
+From: Li hongliang <1468888505@139.com>
+Date: Tue, 24 Feb 2026 15:00:58 +0800
+Subject: nfs: pass explicit offset/count to trace events
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org, hch@lst.de
+Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, trond.myklebust@hammerspace.com, anna@kernel.org, linux-nfs@vger.kernel.org, chuck.lever@oracle.com, Anna.Schumaker@Netapp.com
+Message-ID: <20260224070058.2933695-1-1468888505@139.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit fada32ed6dbc748f447c8d050a961b75d946055a ]
+
+nfs_folio_length is unsafe to use without having the folio locked and a
+check for a NULL ->f_mapping that protects against truncations and can
+lead to kernel crashes. E.g. when running xfstests generic/065 with
+all nfs trace points enabled.
+
+Follow the model of the XFS trace points and pass in an explicit offset
+and length. This has the additional benefit that these values can
+be more accurate as some of the users touch partial folio ranges.
+
+Fixes: eb5654b3b89d ("NFS: Enable tracing of nfs_invalidate_folio() and nfs_launder_folio()")
+Reported-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+[ Minor conflict resolved. ]
+Signed-off-by: Li hongliang <1468888505@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/file.c | 5 +++--
+ fs/nfs/nfstrace.h | 36 ++++++++++++++++++++----------------
+ fs/nfs/read.c | 8 +++++---
+ fs/nfs/write.c | 10 +++++-----
+ 4 files changed, 33 insertions(+), 26 deletions(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -441,7 +441,7 @@ static void nfs_invalidate_folio(struct
+ /* Cancel any unstarted writes on this page */
+ nfs_wb_folio_cancel(inode, folio);
+ folio_wait_fscache(folio);
+- trace_nfs_invalidate_folio(inode, folio);
++ trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
+ }
+
+ /*
+@@ -509,7 +509,8 @@ static int nfs_launder_folio(struct foli
+
+ folio_wait_fscache(folio);
+ ret = nfs_wb_folio(inode, folio);
+- trace_nfs_launder_folio_done(inode, folio, ret);
++ trace_nfs_launder_folio_done(inode, folio_pos(folio),
++ folio_size(folio), ret);
+ return ret;
+ }
+
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -933,10 +933,11 @@ TRACE_EVENT(nfs_sillyrename_unlink,
+ DECLARE_EVENT_CLASS(nfs_folio_event,
+ TP_PROTO(
+ const struct inode *inode,
+- struct folio *folio
++ loff_t offset,
++ size_t count
+ ),
+
+- TP_ARGS(inode, folio),
++ TP_ARGS(inode, offset, count),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+@@ -944,7 +945,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, offset)
+- __field(u32, count)
++ __field(size_t, count)
+ ),
+
+ TP_fast_assign(
+@@ -954,13 +955,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->version = inode_peek_iversion_raw(inode);
+- __entry->offset = folio_file_pos(folio);
+- __entry->count = nfs_folio_length(folio);
++ __entry->offset = offset,
++ __entry->count = count;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+- "offset=%lld count=%u",
++ "offset=%lld count=%zu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->version,
+@@ -972,18 +973,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
+ DEFINE_EVENT(nfs_folio_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+- struct folio *folio \
++ loff_t offset, \
++ size_t count \
+ ), \
+- TP_ARGS(inode, folio))
++ TP_ARGS(inode, offset, count))
+
+ DECLARE_EVENT_CLASS(nfs_folio_event_done,
+ TP_PROTO(
+ const struct inode *inode,
+- struct folio *folio,
++ loff_t offset,
++ size_t count,
+ int ret
+ ),
+
+- TP_ARGS(inode, folio, ret),
++ TP_ARGS(inode, offset, count, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+@@ -992,7 +995,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, offset)
+- __field(u32, count)
++ __field(size_t, count)
+ ),
+
+ TP_fast_assign(
+@@ -1002,14 +1005,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->version = inode_peek_iversion_raw(inode);
+- __entry->offset = folio_file_pos(folio);
+- __entry->count = nfs_folio_length(folio);
++ __entry->offset = offset,
++ __entry->count = count,
+ __entry->ret = ret;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+- "offset=%lld count=%u ret=%d",
++ "offset=%lld count=%zu ret=%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->version,
+@@ -1021,10 +1024,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+ DEFINE_EVENT(nfs_folio_event_done, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+- struct folio *folio, \
++ loff_t offset, \
++ size_t count, \
+ int ret \
+ ), \
+- TP_ARGS(inode, folio, ret))
++ TP_ARGS(inode, offset, count, ret))
+
+ DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
+ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
+--- a/fs/nfs/read.c
++++ b/fs/nfs/read.c
+@@ -333,13 +333,15 @@ out:
+ int nfs_read_folio(struct file *file, struct folio *folio)
+ {
+ struct inode *inode = file_inode(file);
++ loff_t pos = folio_pos(folio);
++ size_t len = folio_size(folio);
+ struct nfs_pageio_descriptor pgio;
+ struct nfs_open_context *ctx;
+ int ret;
+
+- trace_nfs_aop_readpage(inode, folio);
++ trace_nfs_aop_readpage(inode, pos, len);
+ nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+- task_io_account_read(folio_size(folio));
++ task_io_account_read(len);
+
+ /*
+ * Try to flush any pending writes to the file..
+@@ -382,7 +384,7 @@ int nfs_read_folio(struct file *file, st
+ out_put:
+ put_nfs_open_context(ctx);
+ out:
+- trace_nfs_aop_readpage_done(inode, folio, ret);
++ trace_nfs_aop_readpage_done(inode, pos, len, ret);
+ return ret;
+ out_unlock:
+ folio_unlock(folio);
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -2131,17 +2131,17 @@ int nfs_wb_folio_cancel(struct inode *in
+ */
+ int nfs_wb_folio(struct inode *inode, struct folio *folio)
+ {
+- loff_t range_start = folio_file_pos(folio);
+- loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
++ loff_t range_start = folio_pos(folio);
++ size_t len = folio_size(folio);
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0,
+ .range_start = range_start,
+- .range_end = range_end,
++ .range_end = range_start + len - 1,
+ };
+ int ret;
+
+- trace_nfs_writeback_folio(inode, folio);
++ trace_nfs_writeback_folio(inode, range_start, len);
+
+ for (;;) {
+ folio_wait_writeback(folio);
+@@ -2159,7 +2159,7 @@ int nfs_wb_folio(struct inode *inode, st
+ goto out_error;
+ }
+ out_error:
+- trace_nfs_writeback_folio_done(inode, folio, ret);
++ trace_nfs_writeback_folio_done(inode, range_start, len, ret);
+ return ret;
+ }
+
--- /dev/null
+From stable+bounces-217868-greg=kroah.com@vger.kernel.org Tue Feb 24 08:22:45 2026
+From: Li hongliang <1468888505@139.com>
+Date: Tue, 24 Feb 2026 15:22:02 +0800
+Subject: pNFS: Fix a deadlock when returning a delegation during open()
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org, trond.myklebust@hammerspace.com
+Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, bcodding@hammerspace.com, anna@kernel.org, linux-nfs@vger.kernel.org, wangzhaolong@huaweicloud.com
+Message-ID: <20260224072202.2940831-1-1468888505@139.com>
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 857bf9056291a16785ae3be1d291026b2437fc48 ]
+
+Ben Coddington reports seeing a hang in the following stack trace:
+ 0 [ffffd0b50e1774e0] __schedule at ffffffff9ca05415
+ 1 [ffffd0b50e177548] schedule at ffffffff9ca05717
+ 2 [ffffd0b50e177558] bit_wait at ffffffff9ca061e1
+ 3 [ffffd0b50e177568] __wait_on_bit at ffffffff9ca05cfb
+ 4 [ffffd0b50e1775c8] out_of_line_wait_on_bit at ffffffff9ca05ea5
+ 5 [ffffd0b50e177618] pnfs_roc at ffffffffc154207b [nfsv4]
+ 6 [ffffd0b50e1776b8] _nfs4_proc_delegreturn at ffffffffc1506586 [nfsv4]
+ 7 [ffffd0b50e177788] nfs4_proc_delegreturn at ffffffffc1507480 [nfsv4]
+ 8 [ffffd0b50e1777f8] nfs_do_return_delegation at ffffffffc1523e41 [nfsv4]
+ 9 [ffffd0b50e177838] nfs_inode_set_delegation at ffffffffc1524a75 [nfsv4]
+ 10 [ffffd0b50e177888] nfs4_process_delegation at ffffffffc14f41dd [nfsv4]
+ 11 [ffffd0b50e1778a0] _nfs4_opendata_to_nfs4_state at ffffffffc1503edf [nfsv4]
+ 12 [ffffd0b50e1778c0] _nfs4_open_and_get_state at ffffffffc1504e56 [nfsv4]
+ 13 [ffffd0b50e177978] _nfs4_do_open at ffffffffc15051b8 [nfsv4]
+ 14 [ffffd0b50e1779f8] nfs4_do_open at ffffffffc150559c [nfsv4]
+ 15 [ffffd0b50e177a80] nfs4_atomic_open at ffffffffc15057fb [nfsv4]
+ 16 [ffffd0b50e177ad0] nfs4_file_open at ffffffffc15219be [nfsv4]
+ 17 [ffffd0b50e177b78] do_dentry_open at ffffffff9c09e6ea
+ 18 [ffffd0b50e177ba8] vfs_open at ffffffff9c0a082e
+ 19 [ffffd0b50e177bd0] dentry_open at ffffffff9c0a0935
+
+The issue is that the delegreturn is being asked to wait for a layout
+return that cannot complete because a state recovery was initiated. The
+state recovery cannot complete until the open() finishes processing the
+delegations it was given.
+
+The solution is to propagate the existing flags that indicate a
+non-blocking call to the function pnfs_roc(), so that it knows not to
+wait in this situation.
+
+Reported-by: Benjamin Coddington <bcodding@hammerspace.com>
+Fixes: 29ade5db1293 ("pNFS: Wait on outstanding layoutreturns to complete in pnfs_roc()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+[ Minor conflict resolved. ]
+Signed-off-by: Li hongliang <1468888505@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/nfs4proc.c | 6 ++---
+ fs/nfs/pnfs.c | 58 ++++++++++++++++++++++++++++++++++++++----------------
+ fs/nfs/pnfs.h | 17 ++++++---------
+ 3 files changed, 51 insertions(+), 30 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3792,8 +3792,8 @@ int nfs4_do_close(struct nfs4_state *sta
+ calldata->res.seqid = calldata->arg.seqid;
+ calldata->res.server = server;
+ calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+- calldata->lr.roc = pnfs_roc(state->inode,
+- &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred);
++ calldata->lr.roc = pnfs_roc(state->inode, &calldata->lr.arg,
++ &calldata->lr.res, msg.rpc_cred, wait);
+ if (calldata->lr.roc) {
+ calldata->arg.lr_args = &calldata->lr.arg;
+ calldata->res.lr_res = &calldata->lr.res;
+@@ -6742,7 +6742,7 @@ static int _nfs4_proc_delegreturn(struct
+ data->inode = nfs_igrab_and_active(inode);
+ if (data->inode || issync) {
+ data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res,
+- cred);
++ cred, issync);
+ if (data->lr.roc) {
+ data->args.lr_args = &data->lr.arg;
+ data->res.lr_res = &data->lr.res;
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1427,10 +1427,9 @@ pnfs_commit_and_return_layout(struct ino
+ return ret;
+ }
+
+-bool pnfs_roc(struct inode *ino,
+- struct nfs4_layoutreturn_args *args,
+- struct nfs4_layoutreturn_res *res,
+- const struct cred *cred)
++bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args,
++ struct nfs4_layoutreturn_res *res, const struct cred *cred,
++ bool sync)
+ {
+ struct nfs_inode *nfsi = NFS_I(ino);
+ struct nfs_open_context *ctx;
+@@ -1441,7 +1440,7 @@ bool pnfs_roc(struct inode *ino,
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode = 0;
+ bool layoutreturn = false, roc = false;
+- bool skip_read = false;
++ bool skip_read;
+
+ if (!nfs_have_layout(ino))
+ return false;
+@@ -1454,20 +1453,14 @@ retry:
+ lo = NULL;
+ goto out_noroc;
+ }
+- pnfs_get_layout_hdr(lo);
+- if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
+- spin_unlock(&ino->i_lock);
+- rcu_read_unlock();
+- wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
+- TASK_UNINTERRUPTIBLE);
+- pnfs_put_layout_hdr(lo);
+- goto retry;
+- }
+
+ /* no roc if we hold a delegation */
++ skip_read = false;
+ if (nfs4_check_delegation(ino, FMODE_READ)) {
+- if (nfs4_check_delegation(ino, FMODE_WRITE))
++ if (nfs4_check_delegation(ino, FMODE_WRITE)) {
++ lo = NULL;
+ goto out_noroc;
++ }
+ skip_read = true;
+ }
+
+@@ -1476,12 +1469,43 @@ retry:
+ if (state == NULL)
+ continue;
+ /* Don't return layout if there is open file state */
+- if (state->state & FMODE_WRITE)
++ if (state->state & FMODE_WRITE) {
++ lo = NULL;
+ goto out_noroc;
++ }
+ if (state->state & FMODE_READ)
+ skip_read = true;
+ }
+
++ if (skip_read) {
++ bool writes = false;
++
++ list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
++ if (lseg->pls_range.iomode != IOMODE_READ) {
++ writes = true;
++ break;
++ }
++ }
++ if (!writes) {
++ lo = NULL;
++ goto out_noroc;
++ }
++ }
++
++ pnfs_get_layout_hdr(lo);
++ if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
++ if (!sync) {
++ pnfs_set_plh_return_info(
++ lo, skip_read ? IOMODE_RW : IOMODE_ANY, 0);
++ goto out_noroc;
++ }
++ spin_unlock(&ino->i_lock);
++ rcu_read_unlock();
++ wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
++ TASK_UNINTERRUPTIBLE);
++ pnfs_put_layout_hdr(lo);
++ goto retry;
++ }
+
+ list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
+ if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
+@@ -1521,7 +1545,7 @@ retry:
+ out_noroc:
+ spin_unlock(&ino->i_lock);
+ rcu_read_unlock();
+- pnfs_layoutcommit_inode(ino, true);
++ pnfs_layoutcommit_inode(ino, sync);
+ if (roc) {
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
+ if (ld->prepare_layoutreturn)
+--- a/fs/nfs/pnfs.h
++++ b/fs/nfs/pnfs.h
+@@ -295,10 +295,9 @@ int pnfs_mark_matching_lsegs_return(stru
+ u32 seq);
+ int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list);
+-bool pnfs_roc(struct inode *ino,
+- struct nfs4_layoutreturn_args *args,
+- struct nfs4_layoutreturn_res *res,
+- const struct cred *cred);
++bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args,
++ struct nfs4_layoutreturn_res *res, const struct cred *cred,
++ bool sync);
+ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
+ struct nfs4_layoutreturn_res **respp, int *ret);
+ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
+@@ -769,12 +768,10 @@ pnfs_layoutcommit_outstanding(struct ino
+ return false;
+ }
+
+-
+-static inline bool
+-pnfs_roc(struct inode *ino,
+- struct nfs4_layoutreturn_args *args,
+- struct nfs4_layoutreturn_res *res,
+- const struct cred *cred)
++static inline bool pnfs_roc(struct inode *ino,
++ struct nfs4_layoutreturn_args *args,
++ struct nfs4_layoutreturn_res *res,
++ const struct cred *cred, bool sync)
+ {
+ return false;
+ }
--- /dev/null
+From stable+bounces-219745-greg=kroah.com@vger.kernel.org Thu Feb 26 03:42:48 2026
+From: Robert Garcia <rob_garcia@163.com>
+Date: Thu, 26 Feb 2026 10:41:02 +0800
+Subject: rxrpc: Fix recvmsg() unconditional requeue
+To: stable@vger.kernel.org, David Howells <dhowells@redhat.com>
+Cc: Marc Dionne <marc.dionne@auristor.com>, Robert Garcia <rob_garcia@163.com>, Steven Rostedt <rostedt@goodmis.org>, linux-kernel@vger.kernel.org, Masami Hiramatsu <mhiramat@kernel.org>, "David S . Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, linux-afs@lists.infradead.org, linux-trace-kernel@vger.kernel.org, netdev@vger.kernel.org, Faith <faith@zellic.io>, Pumpkin Chang <pumpkin@devco.re>, Nir Ohfeld <niro@wiz.io>, Willy Tarreau <w@1wt.eu>, Simon Horman <horms@kernel.org>
+Message-ID: <20260226024102.3522867-1-rob_garcia@163.com>
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 2c28769a51deb6022d7fbd499987e237a01dd63a ]
+
+If rxrpc_recvmsg() fails because MSG_DONTWAIT was specified but the call at
+the front of the recvmsg queue already has its mutex locked, it requeues
+the call - whether or not the call is already queued. The call may be on
+the queue because MSG_PEEK was also passed and so the call was not dequeued
+or because the I/O thread requeued it.
+
+The unconditional requeue may then corrupt the recvmsg queue, leading to
+things like UAFs or refcount underruns.
+
+Fix this by only requeuing the call if it isn't already on the queue - and
+moving it to the front if it is already queued. If we don't queue it, we
+have to put the ref we obtained by dequeuing it.
+
+Also, MSG_PEEK doesn't dequeue the call so shouldn't call
+rxrpc_notify_socket() for the call if we didn't use up all the data on the
+queue, so fix that also.
+
+Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg")
+Reported-by: Faith <faith@zellic.io>
+Reported-by: Pumpkin Chang <pumpkin@devco.re>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Acked-by: Marc Dionne <marc.dionne@auristor.com>
+cc: Nir Ohfeld <niro@wiz.io>
+cc: Willy Tarreau <w@1wt.eu>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+cc: stable@kernel.org
+Link: https://patch.msgid.link/95163.1768428203@warthog.procyon.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Use spin_unlock instead of spin_unlock_irq to maintain context consistency.]
+Signed-off-by: Robert Garcia <rob_garcia@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/rxrpc.h | 4 ++++
+ net/rxrpc/recvmsg.c | 19 +++++++++++++++----
+ 2 files changed, 19 insertions(+), 4 deletions(-)
+
+--- a/include/trace/events/rxrpc.h
++++ b/include/trace/events/rxrpc.h
+@@ -270,6 +270,7 @@
+ EM(rxrpc_call_put_kernel, "PUT kernel ") \
+ EM(rxrpc_call_put_poke, "PUT poke ") \
+ EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \
++ EM(rxrpc_call_put_recvmsg_peek_nowait, "PUT peek-nwt") \
+ EM(rxrpc_call_put_release_sock, "PUT rls-sock") \
+ EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \
+ EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \
+@@ -287,6 +288,9 @@
+ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \
+ EM(rxrpc_call_see_input, "SEE input ") \
+ EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \
++ EM(rxrpc_call_see_recvmsg_requeue, "SEE recv-rqu") \
++ EM(rxrpc_call_see_recvmsg_requeue_first, "SEE recv-rqF") \
++ EM(rxrpc_call_see_recvmsg_requeue_move, "SEE recv-rqM") \
+ EM(rxrpc_call_see_release, "SEE release ") \
+ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \
+ EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \
+--- a/net/rxrpc/recvmsg.c
++++ b/net/rxrpc/recvmsg.c
+@@ -430,7 +430,8 @@ try_again:
+ if (rxrpc_call_has_failed(call))
+ goto call_failed;
+
+- if (!skb_queue_empty(&call->recvmsg_queue))
++ if (!(flags & MSG_PEEK) &&
++ !skb_queue_empty(&call->recvmsg_queue))
+ rxrpc_notify_socket(call);
+ goto not_yet_complete;
+
+@@ -461,11 +462,21 @@ error_unlock_call:
+ error_requeue_call:
+ if (!(flags & MSG_PEEK)) {
+ spin_lock(&rx->recvmsg_lock);
+- list_add(&call->recvmsg_link, &rx->recvmsg_q);
+- spin_unlock(&rx->recvmsg_lock);
++ if (list_empty(&call->recvmsg_link)) {
++ list_add(&call->recvmsg_link, &rx->recvmsg_q);
++ rxrpc_see_call(call, rxrpc_call_see_recvmsg_requeue);
++ spin_unlock(&rx->recvmsg_lock);
++ } else if (list_is_first(&call->recvmsg_link, &rx->recvmsg_q)) {
++ spin_unlock(&rx->recvmsg_lock);
++ rxrpc_put_call(call, rxrpc_call_see_recvmsg_requeue_first);
++ } else {
++ list_move(&call->recvmsg_link, &rx->recvmsg_q);
++ spin_unlock(&rx->recvmsg_lock);
++ rxrpc_put_call(call, rxrpc_call_see_recvmsg_requeue_move);
++ }
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
+ } else {
+- rxrpc_put_call(call, rxrpc_call_put_recvmsg);
++ rxrpc_put_call(call, rxrpc_call_put_recvmsg_peek_nowait);
+ }
+ error_no_call:
+ release_sock(&rx->sk);
ksmbd-compare-macs-in-constant-time.patch
net-tcp-md5-fix-mac-comparison-to-be-constant-time.patch
f2fs-fix-to-avoid-migrating-empty-section.patch
+ext4-fix-dirtyclusters-double-decrement-on-fs-shutdown.patch
+btrfs-always-fallback-to-buffered-write-if-the-inode-requires-checksum.patch
+net-stmmac-dwmac-loongson-set-clk_csr_i-to-100-150mhz.patch
+arm64-mm-don-t-remap-pgtables-per-cont-pte-pmd-block.patch
+arm64-mm-batch-dsb-and-isb-when-populating-pgtables.patch
+arm64-mm-don-t-remap-pgtables-for-allocate-vs-populate.patch
+btrfs-fix-null-dereference-on-root-when-tracing-inode-eviction.patch
+dst-fix-races-in-rt6_uncached_list_del-and-rt_del_uncached_list.patch
+nfs-pass-explicit-offset-count-to-trace-events.patch
+nfs-fix-a-deadlock-involving-nfs_release_folio.patch
+pnfs-fix-a-deadlock-when-returning-a-delegation-during-open.patch
+usb-typec-ucsi-move-unregister-out-of-atomic-section.patch
+eth-bnxt-always-recalculate-features-after-xdp-clearing-fix-null-deref.patch
+ext4-always-allocate-blocks-only-from-groups-inode-can-use.patch
+rxrpc-fix-recvmsg-unconditional-requeue.patch
+dm-verity-disable-recursive-forward-error-correction.patch
--- /dev/null
+From black.hawk@163.com Wed Feb 25 06:10:42 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Wed, 25 Feb 2026 13:10:08 +0800
+Subject: usb: typec: ucsi: Move unregister out of atomic section
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Bjorn Andersson <quic_bjorande@quicinc.com>, Heikki Krogerus <heikki.krogerus@linux.intel.com>, Neil Armstrong <neil.armstrong@linaro.org>, Dmitry Baryshkov <dmitry.baryshkov@linaro.org>, Amit Pundir <amit.pundir@linaro.org>, Johan Hovold <johan+linaro@kernel.org>, Bjorn Andersson <andersson@kernel.org>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260225051008.2547855-1-black.hawk@163.com>
+
+From: Bjorn Andersson <quic_bjorande@quicinc.com>
+
+[ Upstream commit 11bb2ffb679399f99041540cf662409905179e3a ]
+
+Commit '9329933699b3 ("soc: qcom: pmic_glink: Make client-lock
+non-sleeping")' moved the pmic_glink client list under a spinlock, as it
+is accessed by the rpmsg/glink callback, which in turn is invoked from
+IRQ context.
+
+This means that ucsi_unregister() is now called from atomic context,
+which isn't feasible as it's expecting a sleepable context. An effort is
+under way to get GLINK to invoke its callbacks in a sleepable context,
+but until then lets schedule the unregistration.
+
+A side effect of this is that ucsi_unregister() can now happen
+after the remote processor, and thereby the communication link with it, is
+gone. pmic_glink_send() is amended with a check to avoid the resulting NULL
+pointer dereference.
+This does however result in the user being informed about this error by
+the following entry in the kernel log:
+
+ ucsi_glink.pmic_glink_ucsi pmic_glink.ucsi.0: failed to send UCSI write request: -5
+
+Fixes: 9329933699b3 ("soc: qcom: pmic_glink: Make client-lock non-sleeping")
+Cc: stable@vger.kernel.org
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Tested-by: Amit Pundir <amit.pundir@linaro.org>
+Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
+Tested-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
+Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-2-eec53c750a04@quicinc.com
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+[ The context change is due to the commit 584e8df58942
+("usb: typec: ucsi: extract common code for command handling")
+in v6.11 which is irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/soc/qcom/pmic_glink.c | 10 +++++++++-
+ drivers/usb/typec/ucsi/ucsi_glink.c | 27 ++++++++++++++++++++++-----
+ 2 files changed, 31 insertions(+), 6 deletions(-)
+
+--- a/drivers/soc/qcom/pmic_glink.c
++++ b/drivers/soc/qcom/pmic_glink.c
+@@ -115,8 +115,16 @@ EXPORT_SYMBOL_GPL(pmic_glink_client_regi
+ int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len)
+ {
+ struct pmic_glink *pg = client->pg;
++ int ret;
+
+- return rpmsg_send(pg->ept, data, len);
++ mutex_lock(&pg->state_lock);
++ if (!pg->ept)
++ ret = -ECONNRESET;
++ else
++ ret = rpmsg_send(pg->ept, data, len);
++ mutex_unlock(&pg->state_lock);
++
++ return ret;
+ }
+ EXPORT_SYMBOL_GPL(pmic_glink_send);
+
+--- a/drivers/usb/typec/ucsi/ucsi_glink.c
++++ b/drivers/usb/typec/ucsi/ucsi_glink.c
+@@ -72,6 +72,9 @@ struct pmic_glink_ucsi {
+
+ struct work_struct notify_work;
+ struct work_struct register_work;
++ spinlock_t state_lock;
++ bool ucsi_registered;
++ bool pd_running;
+
+ u8 read_buf[UCSI_BUF_SIZE];
+ };
+@@ -270,8 +273,20 @@ static void pmic_glink_ucsi_notify(struc
+ static void pmic_glink_ucsi_register(struct work_struct *work)
+ {
+ struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
++ unsigned long flags;
++ bool pd_running;
+
+- ucsi_register(ucsi->ucsi);
++ spin_lock_irqsave(&ucsi->state_lock, flags);
++ pd_running = ucsi->pd_running;
++ spin_unlock_irqrestore(&ucsi->state_lock, flags);
++
++ if (!ucsi->ucsi_registered && pd_running) {
++ ucsi_register(ucsi->ucsi);
++ ucsi->ucsi_registered = true;
++ } else if (ucsi->ucsi_registered && !pd_running) {
++ ucsi_unregister(ucsi->ucsi);
++ ucsi->ucsi_registered = false;
++ }
+ }
+
+ static void pmic_glink_ucsi_callback(const void *data, size_t len, void *priv)
+@@ -295,11 +310,12 @@ static void pmic_glink_ucsi_callback(con
+ static void pmic_glink_ucsi_pdr_notify(void *priv, int state)
+ {
+ struct pmic_glink_ucsi *ucsi = priv;
++ unsigned long flags;
+
+- if (state == SERVREG_SERVICE_STATE_UP)
+- schedule_work(&ucsi->register_work);
+- else if (state == SERVREG_SERVICE_STATE_DOWN)
+- ucsi_unregister(ucsi->ucsi);
++ spin_lock_irqsave(&ucsi->state_lock, flags);
++ ucsi->pd_running = (state == SERVREG_SERVICE_STATE_UP);
++ spin_unlock_irqrestore(&ucsi->state_lock, flags);
++ schedule_work(&ucsi->register_work);
+ }
+
+ static void pmic_glink_ucsi_destroy(void *data)
+@@ -332,6 +348,7 @@ static int pmic_glink_ucsi_probe(struct
+ init_completion(&ucsi->read_ack);
+ init_completion(&ucsi->write_ack);
+ init_completion(&ucsi->sync_ack);
++ spin_lock_init(&ucsi->state_lock);
+ mutex_init(&ucsi->lock);
+
+ ucsi->ucsi = ucsi_create(dev, &pmic_glink_ucsi_ops);