]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 19 Mar 2026 11:12:50 +0000 (12:12 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 19 Mar 2026 11:12:50 +0000 (12:12 +0100)
added patches:
arm64-mm-batch-dsb-and-isb-when-populating-pgtables.patch
arm64-mm-don-t-remap-pgtables-for-allocate-vs-populate.patch
arm64-mm-don-t-remap-pgtables-per-cont-pte-pmd-block.patch
btrfs-always-fallback-to-buffered-write-if-the-inode-requires-checksum.patch
btrfs-fix-null-dereference-on-root-when-tracing-inode-eviction.patch
dm-verity-disable-recursive-forward-error-correction.patch
dst-fix-races-in-rt6_uncached_list_del-and-rt_del_uncached_list.patch
eth-bnxt-always-recalculate-features-after-xdp-clearing-fix-null-deref.patch
ext4-always-allocate-blocks-only-from-groups-inode-can-use.patch
ext4-fix-dirtyclusters-double-decrement-on-fs-shutdown.patch
net-stmmac-dwmac-loongson-set-clk_csr_i-to-100-150mhz.patch
nfs-fix-a-deadlock-involving-nfs_release_folio.patch
nfs-pass-explicit-offset-count-to-trace-events.patch
pnfs-fix-a-deadlock-when-returning-a-delegation-during-open.patch
rxrpc-fix-recvmsg-unconditional-requeue.patch
usb-typec-ucsi-move-unregister-out-of-atomic-section.patch

17 files changed:
queue-6.6/arm64-mm-batch-dsb-and-isb-when-populating-pgtables.patch [new file with mode: 0644]
queue-6.6/arm64-mm-don-t-remap-pgtables-for-allocate-vs-populate.patch [new file with mode: 0644]
queue-6.6/arm64-mm-don-t-remap-pgtables-per-cont-pte-pmd-block.patch [new file with mode: 0644]
queue-6.6/btrfs-always-fallback-to-buffered-write-if-the-inode-requires-checksum.patch [new file with mode: 0644]
queue-6.6/btrfs-fix-null-dereference-on-root-when-tracing-inode-eviction.patch [new file with mode: 0644]
queue-6.6/dm-verity-disable-recursive-forward-error-correction.patch [new file with mode: 0644]
queue-6.6/dst-fix-races-in-rt6_uncached_list_del-and-rt_del_uncached_list.patch [new file with mode: 0644]
queue-6.6/eth-bnxt-always-recalculate-features-after-xdp-clearing-fix-null-deref.patch [new file with mode: 0644]
queue-6.6/ext4-always-allocate-blocks-only-from-groups-inode-can-use.patch [new file with mode: 0644]
queue-6.6/ext4-fix-dirtyclusters-double-decrement-on-fs-shutdown.patch [new file with mode: 0644]
queue-6.6/net-stmmac-dwmac-loongson-set-clk_csr_i-to-100-150mhz.patch [new file with mode: 0644]
queue-6.6/nfs-fix-a-deadlock-involving-nfs_release_folio.patch [new file with mode: 0644]
queue-6.6/nfs-pass-explicit-offset-count-to-trace-events.patch [new file with mode: 0644]
queue-6.6/pnfs-fix-a-deadlock-when-returning-a-delegation-during-open.patch [new file with mode: 0644]
queue-6.6/rxrpc-fix-recvmsg-unconditional-requeue.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/usb-typec-ucsi-move-unregister-out-of-atomic-section.patch [new file with mode: 0644]

diff --git a/queue-6.6/arm64-mm-batch-dsb-and-isb-when-populating-pgtables.patch b/queue-6.6/arm64-mm-batch-dsb-and-isb-when-populating-pgtables.patch
new file mode 100644 (file)
index 0000000..f52de71
--- /dev/null
@@ -0,0 +1,89 @@
+From stable+bounces-216826-greg=kroah.com@vger.kernel.org Tue Feb 17 14:35:05 2026
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 17 Feb 2026 13:34:07 +0000
+Subject: arm64: mm: Batch dsb and isb when populating pgtables
+To: stable@vger.kernel.org
+Cc: Ryan Roberts <ryan.roberts@arm.com>, catalin.marinas@arm.com, will@kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Jack Aboutboul <jaboutboul@microsoft.com>, Sharath George John <sgeorgejohn@microsoft.com>, Noah Meyerhans <nmeyerhans@microsoft.com>, Jim Perrin <Jim.Perrin@microsoft.com>, Itaru Kitayama <itaru.kitayama@fujitsu.com>, Eric Chanudet <echanude@redhat.com>, Mark Rutland <mark.rutland@arm.com>, Ard Biesheuvel <ardb@kernel.org>
+Message-ID: <20260217133411.2881311-3-ryan.roberts@arm.com>
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+[ Upstream commit 1fcb7cea8a5f7747e02230f816c2c80b060d9517 ]
+
+After removing unnecessary TLBIs, the next bottleneck when creating the
+page tables for the linear map is DSB and ISB, which were previously
+issued per-pte in __set_pte(). Since we are writing multiple ptes in a
+given pte table, we can elide these barriers and insert them once we
+have finished writing to the table.
+
+Execution time of map_mem(), which creates the kernel linear map page
+tables, was measured on different machines with different RAM configs:
+
+               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
+               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
+---------------|-------------|-------------|-------------|-------------
+               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
+---------------|-------------|-------------|-------------|-------------
+before         |   78   (0%) |  435   (0%) | 1723   (0%) |  3779   (0%)
+after          |   11 (-86%) |  161 (-63%) |  656 (-62%) |  1654 (-56%)
+
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
+Tested-by: Eric Chanudet <echanude@redhat.com>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240412131908.433043-3-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Ryan: Trivial backport ]
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/pgtable.h |    7 ++++++-
+ arch/arm64/mm/mmu.c              |   11 ++++++++++-
+ 2 files changed, 16 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -262,9 +262,14 @@ static inline pte_t pte_mkdevmap(pte_t p
+       return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
+ }
+-static inline void set_pte(pte_t *ptep, pte_t pte)
++static inline void set_pte_nosync(pte_t *ptep, pte_t pte)
+ {
+       WRITE_ONCE(*ptep, pte);
++}
++
++static inline void set_pte(pte_t *ptep, pte_t pte)
++{
++      set_pte_nosync(ptep, pte);
+       /*
+        * Only if the new pte is valid and kernel, otherwise TLB maintenance
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -175,7 +175,11 @@ static void init_pte(pte_t *ptep, unsign
+       do {
+               pte_t old_pte = READ_ONCE(*ptep);
+-              set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
++              /*
++               * Required barriers to make this visible to the table walker
++               * are deferred to the end of alloc_init_cont_pte().
++               */
++              set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+               /*
+                * After the PTE entry has been populated once, we
+@@ -229,6 +233,11 @@ static void alloc_init_cont_pte(pmd_t *p
+               phys += next - addr;
+       } while (addr = next, addr != end);
++      /*
++       * Note: barriers and maintenance necessary to clear the fixmap slot
++       * ensure that all previous pgtable writes are visible to the table
++       * walker.
++       */
+       pte_clear_fixmap();
+ }
diff --git a/queue-6.6/arm64-mm-don-t-remap-pgtables-for-allocate-vs-populate.patch b/queue-6.6/arm64-mm-don-t-remap-pgtables-for-allocate-vs-populate.patch
new file mode 100644 (file)
index 0000000..9638591
--- /dev/null
@@ -0,0 +1,177 @@
+From stable+bounces-216827-greg=kroah.com@vger.kernel.org Tue Feb 17 14:34:55 2026
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 17 Feb 2026 13:34:08 +0000
+Subject: arm64: mm: Don't remap pgtables for allocate vs populate
+To: stable@vger.kernel.org
+Cc: Ryan Roberts <ryan.roberts@arm.com>, catalin.marinas@arm.com, will@kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Jack Aboutboul <jaboutboul@microsoft.com>, Sharath George John <sgeorgejohn@microsoft.com>, Noah Meyerhans <nmeyerhans@microsoft.com>, Jim Perrin <Jim.Perrin@microsoft.com>, Mark Rutland <mark.rutland@arm.com>, Itaru Kitayama <itaru.kitayama@fujitsu.com>, Eric Chanudet <echanude@redhat.com>, Ard Biesheuvel <ardb@kernel.org>
+Message-ID: <20260217133411.2881311-4-ryan.roberts@arm.com>
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+[ Upstream commit 0e9df1c905d8293d333ace86c13d147382f5caf9 ]
+
+During linear map pgtable creation, each pgtable is fixmapped /
+fixunmapped twice; once during allocation to zero the memory, and a
+again during population to write the entries. This means each table has
+2 TLB invalidations issued against it. Let's fix this so that each table
+is only fixmapped/fixunmapped once, halving the number of TLBIs, and
+improving performance.
+
+Achieve this by separating allocation and initialization (zeroing) of
+the page. The allocated page is now fixmapped directly by the walker and
+initialized, before being populated and finally fixunmapped.
+
+This approach keeps the change small, but has the side effect that late
+allocations (using __get_free_page()) must also go through the generic
+memory clearing routine. So let's tell __get_free_page() not to zero the
+memory to avoid duplication.
+
+Additionally this approach means that fixmap/fixunmap is still used for
+late pgtable modifications. That's not technically needed since the
+memory is all mapped in the linear map by that point. That's left as a
+possible future optimization if found to be needed.
+
+Execution time of map_mem(), which creates the kernel linear map page
+tables, was measured on different machines with different RAM configs:
+
+               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
+               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
+---------------|-------------|-------------|-------------|-------------
+               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
+---------------|-------------|-------------|-------------|-------------
+before         |   11   (0%) |  161   (0%) |  656   (0%) |  1654   (0%)
+after          |   10 (-11%) |  104 (-35%) |  438 (-33%) |  1223 (-26%)
+
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Suggested-by: Mark Rutland <mark.rutland@arm.com>
+Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
+Tested-by: Eric Chanudet <echanude@redhat.com>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240412131908.433043-4-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Ryan: Trivial backport ]
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/mmu.c |   58 ++++++++++++++++++++++++++--------------------------
+ 1 file changed, 29 insertions(+), 29 deletions(-)
+
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -106,28 +106,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
+ static phys_addr_t __init early_pgtable_alloc(int shift)
+ {
+       phys_addr_t phys;
+-      void *ptr;
+       phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
+                                        MEMBLOCK_ALLOC_NOLEAKTRACE);
+       if (!phys)
+               panic("Failed to allocate page table page\n");
+-      /*
+-       * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+-       * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+-       * any level of table.
+-       */
+-      ptr = pte_set_fixmap(phys);
+-
+-      memset(ptr, 0, PAGE_SIZE);
+-
+-      /*
+-       * Implicit barriers also ensure the zeroed page is visible to the page
+-       * table walker
+-       */
+-      pte_clear_fixmap();
+-
+       return phys;
+ }
+@@ -169,6 +153,14 @@ bool pgattr_change_is_safe(u64 old, u64
+       return ((old ^ new) & ~mask) == 0;
+ }
++static void init_clear_pgtable(void *table)
++{
++      clear_page(table);
++
++      /* Ensure the zeroing is observed by page table walks. */
++      dsb(ishst);
++}
++
+ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+                    phys_addr_t phys, pgprot_t prot)
+ {
+@@ -211,12 +203,15 @@ static void alloc_init_cont_pte(pmd_t *p
+                       pmdval |= PMD_TABLE_PXN;
+               BUG_ON(!pgtable_alloc);
+               pte_phys = pgtable_alloc(PAGE_SHIFT);
++              ptep = pte_set_fixmap(pte_phys);
++              init_clear_pgtable(ptep);
++              ptep += pte_index(addr);
+               __pmd_populate(pmdp, pte_phys, pmdval);
+-              pmd = READ_ONCE(*pmdp);
++      } else {
++              BUG_ON(pmd_bad(pmd));
++              ptep = pte_set_fixmap_offset(pmdp, addr);
+       }
+-      BUG_ON(pmd_bad(pmd));
+-      ptep = pte_set_fixmap_offset(pmdp, addr);
+       do {
+               pgprot_t __prot = prot;
+@@ -295,12 +290,15 @@ static void alloc_init_cont_pmd(pud_t *p
+                       pudval |= PUD_TABLE_PXN;
+               BUG_ON(!pgtable_alloc);
+               pmd_phys = pgtable_alloc(PMD_SHIFT);
++              pmdp = pmd_set_fixmap(pmd_phys);
++              init_clear_pgtable(pmdp);
++              pmdp += pmd_index(addr);
+               __pud_populate(pudp, pmd_phys, pudval);
+-              pud = READ_ONCE(*pudp);
++      } else {
++              BUG_ON(pud_bad(pud));
++              pmdp = pmd_set_fixmap_offset(pudp, addr);
+       }
+-      BUG_ON(pud_bad(pud));
+-      pmdp = pmd_set_fixmap_offset(pudp, addr);
+       do {
+               pgprot_t __prot = prot;
+@@ -338,12 +336,15 @@ static void alloc_init_pud(pgd_t *pgdp,
+                       p4dval |= P4D_TABLE_PXN;
+               BUG_ON(!pgtable_alloc);
+               pud_phys = pgtable_alloc(PUD_SHIFT);
++              pudp = pud_set_fixmap(pud_phys);
++              init_clear_pgtable(pudp);
++              pudp += pud_index(addr);
+               __p4d_populate(p4dp, pud_phys, p4dval);
+-              p4d = READ_ONCE(*p4dp);
++      } else {
++              BUG_ON(p4d_bad(p4d));
++              pudp = pud_set_fixmap_offset(p4dp, addr);
+       }
+-      BUG_ON(p4d_bad(p4d));
+-      pudp = pud_set_fixmap_offset(p4dp, addr);
+       do {
+               pud_t old_pud = READ_ONCE(*pudp);
+@@ -425,11 +426,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdi
+ static phys_addr_t __pgd_pgtable_alloc(int shift)
+ {
+-      void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
+-      BUG_ON(!ptr);
++      /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
++      void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
+-      /* Ensure the zeroed page is visible to the page table walker */
+-      dsb(ishst);
++      BUG_ON(!ptr);
+       return __pa(ptr);
+ }
diff --git a/queue-6.6/arm64-mm-don-t-remap-pgtables-per-cont-pte-pmd-block.patch b/queue-6.6/arm64-mm-don-t-remap-pgtables-per-cont-pte-pmd-block.patch
new file mode 100644 (file)
index 0000000..14483a1
--- /dev/null
@@ -0,0 +1,160 @@
+From stable+bounces-216825-greg=kroah.com@vger.kernel.org Tue Feb 17 14:34:47 2026
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 17 Feb 2026 13:34:06 +0000
+Subject: arm64: mm: Don't remap pgtables per-cont(pte|pmd) block
+To: stable@vger.kernel.org
+Cc: Ryan Roberts <ryan.roberts@arm.com>, catalin.marinas@arm.com, will@kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, Jack Aboutboul <jaboutboul@microsoft.com>, Sharath George John <sgeorgejohn@microsoft.com>, Noah Meyerhans <nmeyerhans@microsoft.com>, Jim Perrin <Jim.Perrin@microsoft.com>, Itaru Kitayama <itaru.kitayama@fujitsu.com>, Eric Chanudet <echanude@redhat.com>, Mark Rutland <mark.rutland@arm.com>, Ard Biesheuvel <ardb@kernel.org>
+Message-ID: <20260217133411.2881311-2-ryan.roberts@arm.com>
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+[ Upstream commit 5c63db59c5f89925add57642be4f789d0d671ccd ]
+
+A large part of the kernel boot time is creating the kernel linear map
+page tables. When rodata=full, all memory is mapped by pte. And when
+there is lots of physical ram, there are lots of pte tables to populate.
+The primary cost associated with this is mapping and unmapping the pte
+table memory in the fixmap; at unmap time, the TLB entry must be
+invalidated and this is expensive.
+
+Previously, each pmd and pte table was fixmapped/fixunmapped for each
+cont(pte|pmd) block of mappings (16 entries with 4K granule). This means
+we ended up issuing 32 TLBIs per (pmd|pte) table during the population
+phase.
+
+Let's fix that, and fixmap/fixunmap each page once per population, for a
+saving of 31 TLBIs per (pmd|pte) table. This gives a significant boot
+speedup.
+
+Execution time of map_mem(), which creates the kernel linear map page
+tables, was measured on different machines with different RAM configs:
+
+               | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra
+               | VM, 16G     | VM, 64G     | VM, 256G    | Metal, 512G
+---------------|-------------|-------------|-------------|-------------
+               |   ms    (%) |   ms    (%) |   ms    (%) |    ms    (%)
+---------------|-------------|-------------|-------------|-------------
+before         |  168   (0%) | 2198   (0%) | 8644   (0%) | 17447   (0%)
+after          |   78 (-53%) |  435 (-80%) | 1723 (-80%) |  3779 (-78%)
+
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com>
+Tested-by: Eric Chanudet <echanude@redhat.com>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240412131908.433043-2-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+[ Ryan: Trivial backport ]
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/mmu.c |   27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -169,12 +169,9 @@ bool pgattr_change_is_safe(u64 old, u64
+       return ((old ^ new) & ~mask) == 0;
+ }
+-static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
++static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+                    phys_addr_t phys, pgprot_t prot)
+ {
+-      pte_t *ptep;
+-
+-      ptep = pte_set_fixmap_offset(pmdp, addr);
+       do {
+               pte_t old_pte = READ_ONCE(*ptep);
+@@ -189,8 +186,6 @@ static void init_pte(pmd_t *pmdp, unsign
+               phys += PAGE_SIZE;
+       } while (ptep++, addr += PAGE_SIZE, addr != end);
+-
+-      pte_clear_fixmap();
+ }
+ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
+@@ -201,6 +196,7 @@ static void alloc_init_cont_pte(pmd_t *p
+ {
+       unsigned long next;
+       pmd_t pmd = READ_ONCE(*pmdp);
++      pte_t *ptep;
+       BUG_ON(pmd_sect(pmd));
+       if (pmd_none(pmd)) {
+@@ -216,6 +212,7 @@ static void alloc_init_cont_pte(pmd_t *p
+       }
+       BUG_ON(pmd_bad(pmd));
++      ptep = pte_set_fixmap_offset(pmdp, addr);
+       do {
+               pgprot_t __prot = prot;
+@@ -226,20 +223,21 @@ static void alloc_init_cont_pte(pmd_t *p
+                   (flags & NO_CONT_MAPPINGS) == 0)
+                       __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+-              init_pte(pmdp, addr, next, phys, __prot);
++              init_pte(ptep, addr, next, phys, __prot);
++              ptep += pte_index(next) - pte_index(addr);
+               phys += next - addr;
+       } while (addr = next, addr != end);
++
++      pte_clear_fixmap();
+ }
+-static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
++static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
+                    phys_addr_t phys, pgprot_t prot,
+                    phys_addr_t (*pgtable_alloc)(int), int flags)
+ {
+       unsigned long next;
+-      pmd_t *pmdp;
+-      pmdp = pmd_set_fixmap_offset(pudp, addr);
+       do {
+               pmd_t old_pmd = READ_ONCE(*pmdp);
+@@ -265,8 +263,6 @@ static void init_pmd(pud_t *pudp, unsign
+               }
+               phys += next - addr;
+       } while (pmdp++, addr = next, addr != end);
+-
+-      pmd_clear_fixmap();
+ }
+ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
+@@ -276,6 +272,7 @@ static void alloc_init_cont_pmd(pud_t *p
+ {
+       unsigned long next;
+       pud_t pud = READ_ONCE(*pudp);
++      pmd_t *pmdp;
+       /*
+        * Check for initial section mappings in the pgd/pud.
+@@ -294,6 +291,7 @@ static void alloc_init_cont_pmd(pud_t *p
+       }
+       BUG_ON(pud_bad(pud));
++      pmdp = pmd_set_fixmap_offset(pudp, addr);
+       do {
+               pgprot_t __prot = prot;
+@@ -304,10 +302,13 @@ static void alloc_init_cont_pmd(pud_t *p
+                   (flags & NO_CONT_MAPPINGS) == 0)
+                       __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+-              init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
++              init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
++              pmdp += pmd_index(next) - pmd_index(addr);
+               phys += next - addr;
+       } while (addr = next, addr != end);
++
++      pmd_clear_fixmap();
+ }
+ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
diff --git a/queue-6.6/btrfs-always-fallback-to-buffered-write-if-the-inode-requires-checksum.patch b/queue-6.6/btrfs-always-fallback-to-buffered-write-if-the-inode-requires-checksum.patch
new file mode 100644 (file)
index 0000000..60f2aca
--- /dev/null
@@ -0,0 +1,108 @@
+From stable+bounces-217704-greg=kroah.com@vger.kernel.org Mon Feb 23 09:07:37 2026
+From: Qu Wenruo <wqu@suse.com>
+Date: Mon, 23 Feb 2026 18:33:48 +1030
+Subject: btrfs: always fallback to buffered write if the inode requires checksum
+To: linux-btrfs@vger.kernel.org
+Cc: stable@vger.kernel.org, Christoph Hellwig <hch@infradead.org>, Filipe Manana <fdmanana@suse.com>, David Sterba <dsterba@suse.com>
+Message-ID: <5c3a9c8f484ed1ba8fe897e67057eec24968f7bd.1771833812.git.wqu@suse.com>
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 968f19c5b1b7d5595423b0ac0020cc18dfed8cb5 upstream.
+
+[BUG]
+It is a long known bug that VM image on btrfs can lead to data csum
+mismatch, if the qemu is using direct-io for the image (this is commonly
+known as cache mode 'none').
+
+[CAUSE]
+Inside the VM, if the fs is EXT4 or XFS, or even NTFS from Windows, the
+fs is allowed to dirty/modify the folio even if the folio is under
+writeback (as long as the address space doesn't have AS_STABLE_WRITES
+flag inherited from the block device).
+
+This is a valid optimization to improve the concurrency, and since these
+filesystems have no extra checksum on data, the content change is not a
+problem at all.
+
+But the final write into the image file is handled by btrfs, which needs
+the content not to be modified during writeback, or the checksum will
+not match the data (checksum is calculated before submitting the bio).
+
+So EXT4/XFS/NTFS assume they can modify the folio under writeback, but
+btrfs requires no modification, this leads to the false csum mismatch.
+
+This is only a controlled example, there are even cases where
+multi-thread programs can submit a direct IO write, then another thread
+modifies the direct IO buffer for whatever reason.
+
+For such cases, btrfs has no sane way to detect such cases and leads to
+false data csum mismatch.
+
+[FIX]
+I have considered the following ideas to solve the problem:
+
+- Make direct IO to always skip data checksum
+  This not only requires a new incompatible flag, as it breaks the
+  current per-inode NODATASUM flag.
+  But also requires extra handling for no csum found cases.
+
+  And this also reduces our checksum protection.
+
+- Let hardware handle all the checksum
+  AKA, just nodatasum mount option.
+  That requires trust for hardware (which is not that trustful in a lot
+  of cases), and it's not generic at all.
+
+- Always fallback to buffered write if the inode requires checksum
+  This was suggested by Christoph, and is the solution utilized by this
+  patch.
+
+  The cost is obvious, the extra buffer copying into page cache, thus it
+  reduces the performance.
+  But at least it's still user configurable, if the end user still wants
+  the zero-copy performance, just set NODATASUM flag for the inode
+  (which is a common practice for VM images on btrfs).
+
+  Since we cannot trust user space programs to keep the buffer
+  consistent during direct IO, we have no choice but always falling back
+  to buffered IO.  At least by this, we avoid the more deadly false data
+  checksum mismatch error.
+
+Cc: stable@vger.kernel.org # 6.6
+[ Conflicts caused by code extracted into direct-io.c ]
+Suggested-by: Christoph Hellwig <hch@infradead.org>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c |   16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1514,6 +1514,22 @@ relock:
+               btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
+               goto buffered;
+       }
++      /*
++       * We can't control the folios being passed in, applications can write
++       * to them while a direct IO write is in progress.  This means the
++       * content might change after we calculated the data checksum.
++       * Therefore we can end up storing a checksum that doesn't match the
++       * persisted data.
++       *
++       * To be extra safe and avoid false data checksum mismatch, if the
++       * inode requires data checksum, just fallback to buffered IO.
++       * For buffered IO we have full control of page cache and can ensure
++       * no one is modifying the content during writeback.
++       */
++      if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
++              btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
++              goto buffered;
++      }
+       /*
+        * The iov_iter can be mapped to the same file range we are writing to.
diff --git a/queue-6.6/btrfs-fix-null-dereference-on-root-when-tracing-inode-eviction.patch b/queue-6.6/btrfs-fix-null-dereference-on-root-when-tracing-inode-eviction.patch
new file mode 100644 (file)
index 0000000..f626d22
--- /dev/null
@@ -0,0 +1,49 @@
+From stable+bounces-217853-greg=kroah.com@vger.kernel.org Tue Feb 24 04:37:02 2026
+From: Bin Lan <lanbincn@139.com>
+Date: Tue, 24 Feb 2026 03:32:14 +0000
+Subject: btrfs: fix NULL dereference on root when tracing inode eviction
+To: stable@vger.kernel.org, gregkh@linuxfoundation.org
+Cc: "Miquel Sabaté Solà" <mssola@mssola.com>, syzbot+d991fea1b4b23b1f6bf8@syzkaller.appspotmail.com, "David Sterba" <dsterba@suse.com>, "Bin Lan" <lanbincn@139.com>
+Message-ID: <20260224033214.4976-1-lanbincn@139.com>
+
+From: Miquel Sabaté Solà <mssola@mssola.com>
+
+[ Upstream commit f157dd661339fc6f5f2b574fe2429c43bd309534 ]
+
+When evicting an inode the first thing we do is to setup tracing for it,
+which implies fetching the root's id. But in btrfs_evict_inode() the
+root might be NULL, as implied in the next check that we do in
+btrfs_evict_inode().
+
+Hence, we either should set the ->root_objectid to 0 in case the root is
+NULL, or we move tracing setup after checking that the root is not
+NULL. Setting the rootid to 0 at least gives us the possibility to trace
+this call even in the case when the root is NULL, so that's the solution
+taken here.
+
+Fixes: 1abe9b8a138c ("Btrfs: add initial tracepoint support for btrfs")
+Reported-by: syzbot+d991fea1b4b23b1f6bf8@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=d991fea1b4b23b1f6bf8
+Signed-off-by: Miquel Sabaté Solà <mssola@mssola.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[ Adjust context ]
+Signed-off-by: Bin Lan <lanbincn@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/btrfs.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/trace/events/btrfs.h
++++ b/include/trace/events/btrfs.h
+@@ -225,8 +225,8 @@ DECLARE_EVENT_CLASS(btrfs__inode,
+               __entry->generation = BTRFS_I(inode)->generation;
+               __entry->last_trans = BTRFS_I(inode)->last_trans;
+               __entry->logged_trans = BTRFS_I(inode)->logged_trans;
+-              __entry->root_objectid =
+-                              BTRFS_I(inode)->root->root_key.objectid;
++              __entry->root_objectid = BTRFS_I(inode)->root ?
++                                       btrfs_root_id(BTRFS_I(inode)->root) : 0;
+       ),
+       TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu "
diff --git a/queue-6.6/dm-verity-disable-recursive-forward-error-correction.patch b/queue-6.6/dm-verity-disable-recursive-forward-error-correction.patch
new file mode 100644 (file)
index 0000000..cb952a7
--- /dev/null
@@ -0,0 +1,68 @@
+From stable+bounces-219751-greg=kroah.com@vger.kernel.org Thu Feb 26 06:05:28 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Thu, 26 Feb 2026 13:04:18 +0800
+Subject: dm-verity: disable recursive forward error correction
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Mikulas Patocka <mpatocka@redhat.com>, Guangwu Zhang <guazhang@redhat.com>, Sami Tolvanen <samitolvanen@google.com>, Eric Biggers <ebiggers@kernel.org>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260226050418.159241-1-black.hawk@163.com>
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit d9f3e47d3fae0c101d9094bc956ed24e7a0ee801 ]
+
+There are two problems with the recursive correction:
+
+1. It may cause denial-of-service. In fec_read_bufs, there is a loop that
+has 253 iterations. For each iteration, we may call verity_hash_for_block
+recursively. There is a limit of 4 nested recursions - that means that
+there may be at most 253^4 (4 billion) iterations. Red Hat QE team
+actually created an image that pushes dm-verity to this limit - and this
+image just makes the udev-worker process get stuck in the 'D' state.
+
+2. It doesn't work. In fec_read_bufs we store data into the variable
+"fio->bufs", but fio bufs is shared between recursive invocations, if
+"verity_hash_for_block" invoked correction recursively, it would
+overwrite partially filled fio->bufs.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Reported-by: Guangwu Zhang <guazhang@redhat.com>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Reviewed-by: Eric Biggers <ebiggers@kernel.org>
+[ The context change is due to the commit bdf253d580d7
+("dm-verity: remove support for asynchronous hashes")
+in v6.18 and the commit 9356fcfe0ac4
+("dm verity: set DM_TARGET_SINGLETON feature flag") in v6.9
+which are irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-verity-fec.c |    4 +---
+ drivers/md/dm-verity-fec.h |    3 ---
+ 2 files changed, 1 insertion(+), 6 deletions(-)
+
+--- a/drivers/md/dm-verity-fec.c
++++ b/drivers/md/dm-verity-fec.c
+@@ -439,10 +439,8 @@ int verity_fec_decode(struct dm_verity *
+       if (!verity_fec_is_enabled(v))
+               return -EOPNOTSUPP;
+-      if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
+-              DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
++      if (fio->level)
+               return -EIO;
+-      }
+       fio->level++;
+--- a/drivers/md/dm-verity-fec.h
++++ b/drivers/md/dm-verity-fec.h
+@@ -23,9 +23,6 @@
+ #define DM_VERITY_FEC_BUF_MAX \
+       (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+-/* maximum recursion level for verity_fec_decode */
+-#define DM_VERITY_FEC_MAX_RECURSION   4
+-
+ #define DM_VERITY_OPT_FEC_DEV         "use_fec_from_device"
+ #define DM_VERITY_OPT_FEC_BLOCKS      "fec_blocks"
+ #define DM_VERITY_OPT_FEC_START               "fec_start"
diff --git a/queue-6.6/dst-fix-races-in-rt6_uncached_list_del-and-rt_del_uncached_list.patch b/queue-6.6/dst-fix-races-in-rt6_uncached_list_del-and-rt_del_uncached_list.patch
new file mode 100644 (file)
index 0000000..85db198
--- /dev/null
@@ -0,0 +1,263 @@
+From stable+bounces-217861-greg=kroah.com@vger.kernel.org Tue Feb 24 06:50:52 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Tue, 24 Feb 2026 13:49:43 +0800
+Subject: dst: fix races in rt6_uncached_list_del() and rt_del_uncached_list()
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Eric Dumazet <edumazet@google.com>, syzbot+179fc225724092b8b2b2@syzkaller.appspotmail.com, Martin KaFai Lau <martin.lau@kernel.org>, David Ahern <dsahern@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260224054943.3324184-1-black.hawk@163.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9a6f0c4d5796ab89b5a28a890ce542344d58bd69 ]
+
+syzbot was able to crash the kernel in rt6_uncached_list_flush_dev()
+in an interesting way [1]
+
+Crash happens in list_del_init()/INIT_LIST_HEAD() while writing
+list->prev, while the prior write on list->next went well.
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+       WRITE_ONCE(list->next, list); // This went well
+       WRITE_ONCE(list->prev, list); // Crash, @list has been freed.
+}
+
+Issue here is that rt6_uncached_list_del() did not attempt to lock
+ul->lock, as list_empty(&rt->dst.rt_uncached) returned
+true because the WRITE_ONCE(list->next, list) happened on the other CPU.
+
+We might use list_del_init_careful() and list_empty_careful(),
+or make sure rt6_uncached_list_del() always grabs the spinlock
+whenever rt->dst.rt_uncached_list has been set.
+
+A similar fix is needed for IPv4.
+
+[1]
+
+ BUG: KASAN: slab-use-after-free in INIT_LIST_HEAD include/linux/list.h:46 [inline]
+ BUG: KASAN: slab-use-after-free in list_del_init include/linux/list.h:296 [inline]
+ BUG: KASAN: slab-use-after-free in rt6_uncached_list_flush_dev net/ipv6/route.c:191 [inline]
+ BUG: KASAN: slab-use-after-free in rt6_disable_ip+0x633/0x730 net/ipv6/route.c:5020
+Write of size 8 at addr ffff8880294cfa78 by task kworker/u8:14/3450
+
+CPU: 0 UID: 0 PID: 3450 Comm: kworker/u8:14 Tainted: G             L      syzkaller #0 PREEMPT_{RT,(full)}
+Tainted: [L]=SOFTLOCKUP
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/25/2025
+Workqueue: netns cleanup_net
+Call Trace:
+ <TASK>
+  dump_stack_lvl+0xe8/0x150 lib/dump_stack.c:120
+  print_address_description mm/kasan/report.c:378 [inline]
+  print_report+0xca/0x240 mm/kasan/report.c:482
+  kasan_report+0x118/0x150 mm/kasan/report.c:595
+  INIT_LIST_HEAD include/linux/list.h:46 [inline]
+  list_del_init include/linux/list.h:296 [inline]
+  rt6_uncached_list_flush_dev net/ipv6/route.c:191 [inline]
+  rt6_disable_ip+0x633/0x730 net/ipv6/route.c:5020
+  addrconf_ifdown+0x143/0x18a0 net/ipv6/addrconf.c:3853
+ addrconf_notify+0x1bc/0x1050 net/ipv6/addrconf.c:-1
+  notifier_call_chain+0x19d/0x3a0 kernel/notifier.c:85
+  call_netdevice_notifiers_extack net/core/dev.c:2268 [inline]
+  call_netdevice_notifiers net/core/dev.c:2282 [inline]
+  netif_close_many+0x29c/0x410 net/core/dev.c:1785
+  unregister_netdevice_many_notify+0xb50/0x2330 net/core/dev.c:12353
+  ops_exit_rtnl_list net/core/net_namespace.c:187 [inline]
+  ops_undo_list+0x3dc/0x990 net/core/net_namespace.c:248
+  cleanup_net+0x4de/0x7b0 net/core/net_namespace.c:696
+  process_one_work kernel/workqueue.c:3257 [inline]
+  process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+  worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+  kthread+0x711/0x8a0 kernel/kthread.c:463
+  ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+ </TASK>
+
+Allocated by task 803:
+  kasan_save_stack mm/kasan/common.c:57 [inline]
+  kasan_save_track+0x3e/0x80 mm/kasan/common.c:78
+  unpoison_slab_object mm/kasan/common.c:340 [inline]
+  __kasan_slab_alloc+0x6c/0x80 mm/kasan/common.c:366
+  kasan_slab_alloc include/linux/kasan.h:253 [inline]
+  slab_post_alloc_hook mm/slub.c:4953 [inline]
+  slab_alloc_node mm/slub.c:5263 [inline]
+  kmem_cache_alloc_noprof+0x18d/0x6c0 mm/slub.c:5270
+  dst_alloc+0x105/0x170 net/core/dst.c:89
+  ip6_dst_alloc net/ipv6/route.c:342 [inline]
+  icmp6_dst_alloc+0x75/0x460 net/ipv6/route.c:3333
+  mld_sendpack+0x683/0xe60 net/ipv6/mcast.c:1844
+  mld_send_cr net/ipv6/mcast.c:2154 [inline]
+  mld_ifc_work+0x83e/0xd60 net/ipv6/mcast.c:2693
+  process_one_work kernel/workqueue.c:3257 [inline]
+  process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+  worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+  kthread+0x711/0x8a0 kernel/kthread.c:463
+  ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+
+Freed by task 20:
+  kasan_save_stack mm/kasan/common.c:57 [inline]
+  kasan_save_track+0x3e/0x80 mm/kasan/common.c:78
+  kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584
+  poison_slab_object mm/kasan/common.c:253 [inline]
+  __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:285
+  kasan_slab_free include/linux/kasan.h:235 [inline]
+  slab_free_hook mm/slub.c:2540 [inline]
+  slab_free mm/slub.c:6670 [inline]
+  kmem_cache_free+0x18f/0x8d0 mm/slub.c:6781
+  dst_destroy+0x235/0x350 net/core/dst.c:121
+  rcu_do_batch kernel/rcu/tree.c:2605 [inline]
+  rcu_core kernel/rcu/tree.c:2857 [inline]
+  rcu_cpu_kthread+0xba5/0x1af0 kernel/rcu/tree.c:2945
+  smpboot_thread_fn+0x542/0xa60 kernel/smpboot.c:160
+  kthread+0x711/0x8a0 kernel/kthread.c:463
+  ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+
+Last potentially related work creation:
+  kasan_save_stack+0x3e/0x60 mm/kasan/common.c:57
+  kasan_record_aux_stack+0xbd/0xd0 mm/kasan/generic.c:556
+  __call_rcu_common kernel/rcu/tree.c:3119 [inline]
+  call_rcu+0xee/0x890 kernel/rcu/tree.c:3239
+  refdst_drop include/net/dst.h:266 [inline]
+  skb_dst_drop include/net/dst.h:278 [inline]
+  skb_release_head_state+0x71/0x360 net/core/skbuff.c:1156
+  skb_release_all net/core/skbuff.c:1180 [inline]
+  __kfree_skb net/core/skbuff.c:1196 [inline]
+  sk_skb_reason_drop+0xe9/0x170 net/core/skbuff.c:1234
+  kfree_skb_reason include/linux/skbuff.h:1322 [inline]
+  tcf_kfree_skb_list include/net/sch_generic.h:1127 [inline]
+  __dev_xmit_skb net/core/dev.c:4260 [inline]
+  __dev_queue_xmit+0x26aa/0x3210 net/core/dev.c:4785
+  NF_HOOK_COND include/linux/netfilter.h:307 [inline]
+  ip6_output+0x340/0x550 net/ipv6/ip6_output.c:247
+  NF_HOOK+0x9e/0x380 include/linux/netfilter.h:318
+  mld_sendpack+0x8d4/0xe60 net/ipv6/mcast.c:1855
+  mld_send_cr net/ipv6/mcast.c:2154 [inline]
+  mld_ifc_work+0x83e/0xd60 net/ipv6/mcast.c:2693
+  process_one_work kernel/workqueue.c:3257 [inline]
+  process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+  worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+  kthread+0x711/0x8a0 kernel/kthread.c:463
+  ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:246
+
+The buggy address belongs to the object at ffff8880294cfa00
+ which belongs to the cache ip6_dst_cache of size 232
+The buggy address is located 120 bytes inside of
+ freed 232-byte region [ffff8880294cfa00, ffff8880294cfae8)
+
+The buggy address belongs to the physical page:
+page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x294cf
+memcg:ffff88803536b781
+flags: 0x80000000000000(node=0|zone=1)
+page_type: f5(slab)
+raw: 0080000000000000 ffff88802ff1c8c0 ffffea0000bf2bc0 dead000000000006
+raw: 0000000000000000 00000000800c000c 00000000f5000000 ffff88803536b781
+page dumped because: kasan: bad access detected
+page_owner tracks the page as allocated
+page last allocated via order 0, migratetype Unmovable, gfp_mask 0x52820(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP), pid 9, tgid 9 (kworker/0:0), ts 91119585830, free_ts 91088628818
+  set_page_owner include/linux/page_owner.h:32 [inline]
+  post_alloc_hook+0x234/0x290 mm/page_alloc.c:1857
+  prep_new_page mm/page_alloc.c:1865 [inline]
+  get_page_from_freelist+0x28c0/0x2960 mm/page_alloc.c:3915
+  __alloc_frozen_pages_noprof+0x181/0x370 mm/page_alloc.c:5210
+  alloc_pages_mpol+0xd1/0x380 mm/mempolicy.c:2486
+  alloc_slab_page mm/slub.c:3075 [inline]
+  allocate_slab+0x86/0x3b0 mm/slub.c:3248
+  new_slab mm/slub.c:3302 [inline]
+  ___slab_alloc+0xb10/0x13e0 mm/slub.c:4656
+  __slab_alloc+0xc6/0x1f0 mm/slub.c:4779
+  __slab_alloc_node mm/slub.c:4855 [inline]
+  slab_alloc_node mm/slub.c:5251 [inline]
+  kmem_cache_alloc_noprof+0x101/0x6c0 mm/slub.c:5270
+  dst_alloc+0x105/0x170 net/core/dst.c:89
+  ip6_dst_alloc net/ipv6/route.c:342 [inline]
+  icmp6_dst_alloc+0x75/0x460 net/ipv6/route.c:3333
+  mld_sendpack+0x683/0xe60 net/ipv6/mcast.c:1844
+  mld_send_cr net/ipv6/mcast.c:2154 [inline]
+  mld_ifc_work+0x83e/0xd60 net/ipv6/mcast.c:2693
+  process_one_work kernel/workqueue.c:3257 [inline]
+  process_scheduled_works+0xad1/0x1770 kernel/workqueue.c:3340
+  worker_thread+0x8a0/0xda0 kernel/workqueue.c:3421
+  kthread+0x711/0x8a0 kernel/kthread.c:463
+  ret_from_fork+0x510/0xa50 arch/x86/kernel/process.c:158
+page last free pid 5859 tgid 5859 stack trace:
+  reset_page_owner include/linux/page_owner.h:25 [inline]
+  free_pages_prepare mm/page_alloc.c:1406 [inline]
+  __free_frozen_pages+0xfe1/0x1170 mm/page_alloc.c:2943
+  discard_slab mm/slub.c:3346 [inline]
+  __put_partials+0x149/0x170 mm/slub.c:3886
+  __slab_free+0x2af/0x330 mm/slub.c:5952
+  qlink_free mm/kasan/quarantine.c:163 [inline]
+  qlist_free_all+0x97/0x100 mm/kasan/quarantine.c:179
+  kasan_quarantine_reduce+0x148/0x160 mm/kasan/quarantine.c:286
+  __kasan_slab_alloc+0x22/0x80 mm/kasan/common.c:350
+  kasan_slab_alloc include/linux/kasan.h:253 [inline]
+  slab_post_alloc_hook mm/slub.c:4953 [inline]
+  slab_alloc_node mm/slub.c:5263 [inline]
+  kmem_cache_alloc_noprof+0x18d/0x6c0 mm/slub.c:5270
+  getname_flags+0xb8/0x540 fs/namei.c:146
+  getname include/linux/fs.h:2498 [inline]
+  do_sys_openat2+0xbc/0x200 fs/open.c:1426
+  do_sys_open fs/open.c:1436 [inline]
+  __do_sys_openat fs/open.c:1452 [inline]
+  __se_sys_openat fs/open.c:1447 [inline]
+  __x64_sys_openat+0x138/0x170 fs/open.c:1447
+  do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+  do_syscall_64+0xec/0xf80 arch/x86/entry/syscall_64.c:94
+
+Fixes: 8d0b94afdca8 ("ipv6: Keep track of DST_NOCACHE routes in case of iface down/unregister")
+Fixes: 78df76a065ae ("ipv4: take rt_uncached_lock only if needed")
+Reported-by: syzbot+179fc225724092b8b2b2@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/6964cdf2.050a0220.eaf7.009d.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Martin KaFai Lau <martin.lau@kernel.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20260112103825.3810713-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dst.c   |    1 +
+ net/ipv4/route.c |    4 ++--
+ net/ipv6/route.c |    4 ++--
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/net/core/dst.c
++++ b/net/core/dst.c
+@@ -68,6 +68,7 @@ void dst_init(struct dst_entry *dst, str
+       dst->lwtstate = NULL;
+       rcuref_init(&dst->__rcuref, initial_ref);
+       INIT_LIST_HEAD(&dst->rt_uncached);
++      dst->rt_uncached_list = NULL;
+       dst->__use = 0;
+       dst->lastuse = jiffies;
+       dst->flags = flags;
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1546,9 +1546,9 @@ void rt_add_uncached_list(struct rtable
+ void rt_del_uncached_list(struct rtable *rt)
+ {
+-      if (!list_empty(&rt->dst.rt_uncached)) {
+-              struct uncached_list *ul = rt->dst.rt_uncached_list;
++      struct uncached_list *ul = rt->dst.rt_uncached_list;
++      if (ul) {
+               spin_lock_bh(&ul->lock);
+               list_del_init(&rt->dst.rt_uncached);
+               spin_unlock_bh(&ul->lock);
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -149,9 +149,9 @@ void rt6_uncached_list_add(struct rt6_in
+ void rt6_uncached_list_del(struct rt6_info *rt)
+ {
+-      if (!list_empty(&rt->dst.rt_uncached)) {
+-              struct uncached_list *ul = rt->dst.rt_uncached_list;
++      struct uncached_list *ul = rt->dst.rt_uncached_list;
++      if (ul) {
+               spin_lock_bh(&ul->lock);
+               list_del_init(&rt->dst.rt_uncached);
+               spin_unlock_bh(&ul->lock);
diff --git a/queue-6.6/eth-bnxt-always-recalculate-features-after-xdp-clearing-fix-null-deref.patch b/queue-6.6/eth-bnxt-always-recalculate-features-after-xdp-clearing-fix-null-deref.patch
new file mode 100644 (file)
index 0000000..38b2963
--- /dev/null
@@ -0,0 +1,162 @@
+From stable+bounces-219193-greg=kroah.com@vger.kernel.org Wed Feb 25 07:01:18 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Wed, 25 Feb 2026 14:00:20 +0800
+Subject: eth: bnxt: always recalculate features after XDP clearing, fix null-deref
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Jakub Kicinski <kuba@kernel.org>, Michael Chan <michael.chan@broadcom.com>, Somnath Kotur <somnath.kotur@broadcom.com>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260225060020.3361855-1-black.hawk@163.com>
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit f0aa6a37a3dbb40b272df5fc6db93c114688adcd ]
+
+Recalculate features when XDP is detached.
+
+Before:
+  # ip li set dev eth0 xdp obj xdp_dummy.bpf.o sec xdp
+  # ip li set dev eth0 xdp off
+  # ethtool -k eth0 | grep gro
+  rx-gro-hw: off [requested on]
+
+After:
+  # ip li set dev eth0 xdp obj xdp_dummy.bpf.o sec xdp
+  # ip li set dev eth0 xdp off
+  # ethtool -k eth0 | grep gro
+  rx-gro-hw: on
+
+The fact that HW-GRO doesn't get re-enabled automatically is just
+a minor annoyance. The real issue is that the features will randomly
+come back during another reconfiguration which just happens to invoke
+netdev_update_features(). The driver doesn't handle reconfiguring
+two things at a time very robustly.
+
+Starting with commit 98ba1d931f61 ("bnxt_en: Fix RSS logic in
+__bnxt_reserve_rings()") we only reconfigure the RSS hash table
+if the "effective" number of Rx rings has changed. If HW-GRO is
+enabled "effective" number of rings is 2x what user sees.
+So if we are in the bad state, with HW-GRO re-enablement "pending"
+after XDP off, and we lower the rings by / 2 - the HW-GRO rings
+doing 2x and the ethtool -L doing / 2 may cancel each other out,
+and the:
+
+  if (old_rx_rings != bp->hw_resc.resv_rx_rings &&
+
+condition in __bnxt_reserve_rings() will be false.
+The RSS map won't get updated, and we'll crash with:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000168
+  RIP: 0010:__bnxt_hwrm_vnic_set_rss+0x13a/0x1a0
+    bnxt_hwrm_vnic_rss_cfg_p5+0x47/0x180
+    __bnxt_setup_vnic_p5+0x58/0x110
+    bnxt_init_nic+0xb72/0xf50
+    __bnxt_open_nic+0x40d/0xab0
+    bnxt_open_nic+0x2b/0x60
+    ethtool_set_channels+0x18c/0x1d0
+
+As we try to access a freed ring.
+
+The issue is present since XDP support was added, really, but
+prior to commit 98ba1d931f61 ("bnxt_en: Fix RSS logic in
+__bnxt_reserve_rings()") it wasn't causing major issues.
+
+Fixes: 1054aee82321 ("bnxt_en: Use NETIF_F_GRO_HW.")
+Fixes: 98ba1d931f61 ("bnxt_en: Fix RSS logic in __bnxt_reserve_rings()")
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Link: https://patch.msgid.link/20250109043057.2888953-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ The context change is due to the commit 1f6e77cb9b32
+("bnxt_en: Add bnxt_l2_filter hash table.") in v6.8 and the commit
+8336a974f37d ("bnxt_en: Save user configured filters in a lookup list")
+in v6.9 which are irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     |   25 ++++++++++++++++++++-----
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h     |    2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |    7 -------
+ 3 files changed, 21 insertions(+), 13 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -3996,7 +3996,7 @@ void bnxt_set_ring_params(struct bnxt *b
+ /* Changing allocation mode of RX rings.
+  * TODO: Update when extending xdp_rxq_info to support allocation modes.
+  */
+-int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
++static void __bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
+ {
+       struct net_device *dev = bp->dev;
+@@ -4017,15 +4017,30 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp
+                       bp->rx_skb_func = bnxt_rx_page_skb;
+               }
+               bp->rx_dir = DMA_BIDIRECTIONAL;
+-              /* Disable LRO or GRO_HW */
+-              netdev_update_features(dev);
+       } else {
+               dev->max_mtu = bp->max_mtu;
+               bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
+               bp->rx_dir = DMA_FROM_DEVICE;
+               bp->rx_skb_func = bnxt_rx_skb;
+       }
+-      return 0;
++}
++
++void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
++{
++      __bnxt_set_rx_skb_mode(bp, page_mode);
++
++      if (!page_mode) {
++              int rx, tx;
++
++              bnxt_get_max_rings(bp, &rx, &tx, true);
++              if (rx > 1) {
++                      bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
++                      bp->dev->hw_features |= NETIF_F_LRO;
++              }
++      }
++
++      /* Update LRO and GRO_HW availability */
++      netdev_update_features(bp->dev);
+ }
+ static void bnxt_free_vnic_attributes(struct bnxt *bp)
+@@ -13773,7 +13788,7 @@ static int bnxt_init_one(struct pci_dev
+       if (rc)
+               goto init_err_pci_clean;
+-      bnxt_set_rx_skb_mode(bp, false);
++      __bnxt_set_rx_skb_mode(bp, false);
+       bnxt_set_tpa_flags(bp);
+       bnxt_set_ring_params(bp);
+       rc = bnxt_set_dflt_rings(bp, true);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -2332,7 +2332,7 @@ void bnxt_reuse_rx_data(struct bnxt_rx_r
+ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
+ void bnxt_set_tpa_flags(struct bnxt *bp);
+ void bnxt_set_ring_params(struct bnxt *);
+-int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
++void bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
+ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
+                           int bmap_size, bool async_only);
+ int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp);
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -422,15 +422,8 @@ static int bnxt_xdp_set(struct bnxt *bp,
+               bnxt_set_rx_skb_mode(bp, true);
+               xdp_features_set_redirect_target(dev, true);
+       } else {
+-              int rx, tx;
+-
+               xdp_features_clear_redirect_target(dev);
+               bnxt_set_rx_skb_mode(bp, false);
+-              bnxt_get_max_rings(bp, &rx, &tx, true);
+-              if (rx > 1) {
+-                      bp->flags &= ~BNXT_FLAG_NO_AGG_RINGS;
+-                      bp->dev->hw_features |= NETIF_F_LRO;
+-              }
+       }
+       bp->tx_nr_rings_xdp = tx_xdp;
+       bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp;
diff --git a/queue-6.6/ext4-always-allocate-blocks-only-from-groups-inode-can-use.patch b/queue-6.6/ext4-always-allocate-blocks-only-from-groups-inode-can-use.patch
new file mode 100644 (file)
index 0000000..799d13f
--- /dev/null
@@ -0,0 +1,71 @@
+From stable+bounces-219686-greg=kroah.com@vger.kernel.org Wed Feb 25 19:15:41 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Feb 2026 13:15:35 -0500
+Subject: ext4: always allocate blocks only from groups inode can use
+To: stable@vger.kernel.org
+Cc: Jan Kara <jack@suse.cz>, Baokun Li <libaokun1@huawei.com>, Zhang Yi <yi.zhang@huawei.com>, Pedro Falcato <pfalcato@suse.de>, stable@kernel.org, Theodore Ts'o <tytso@mit.edu>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260225181535.912817-1-sashal@kernel.org>
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 4865c768b563deff1b6a6384e74a62f143427b42 ]
+
+For filesystems with more than 2^32 blocks inodes using indirect block
+based format cannot use blocks beyond the 32-bit limit.
+ext4_mb_scan_groups_linear() takes care to not select these unsupported
+groups for such inodes however other functions selecting groups for
+allocation don't. So far this is harmless because the other selection
+functions are used only with mb_optimize_scan and this is currently
+disabled for inodes with indirect blocks however in the following patch
+we want to enable mb_optimize_scan regardless of inode format.
+
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Acked-by: Pedro Falcato <pfalcato@suse.de>
+Cc: stable@kernel.org
+Link: https://patch.msgid.link/20260114182836.14120-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+[ Drop a few hunks not needed in older trees ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c |   20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -883,6 +883,21 @@ mb_update_avg_fragment_size(struct super
+       }
+ }
++static ext4_group_t ext4_get_allocation_groups_count(
++                              struct ext4_allocation_context *ac)
++{
++      ext4_group_t ngroups = ext4_get_groups_count(ac->ac_sb);
++
++      /* non-extent files are limited to low blocks/groups */
++      if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
++              ngroups = EXT4_SB(ac->ac_sb)->s_blockfile_groups;
++
++      /* Pairs with smp_wmb() in ext4_update_super() */
++      smp_rmb();
++
++      return ngroups;
++}
++
+ /*
+  * Choose next group by traversing largest_free_order lists. Updates *new_cr if
+  * cr level needs an update.
+@@ -2817,10 +2832,7 @@ ext4_mb_regular_allocator(struct ext4_al
+       sb = ac->ac_sb;
+       sbi = EXT4_SB(sb);
+-      ngroups = ext4_get_groups_count(sb);
+-      /* non-extent files are limited to low blocks/groups */
+-      if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
+-              ngroups = sbi->s_blockfile_groups;
++      ngroups = ext4_get_allocation_groups_count(ac);
+       BUG_ON(ac->ac_status == AC_STATUS_FOUND);
diff --git a/queue-6.6/ext4-fix-dirtyclusters-double-decrement-on-fs-shutdown.patch b/queue-6.6/ext4-fix-dirtyclusters-double-decrement-on-fs-shutdown.patch
new file mode 100644 (file)
index 0000000..fc2f05c
--- /dev/null
@@ -0,0 +1,109 @@
+From stable+bounces-219625-greg=kroah.com@vger.kernel.org Wed Feb 25 14:52:44 2026
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Feb 2026 08:47:44 -0500
+Subject: ext4: fix dirtyclusters double decrement on fs shutdown
+To: stable@vger.kernel.org
+Cc: Brian Foster <bfoster@redhat.com>, Baokun Li <libaokun1@huawei.com>, Theodore Ts'o <tytso@mit.edu>, stable@kernel.org, Sasha Levin <sashal@kernel.org>
+Message-ID: <20260225134744.311174-1-sashal@kernel.org>
+
+From: Brian Foster <bfoster@redhat.com>
+
+[ Upstream commit 94a8cea54cd935c54fa2fba70354757c0fc245e3 ]
+
+fstests test generic/388 occasionally reproduces a warning in
+ext4_put_super() associated with the dirty clusters count:
+
+  WARNING: CPU: 7 PID: 76064 at fs/ext4/super.c:1324 ext4_put_super+0x48c/0x590 [ext4]
+
+Tracing the failure shows that the warning fires due to an
+s_dirtyclusters_counter value of -1. IOW, this appears to be a
+spurious decrement as opposed to some sort of leak. Further tracing
+of the dirty cluster count deltas and an LLM scan of the resulting
+output identified the cause as a double decrement in the error path
+between ext4_mb_mark_diskspace_used() and the caller
+ext4_mb_new_blocks().
+
+First, note that generic/388 is a shutdown vs. fsstress test and so
+produces a random set of operations and shutdown injections. In the
+problematic case, the shutdown triggers an error return from the
+ext4_handle_dirty_metadata() call(s) made from
+ext4_mb_mark_context(). The changed value is non-zero at this point,
+so ext4_mb_mark_diskspace_used() does not exit after the error
+bubbles up from ext4_mb_mark_context(). Instead, the former
+decrements both cluster counters and returns the error up to
+ext4_mb_new_blocks(). The latter falls into the !ar->len out path
+which decrements the dirty clusters counter a second time, creating
+the inconsistency.
+
+To avoid this problem and simplify ownership of the cluster
+reservation in this codepath, lift the counter reduction to a single
+place in the caller. This makes it more clear that
+ext4_mb_new_blocks() is responsible for acquiring cluster
+reservation (via ext4_claim_free_clusters()) in the !delalloc case
+as well as releasing it, regardless of whether it ends up consumed
+or returned due to failure.
+
+Fixes: 0087d9fb3f29 ("ext4: Fix s_dirty_blocks_counter if block allocation failed with nodelalloc")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Link: https://patch.msgid.link/20260113171905.118284-1-bfoster@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+[ Drop mballoc-test changes ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c |   21 +++++----------------
+ 1 file changed, 5 insertions(+), 16 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3999,8 +3999,7 @@ void ext4_exit_mballoc(void)
+  * Returns 0 if success or error code
+  */
+ static noinline_for_stack int
+-ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+-                              handle_t *handle, unsigned int reserv_clstrs)
++ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, handle_t *handle)
+ {
+       struct buffer_head *bitmap_bh = NULL;
+       struct ext4_group_desc *gdp;
+@@ -4086,13 +4085,6 @@ ext4_mb_mark_diskspace_used(struct ext4_
+       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+       percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
+-      /*
+-       * Now reduce the dirty block count also. Should not go negative
+-       */
+-      if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+-              /* release all the reserved blocks if non delalloc */
+-              percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+-                                 reserv_clstrs);
+       if (sbi->s_log_groups_per_flex) {
+               ext4_group_t flex_group = ext4_flex_group(sbi,
+@@ -6265,7 +6257,7 @@ repeat:
+                       ext4_mb_pa_put_free(ac);
+       }
+       if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+-              *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
++              *errp = ext4_mb_mark_diskspace_used(ac, handle);
+               if (*errp) {
+                       ext4_discard_allocated_blocks(ac);
+                       goto errout;
+@@ -6296,12 +6288,9 @@ errout:
+ out:
+       if (inquota && ar->len < inquota)
+               dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
+-      if (!ar->len) {
+-              if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
+-                      /* release all the reserved blocks if non delalloc */
+-                      percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+-                                              reserv_clstrs);
+-      }
++      /* release any reserved blocks */
++      if (reserv_clstrs)
++              percpu_counter_sub(&sbi->s_dirtyclusters_counter, reserv_clstrs);
+       trace_ext4_allocate_blocks(ar, (unsigned long long)block);
diff --git a/queue-6.6/net-stmmac-dwmac-loongson-set-clk_csr_i-to-100-150mhz.patch b/queue-6.6/net-stmmac-dwmac-loongson-set-clk_csr_i-to-100-150mhz.patch
new file mode 100644 (file)
index 0000000..802c744
--- /dev/null
@@ -0,0 +1,38 @@
+From stable+bounces-217275-greg=kroah.com@vger.kernel.org Wed Feb 18 13:13:16 2026
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Wed, 18 Feb 2026 20:12:42 +0800
+Subject: net: stmmac: dwmac-loongson: Set clk_csr_i to 100-150MHz
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Sasha Levin <sashal@kernel.org>, Huacai Chen <chenhuacai@kernel.org>
+Cc: Xuerui Wang <kernel@xen0n.name>, stable@vger.kernel.org, Andrew Lunn <andrew+netdev@lunn.ch>, "David S . Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Yanteng Si <si.yanteng@linux.dev>, linux-kernel@vger.kernel.org, loongarch@lists.linux.dev, netdev@vger.kernel.org, Huacai Chen <chenhuacai@loongson.cn>, Hongliang Wang <wanghongliang@loongson.cn>
+Message-ID: <20260218121242.2545128-1-chenhuacai@loongson.cn>
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit e1aa5ef892fb4fa9014a25e87b64b97347919d37 upstream.
+
+Current clk_csr_i setting of Loongson STMMAC (including LS7A1000/2000
+and LS2K1000/2000/3000) are copy & paste from other drivers. In fact,
+Loongson STMMAC use 125MHz clocks and need 62 freq division to within
+2.5MHz, meeting most PHY MDC requirement. So fix by setting clk_csr_i
+to 100-150MHz, otherwise some PHYs may link fail.
+
+Cc: stable@vger.kernel.org
+Fixes: 30bba69d7db40e7 ("stmmac: pci: Add dwmac support for Loongson")
+Signed-off-by: Hongliang Wang <wanghongliang@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c
+@@ -14,7 +14,7 @@
+ static int loongson_default_data(struct plat_stmmacenet_data *plat)
+ {
+-      plat->clk_csr = 2;      /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
++      plat->clk_csr = 1;      /* clk_csr_i = 100-150MHz & MDC = clk_csr_i/62 */
+       plat->has_gmac = 1;
+       plat->force_sf_dma_mode = 1;
diff --git a/queue-6.6/nfs-fix-a-deadlock-involving-nfs_release_folio.patch b/queue-6.6/nfs-fix-a-deadlock-involving-nfs_release_folio.patch
new file mode 100644 (file)
index 0000000..4167b87
--- /dev/null
@@ -0,0 +1,110 @@
+From stable+bounces-217867-greg=kroah.com@vger.kernel.org Tue Feb 24 08:02:54 2026
+From: Li hongliang <1468888505@139.com>
+Date: Tue, 24 Feb 2026 15:02:37 +0800
+Subject: NFS: Fix a deadlock involving nfs_release_folio()
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org, trond.myklebust@hammerspace.com
+Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, anna@kernel.org, linux-nfs@vger.kernel.org, wangzhaolong@huaweicloud.com
+Message-ID: <20260224070237.2933965-1-1468888505@139.com>
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit cce0be6eb4971456b703aaeafd571650d314bcca ]
+
+Wang Zhaolong reports a deadlock involving NFSv4.1 state recovery
+waiting on kthreadd, which is attempting to reclaim memory by calling
+nfs_release_folio(). The latter cannot make progress due to state
+recovery being needed.
+
+It seems that the only safe thing to do here is to kick off a writeback
+of the folio, without waiting for completion, or else kicking off an
+asynchronous commit.
+
+Reported-by: Wang Zhaolong <wangzhaolong@huaweicloud.com>
+Fixes: 96780ca55e3c ("NFS: fix up nfs_release_folio() to try to release the page")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+[ Minor conflict resolved. ]
+Signed-off-by: Li hongliang <1468888505@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/file.c          |    3 ++-
+ fs/nfs/nfstrace.h      |    3 +++
+ fs/nfs/write.c         |   33 +++++++++++++++++++++++++++++++++
+ include/linux/nfs_fs.h |    1 +
+ 4 files changed, 39 insertions(+), 1 deletion(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -459,7 +459,8 @@ static bool nfs_release_folio(struct fol
+               if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
+                   current_is_kswapd() || current_is_kcompactd())
+                       return false;
+-              if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0)
++              if (nfs_wb_folio_reclaim(folio_file_mapping(folio)->host, folio) < 0 ||
++                  folio_test_private(folio))
+                       return false;
+       }
+       return nfs_fscache_release_folio(folio, gfp);
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -1033,6 +1033,9 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+ DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
+ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
++DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio_reclaim);
++DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_reclaim_done);
++
+ DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio);
+ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done);
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -2122,6 +2122,39 @@ int nfs_wb_folio_cancel(struct inode *in
+ }
+ /**
++ * nfs_wb_folio_reclaim - Write back all requests on one page
++ * @inode: pointer to page
++ * @folio: pointer to folio
++ *
++ * Assumes that the folio has been locked by the caller
++ */
++int nfs_wb_folio_reclaim(struct inode *inode, struct folio *folio)
++{
++      loff_t range_start = folio_pos(folio);
++      size_t len = folio_size(folio);
++      struct writeback_control wbc = {
++              .sync_mode = WB_SYNC_ALL,
++              .nr_to_write = 0,
++              .range_start = range_start,
++              .range_end = range_start + len - 1,
++              .for_sync = 1,
++      };
++      int ret;
++
++      if (folio_test_writeback(folio))
++              return -EBUSY;
++      if (folio_clear_dirty_for_io(folio)) {
++              trace_nfs_writeback_folio_reclaim(inode, range_start, len);
++              ret = nfs_writepage_locked(folio, &wbc);
++              trace_nfs_writeback_folio_reclaim_done(inode, range_start, len,
++                                                     ret);
++              return ret;
++      }
++      nfs_commit_inode(inode, 0);
++      return 0;
++}
++
++/**
+  * nfs_wb_folio - Write back all requests on one page
+  * @inode: pointer to page
+  * @folio: pointer to folio
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -608,6 +608,7 @@ extern int  nfs_update_folio(struct file
+ extern int nfs_sync_inode(struct inode *inode);
+ extern int nfs_wb_all(struct inode *inode);
+ extern int nfs_wb_folio(struct inode *inode, struct folio *folio);
++extern int nfs_wb_folio_reclaim(struct inode *inode, struct folio *folio);
+ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio);
+ extern int  nfs_commit_inode(struct inode *, int);
+ extern struct nfs_commit_data *nfs_commitdata_alloc(void);
diff --git a/queue-6.6/nfs-pass-explicit-offset-count-to-trace-events.patch b/queue-6.6/nfs-pass-explicit-offset-count-to-trace-events.patch
new file mode 100644 (file)
index 0000000..d5b4fd3
--- /dev/null
@@ -0,0 +1,226 @@
+From stable+bounces-217866-greg=kroah.com@vger.kernel.org Tue Feb 24 08:01:20 2026
+From: Li hongliang <1468888505@139.com>
+Date: Tue, 24 Feb 2026 15:00:58 +0800
+Subject: nfs: pass explicit offset/count to trace events
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org, hch@lst.de
+Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, trond.myklebust@hammerspace.com, anna@kernel.org, linux-nfs@vger.kernel.org, chuck.lever@oracle.com, Anna.Schumaker@Netapp.com
+Message-ID: <20260224070058.2933695-1-1468888505@139.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit fada32ed6dbc748f447c8d050a961b75d946055a ]
+
+nfs_folio_length is unsafe to use without having the folio locked and a
+check for a NULL ->f_mapping that protects against truncations and can
+lead to kernel crashes.  E.g. when running xfstests generic/065 with
+all nfs trace points enabled.
+
+Follow the model of the XFS trace points and pass in an explicit offset
+and length.  This has the additional benefit that these values can
+be more accurate as some of the users touch partial folio ranges.
+
+Fixes: eb5654b3b89d ("NFS: Enable tracing of nfs_invalidate_folio() and nfs_launder_folio()")
+Reported-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+[ Minor conflict resolved. ]
+Signed-off-by: Li hongliang <1468888505@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/file.c     |    5 +++--
+ fs/nfs/nfstrace.h |   36 ++++++++++++++++++++----------------
+ fs/nfs/read.c     |    8 +++++---
+ fs/nfs/write.c    |   10 +++++-----
+ 4 files changed, 33 insertions(+), 26 deletions(-)
+
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -441,7 +441,7 @@ static void nfs_invalidate_folio(struct
+       /* Cancel any unstarted writes on this page */
+       nfs_wb_folio_cancel(inode, folio);
+       folio_wait_fscache(folio);
+-      trace_nfs_invalidate_folio(inode, folio);
++      trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
+ }
+ /*
+@@ -509,7 +509,8 @@ static int nfs_launder_folio(struct foli
+       folio_wait_fscache(folio);
+       ret = nfs_wb_folio(inode, folio);
+-      trace_nfs_launder_folio_done(inode, folio, ret);
++      trace_nfs_launder_folio_done(inode, folio_pos(folio),
++                      folio_size(folio), ret);
+       return ret;
+ }
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -933,10 +933,11 @@ TRACE_EVENT(nfs_sillyrename_unlink,
+ DECLARE_EVENT_CLASS(nfs_folio_event,
+               TP_PROTO(
+                       const struct inode *inode,
+-                      struct folio *folio
++                      loff_t offset,
++                      size_t count
+               ),
+-              TP_ARGS(inode, folio),
++              TP_ARGS(inode, offset, count),
+               TP_STRUCT__entry(
+                       __field(dev_t, dev)
+@@ -944,7 +945,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
+                       __field(u64, fileid)
+                       __field(u64, version)
+                       __field(loff_t, offset)
+-                      __field(u32, count)
++                      __field(size_t, count)
+               ),
+               TP_fast_assign(
+@@ -954,13 +955,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
+                       __entry->fileid = nfsi->fileid;
+                       __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+                       __entry->version = inode_peek_iversion_raw(inode);
+-                      __entry->offset = folio_file_pos(folio);
+-                      __entry->count = nfs_folio_length(folio);
++                      __entry->offset = offset,
++                      __entry->count = count;
+               ),
+               TP_printk(
+                       "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+-                      "offset=%lld count=%u",
++                      "offset=%lld count=%zu",
+                       MAJOR(__entry->dev), MINOR(__entry->dev),
+                       (unsigned long long)__entry->fileid,
+                       __entry->fhandle, __entry->version,
+@@ -972,18 +973,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
+       DEFINE_EVENT(nfs_folio_event, name, \
+                       TP_PROTO( \
+                               const struct inode *inode, \
+-                              struct folio *folio \
++                              loff_t offset, \
++                              size_t count \
+                       ), \
+-                      TP_ARGS(inode, folio))
++                      TP_ARGS(inode, offset, count))
+ DECLARE_EVENT_CLASS(nfs_folio_event_done,
+               TP_PROTO(
+                       const struct inode *inode,
+-                      struct folio *folio,
++                      loff_t offset,
++                      size_t count,
+                       int ret
+               ),
+-              TP_ARGS(inode, folio, ret),
++              TP_ARGS(inode, offset, count, ret),
+               TP_STRUCT__entry(
+                       __field(dev_t, dev)
+@@ -992,7 +995,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+                       __field(u64, fileid)
+                       __field(u64, version)
+                       __field(loff_t, offset)
+-                      __field(u32, count)
++                      __field(size_t, count)
+               ),
+               TP_fast_assign(
+@@ -1002,14 +1005,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+                       __entry->fileid = nfsi->fileid;
+                       __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+                       __entry->version = inode_peek_iversion_raw(inode);
+-                      __entry->offset = folio_file_pos(folio);
+-                      __entry->count = nfs_folio_length(folio);
++                      __entry->offset = offset,
++                      __entry->count = count,
+                       __entry->ret = ret;
+               ),
+               TP_printk(
+                       "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+-                      "offset=%lld count=%u ret=%d",
++                      "offset=%lld count=%zu ret=%d",
+                       MAJOR(__entry->dev), MINOR(__entry->dev),
+                       (unsigned long long)__entry->fileid,
+                       __entry->fhandle, __entry->version,
+@@ -1021,10 +1024,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done
+       DEFINE_EVENT(nfs_folio_event_done, name, \
+                       TP_PROTO( \
+                               const struct inode *inode, \
+-                              struct folio *folio, \
++                              loff_t offset, \
++                              size_t count, \
+                               int ret \
+                       ), \
+-                      TP_ARGS(inode, folio, ret))
++                      TP_ARGS(inode, offset, count, ret))
+ DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
+ DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
+--- a/fs/nfs/read.c
++++ b/fs/nfs/read.c
+@@ -333,13 +333,15 @@ out:
+ int nfs_read_folio(struct file *file, struct folio *folio)
+ {
+       struct inode *inode = file_inode(file);
++      loff_t pos = folio_pos(folio);
++      size_t len = folio_size(folio);
+       struct nfs_pageio_descriptor pgio;
+       struct nfs_open_context *ctx;
+       int ret;
+-      trace_nfs_aop_readpage(inode, folio);
++      trace_nfs_aop_readpage(inode, pos, len);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+-      task_io_account_read(folio_size(folio));
++      task_io_account_read(len);
+       /*
+        * Try to flush any pending writes to the file..
+@@ -382,7 +384,7 @@ int nfs_read_folio(struct file *file, st
+ out_put:
+       put_nfs_open_context(ctx);
+ out:
+-      trace_nfs_aop_readpage_done(inode, folio, ret);
++      trace_nfs_aop_readpage_done(inode, pos, len, ret);
+       return ret;
+ out_unlock:
+       folio_unlock(folio);
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -2131,17 +2131,17 @@ int nfs_wb_folio_cancel(struct inode *in
+  */
+ int nfs_wb_folio(struct inode *inode, struct folio *folio)
+ {
+-      loff_t range_start = folio_file_pos(folio);
+-      loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
++      loff_t range_start = folio_pos(folio);
++      size_t len = folio_size(folio);
+       struct writeback_control wbc = {
+               .sync_mode = WB_SYNC_ALL,
+               .nr_to_write = 0,
+               .range_start = range_start,
+-              .range_end = range_end,
++              .range_end = range_start + len - 1,
+       };
+       int ret;
+-      trace_nfs_writeback_folio(inode, folio);
++      trace_nfs_writeback_folio(inode, range_start, len);
+       for (;;) {
+               folio_wait_writeback(folio);
+@@ -2159,7 +2159,7 @@ int nfs_wb_folio(struct inode *inode, st
+                       goto out_error;
+       }
+ out_error:
+-      trace_nfs_writeback_folio_done(inode, folio, ret);
++      trace_nfs_writeback_folio_done(inode, range_start, len, ret);
+       return ret;
+ }
diff --git a/queue-6.6/pnfs-fix-a-deadlock-when-returning-a-delegation-during-open.patch b/queue-6.6/pnfs-fix-a-deadlock-when-returning-a-delegation-during-open.patch
new file mode 100644 (file)
index 0000000..01e6fbe
--- /dev/null
@@ -0,0 +1,214 @@
+From stable+bounces-217868-greg=kroah.com@vger.kernel.org Tue Feb 24 08:22:45 2026
+From: Li hongliang <1468888505@139.com>
+Date: Tue, 24 Feb 2026 15:22:02 +0800
+Subject: pNFS: Fix a deadlock when returning a delegation during open()
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org, trond.myklebust@hammerspace.com
+Cc: patches@lists.linux.dev, linux-kernel@vger.kernel.org, bcodding@hammerspace.com, anna@kernel.org, linux-nfs@vger.kernel.org, wangzhaolong@huaweicloud.com
+Message-ID: <20260224072202.2940831-1-1468888505@139.com>
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 857bf9056291a16785ae3be1d291026b2437fc48 ]
+
+Ben Coddington reports seeing a hang in the following stack trace:
+  0 [ffffd0b50e1774e0] __schedule at ffffffff9ca05415
+  1 [ffffd0b50e177548] schedule at ffffffff9ca05717
+  2 [ffffd0b50e177558] bit_wait at ffffffff9ca061e1
+  3 [ffffd0b50e177568] __wait_on_bit at ffffffff9ca05cfb
+  4 [ffffd0b50e1775c8] out_of_line_wait_on_bit at ffffffff9ca05ea5
+  5 [ffffd0b50e177618] pnfs_roc at ffffffffc154207b [nfsv4]
+  6 [ffffd0b50e1776b8] _nfs4_proc_delegreturn at ffffffffc1506586 [nfsv4]
+  7 [ffffd0b50e177788] nfs4_proc_delegreturn at ffffffffc1507480 [nfsv4]
+  8 [ffffd0b50e1777f8] nfs_do_return_delegation at ffffffffc1523e41 [nfsv4]
+  9 [ffffd0b50e177838] nfs_inode_set_delegation at ffffffffc1524a75 [nfsv4]
+ 10 [ffffd0b50e177888] nfs4_process_delegation at ffffffffc14f41dd [nfsv4]
+ 11 [ffffd0b50e1778a0] _nfs4_opendata_to_nfs4_state at ffffffffc1503edf [nfsv4]
+ 12 [ffffd0b50e1778c0] _nfs4_open_and_get_state at ffffffffc1504e56 [nfsv4]
+ 13 [ffffd0b50e177978] _nfs4_do_open at ffffffffc15051b8 [nfsv4]
+ 14 [ffffd0b50e1779f8] nfs4_do_open at ffffffffc150559c [nfsv4]
+ 15 [ffffd0b50e177a80] nfs4_atomic_open at ffffffffc15057fb [nfsv4]
+ 16 [ffffd0b50e177ad0] nfs4_file_open at ffffffffc15219be [nfsv4]
+ 17 [ffffd0b50e177b78] do_dentry_open at ffffffff9c09e6ea
+ 18 [ffffd0b50e177ba8] vfs_open at ffffffff9c0a082e
+ 19 [ffffd0b50e177bd0] dentry_open at ffffffff9c0a0935
+
+The issue is that the delegreturn is being asked to wait for a layout
+return that cannot complete because a state recovery was initiated. The
+state recovery cannot complete until the open() finishes processing the
+delegations it was given.
+
+The solution is to propagate the existing flags that indicate a
+non-blocking call to the function pnfs_roc(), so that it knows not to
+wait in this situation.
+
+Reported-by: Benjamin Coddington <bcodding@hammerspace.com>
+Fixes: 29ade5db1293 ("pNFS: Wait on outstanding layoutreturns to complete in pnfs_roc()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+[ Minor conflict resolved. ]
+Signed-off-by: Li hongliang <1468888505@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfs/nfs4proc.c |    6 ++---
+ fs/nfs/pnfs.c     |   58 ++++++++++++++++++++++++++++++++++++++----------------
+ fs/nfs/pnfs.h     |   17 ++++++---------
+ 3 files changed, 51 insertions(+), 30 deletions(-)
+
+--- a/fs/nfs/nfs4proc.c
++++ b/fs/nfs/nfs4proc.c
+@@ -3792,8 +3792,8 @@ int nfs4_do_close(struct nfs4_state *sta
+       calldata->res.seqid = calldata->arg.seqid;
+       calldata->res.server = server;
+       calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+-      calldata->lr.roc = pnfs_roc(state->inode,
+-                      &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred);
++      calldata->lr.roc = pnfs_roc(state->inode, &calldata->lr.arg,
++                                  &calldata->lr.res, msg.rpc_cred, wait);
+       if (calldata->lr.roc) {
+               calldata->arg.lr_args = &calldata->lr.arg;
+               calldata->res.lr_res = &calldata->lr.res;
+@@ -6742,7 +6742,7 @@ static int _nfs4_proc_delegreturn(struct
+       data->inode = nfs_igrab_and_active(inode);
+       if (data->inode || issync) {
+               data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res,
+-                                      cred);
++                                      cred, issync);
+               if (data->lr.roc) {
+                       data->args.lr_args = &data->lr.arg;
+                       data->res.lr_res = &data->lr.res;
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -1427,10 +1427,9 @@ pnfs_commit_and_return_layout(struct ino
+       return ret;
+ }
+-bool pnfs_roc(struct inode *ino,
+-              struct nfs4_layoutreturn_args *args,
+-              struct nfs4_layoutreturn_res *res,
+-              const struct cred *cred)
++bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args,
++            struct nfs4_layoutreturn_res *res, const struct cred *cred,
++            bool sync)
+ {
+       struct nfs_inode *nfsi = NFS_I(ino);
+       struct nfs_open_context *ctx;
+@@ -1441,7 +1440,7 @@ bool pnfs_roc(struct inode *ino,
+       nfs4_stateid stateid;
+       enum pnfs_iomode iomode = 0;
+       bool layoutreturn = false, roc = false;
+-      bool skip_read = false;
++      bool skip_read;
+       if (!nfs_have_layout(ino))
+               return false;
+@@ -1454,20 +1453,14 @@ retry:
+               lo = NULL;
+               goto out_noroc;
+       }
+-      pnfs_get_layout_hdr(lo);
+-      if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
+-              spin_unlock(&ino->i_lock);
+-              rcu_read_unlock();
+-              wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
+-                              TASK_UNINTERRUPTIBLE);
+-              pnfs_put_layout_hdr(lo);
+-              goto retry;
+-      }
+       /* no roc if we hold a delegation */
++      skip_read = false;
+       if (nfs4_check_delegation(ino, FMODE_READ)) {
+-              if (nfs4_check_delegation(ino, FMODE_WRITE))
++              if (nfs4_check_delegation(ino, FMODE_WRITE)) {
++                      lo = NULL;
+                       goto out_noroc;
++              }
+               skip_read = true;
+       }
+@@ -1476,12 +1469,43 @@ retry:
+               if (state == NULL)
+                       continue;
+               /* Don't return layout if there is open file state */
+-              if (state->state & FMODE_WRITE)
++              if (state->state & FMODE_WRITE) {
++                      lo = NULL;
+                       goto out_noroc;
++              }
+               if (state->state & FMODE_READ)
+                       skip_read = true;
+       }
++      if (skip_read) {
++              bool writes = false;
++
++              list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
++                      if (lseg->pls_range.iomode != IOMODE_READ) {
++                              writes = true;
++                              break;
++                      }
++              }
++              if (!writes) {
++                      lo = NULL;
++                      goto out_noroc;
++              }
++      }
++
++      pnfs_get_layout_hdr(lo);
++      if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
++              if (!sync) {
++                      pnfs_set_plh_return_info(
++                              lo, skip_read ? IOMODE_RW : IOMODE_ANY, 0);
++                      goto out_noroc;
++              }
++              spin_unlock(&ino->i_lock);
++              rcu_read_unlock();
++              wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
++                          TASK_UNINTERRUPTIBLE);
++              pnfs_put_layout_hdr(lo);
++              goto retry;
++      }
+       list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
+               if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
+@@ -1521,7 +1545,7 @@ retry:
+ out_noroc:
+       spin_unlock(&ino->i_lock);
+       rcu_read_unlock();
+-      pnfs_layoutcommit_inode(ino, true);
++      pnfs_layoutcommit_inode(ino, sync);
+       if (roc) {
+               struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
+               if (ld->prepare_layoutreturn)
+--- a/fs/nfs/pnfs.h
++++ b/fs/nfs/pnfs.h
+@@ -295,10 +295,9 @@ int pnfs_mark_matching_lsegs_return(stru
+                               u32 seq);
+ int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+               struct list_head *lseg_list);
+-bool pnfs_roc(struct inode *ino,
+-              struct nfs4_layoutreturn_args *args,
+-              struct nfs4_layoutreturn_res *res,
+-              const struct cred *cred);
++bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args,
++            struct nfs4_layoutreturn_res *res, const struct cred *cred,
++            bool sync);
+ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
+                 struct nfs4_layoutreturn_res **respp, int *ret);
+ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
+@@ -769,12 +768,10 @@ pnfs_layoutcommit_outstanding(struct ino
+       return false;
+ }
+-
+-static inline bool
+-pnfs_roc(struct inode *ino,
+-              struct nfs4_layoutreturn_args *args,
+-              struct nfs4_layoutreturn_res *res,
+-              const struct cred *cred)
++static inline bool pnfs_roc(struct inode *ino,
++                          struct nfs4_layoutreturn_args *args,
++                          struct nfs4_layoutreturn_res *res,
++                          const struct cred *cred, bool sync)
+ {
+       return false;
+ }
diff --git a/queue-6.6/rxrpc-fix-recvmsg-unconditional-requeue.patch b/queue-6.6/rxrpc-fix-recvmsg-unconditional-requeue.patch
new file mode 100644 (file)
index 0000000..29912a0
--- /dev/null
@@ -0,0 +1,106 @@
+From stable+bounces-219745-greg=kroah.com@vger.kernel.org Thu Feb 26 03:42:48 2026
+From: Robert Garcia <rob_garcia@163.com>
+Date: Thu, 26 Feb 2026 10:41:02 +0800
+Subject: rxrpc: Fix recvmsg() unconditional requeue
+To: stable@vger.kernel.org, David Howells <dhowells@redhat.com>
+Cc: Marc Dionne <marc.dionne@auristor.com>, Robert Garcia <rob_garcia@163.com>, Steven Rostedt <rostedt@goodmis.org>, linux-kernel@vger.kernel.org, Masami Hiramatsu <mhiramat@kernel.org>, "David S . Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, linux-afs@lists.infradead.org, linux-trace-kernel@vger.kernel.org, netdev@vger.kernel.org, Faith <faith@zellic.io>, Pumpkin Chang <pumpkin@devco.re>, Nir Ohfeld <niro@wiz.io>, Willy Tarreau <w@1wt.eu>, Simon Horman <horms@kernel.org>
+Message-ID: <20260226024102.3522867-1-rob_garcia@163.com>
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 2c28769a51deb6022d7fbd499987e237a01dd63a ]
+
+If rxrpc_recvmsg() fails because MSG_DONTWAIT was specified but the call at
+the front of the recvmsg queue already has its mutex locked, it requeues
+the call - whether or not the call is already queued.  The call may be on
+the queue because MSG_PEEK was also passed and so the call was not dequeued
+or because the I/O thread requeued it.
+
+The unconditional requeue may then corrupt the recvmsg queue, leading to
+things like UAFs or refcount underruns.
+
+Fix this by only requeuing the call if it isn't already on the queue - and
+moving it to the front if it is already queued.  If we don't queue it, we
+have to put the ref we obtained by dequeuing it.
+
+Also, MSG_PEEK doesn't dequeue the call so shouldn't call
+rxrpc_notify_socket() for the call if we didn't use up all the data on the
+queue, so fix that also.
+
+Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg")
+Reported-by: Faith <faith@zellic.io>
+Reported-by: Pumpkin Chang <pumpkin@devco.re>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Acked-by: Marc Dionne <marc.dionne@auristor.com>
+cc: Nir Ohfeld <niro@wiz.io>
+cc: Willy Tarreau <w@1wt.eu>
+cc: Simon Horman <horms@kernel.org>
+cc: linux-afs@lists.infradead.org
+cc: stable@kernel.org
+Link: https://patch.msgid.link/95163.1768428203@warthog.procyon.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Use spin_unlock instead of spin_unlock_irq to maintain context consistency.]
+Signed-off-by: Robert Garcia <rob_garcia@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/rxrpc.h |    4 ++++
+ net/rxrpc/recvmsg.c          |   19 +++++++++++++++----
+ 2 files changed, 19 insertions(+), 4 deletions(-)
+
+--- a/include/trace/events/rxrpc.h
++++ b/include/trace/events/rxrpc.h
+@@ -270,6 +270,7 @@
+       EM(rxrpc_call_put_kernel,               "PUT kernel  ") \
+       EM(rxrpc_call_put_poke,                 "PUT poke    ") \
+       EM(rxrpc_call_put_recvmsg,              "PUT recvmsg ") \
++      EM(rxrpc_call_put_recvmsg_peek_nowait,  "PUT peek-nwt") \
+       EM(rxrpc_call_put_release_sock,         "PUT rls-sock") \
+       EM(rxrpc_call_put_release_sock_tba,     "PUT rls-sk-a") \
+       EM(rxrpc_call_put_sendmsg,              "PUT sendmsg ") \
+@@ -287,6 +288,9 @@
+       EM(rxrpc_call_see_distribute_error,     "SEE dist-err") \
+       EM(rxrpc_call_see_input,                "SEE input   ") \
+       EM(rxrpc_call_see_recvmsg,              "SEE recvmsg ") \
++      EM(rxrpc_call_see_recvmsg_requeue,      "SEE recv-rqu") \
++      EM(rxrpc_call_see_recvmsg_requeue_first, "SEE recv-rqF") \
++      EM(rxrpc_call_see_recvmsg_requeue_move, "SEE recv-rqM") \
+       EM(rxrpc_call_see_release,              "SEE release ") \
+       EM(rxrpc_call_see_userid_exists,        "SEE u-exists") \
+       EM(rxrpc_call_see_waiting_call,         "SEE q-conn  ") \
+--- a/net/rxrpc/recvmsg.c
++++ b/net/rxrpc/recvmsg.c
+@@ -430,7 +430,8 @@ try_again:
+       if (rxrpc_call_has_failed(call))
+               goto call_failed;
+-      if (!skb_queue_empty(&call->recvmsg_queue))
++      if (!(flags & MSG_PEEK) &&
++          !skb_queue_empty(&call->recvmsg_queue))
+               rxrpc_notify_socket(call);
+       goto not_yet_complete;
+@@ -461,11 +462,21 @@ error_unlock_call:
+ error_requeue_call:
+       if (!(flags & MSG_PEEK)) {
+               spin_lock(&rx->recvmsg_lock);
+-              list_add(&call->recvmsg_link, &rx->recvmsg_q);
+-              spin_unlock(&rx->recvmsg_lock);
++              if (list_empty(&call->recvmsg_link)) {
++                      list_add(&call->recvmsg_link, &rx->recvmsg_q);
++                      rxrpc_see_call(call, rxrpc_call_see_recvmsg_requeue);
++                      spin_unlock(&rx->recvmsg_lock);
++              } else if (list_is_first(&call->recvmsg_link, &rx->recvmsg_q)) {
++                      spin_unlock(&rx->recvmsg_lock);
++                      rxrpc_put_call(call, rxrpc_call_see_recvmsg_requeue_first);
++              } else {
++                      list_move(&call->recvmsg_link, &rx->recvmsg_q);
++                      spin_unlock(&rx->recvmsg_lock);
++                      rxrpc_put_call(call, rxrpc_call_see_recvmsg_requeue_move);
++              }
+               trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
+       } else {
+-              rxrpc_put_call(call, rxrpc_call_put_recvmsg);
++              rxrpc_put_call(call, rxrpc_call_put_recvmsg_peek_nowait);
+       }
+ error_no_call:
+       release_sock(&rx->sk);
index 37a1df4b0c8d429fc1579d456238e2c775711cd7..725e1c387dd93ca3fde8a55f6418ea668a738bdd 100644 (file)
@@ -396,3 +396,19 @@ smb-client-compare-macs-in-constant-time.patch
 ksmbd-compare-macs-in-constant-time.patch
 net-tcp-md5-fix-mac-comparison-to-be-constant-time.patch
 f2fs-fix-to-avoid-migrating-empty-section.patch
+ext4-fix-dirtyclusters-double-decrement-on-fs-shutdown.patch
+btrfs-always-fallback-to-buffered-write-if-the-inode-requires-checksum.patch
+net-stmmac-dwmac-loongson-set-clk_csr_i-to-100-150mhz.patch
+arm64-mm-don-t-remap-pgtables-per-cont-pte-pmd-block.patch
+arm64-mm-batch-dsb-and-isb-when-populating-pgtables.patch
+arm64-mm-don-t-remap-pgtables-for-allocate-vs-populate.patch
+btrfs-fix-null-dereference-on-root-when-tracing-inode-eviction.patch
+dst-fix-races-in-rt6_uncached_list_del-and-rt_del_uncached_list.patch
+nfs-pass-explicit-offset-count-to-trace-events.patch
+nfs-fix-a-deadlock-involving-nfs_release_folio.patch
+pnfs-fix-a-deadlock-when-returning-a-delegation-during-open.patch
+usb-typec-ucsi-move-unregister-out-of-atomic-section.patch
+eth-bnxt-always-recalculate-features-after-xdp-clearing-fix-null-deref.patch
+ext4-always-allocate-blocks-only-from-groups-inode-can-use.patch
+rxrpc-fix-recvmsg-unconditional-requeue.patch
+dm-verity-disable-recursive-forward-error-correction.patch
diff --git a/queue-6.6/usb-typec-ucsi-move-unregister-out-of-atomic-section.patch b/queue-6.6/usb-typec-ucsi-move-unregister-out-of-atomic-section.patch
new file mode 100644 (file)
index 0000000..8bd1d46
--- /dev/null
@@ -0,0 +1,131 @@
+From black.hawk@163.com Wed Feb 25 06:10:42 2026
+From: Rahul Sharma <black.hawk@163.com>
+Date: Wed, 25 Feb 2026 13:10:08 +0800
+Subject: usb: typec: ucsi: Move unregister out of atomic section
+To: gregkh@linuxfoundation.org, stable@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org, Bjorn Andersson <quic_bjorande@quicinc.com>, Heikki Krogerus <heikki.krogerus@linux.intel.com>, Neil Armstrong <neil.armstrong@linaro.org>, Dmitry Baryshkov <dmitry.baryshkov@linaro.org>, Amit Pundir <amit.pundir@linaro.org>, Johan Hovold <johan+linaro@kernel.org>, Bjorn Andersson <andersson@kernel.org>, Rahul Sharma <black.hawk@163.com>
+Message-ID: <20260225051008.2547855-1-black.hawk@163.com>
+
+From: Bjorn Andersson <quic_bjorande@quicinc.com>
+
+[ Upstream commit 11bb2ffb679399f99041540cf662409905179e3a ]
+
+Commit '9329933699b3 ("soc: qcom: pmic_glink: Make client-lock
+non-sleeping")' moved the pmic_glink client list under a spinlock, as it
+is accessed by the rpmsg/glink callback, which in turn is invoked from
+IRQ context.
+
+This means that ucsi_unregister() is now called from atomic context,
+which isn't feasible as it's expecting a sleepable context. An effort is
+under way to get GLINK to invoke its callbacks in a sleepable context,
+but until then lets schedule the unregistration.
+
+A side effect of this is that ucsi_unregister() can now happen
+after the remote processor, and thereby the communication link with it, is
+gone. pmic_glink_send() is amended with a check to avoid the resulting NULL
+pointer dereference.
+This does however result in the user being informed about this error by
+the following entry in the kernel log:
+
+  ucsi_glink.pmic_glink_ucsi pmic_glink.ucsi.0: failed to send UCSI write request: -5
+
+Fixes: 9329933699b3 ("soc: qcom: pmic_glink: Make client-lock non-sleeping")
+Cc: stable@vger.kernel.org
+Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Tested-by: Amit Pundir <amit.pundir@linaro.org>
+Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
+Tested-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
+Link: https://lore.kernel.org/r/20240820-pmic-glink-v6-11-races-v3-2-eec53c750a04@quicinc.com
+Signed-off-by: Bjorn Andersson <andersson@kernel.org>
+[ The context change is due to the commit 584e8df58942
+("usb: typec: ucsi: extract common code for command handling")
+in v6.11 which is irrelevant to the logic of this patch. ]
+Signed-off-by: Rahul Sharma <black.hawk@163.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/soc/qcom/pmic_glink.c       |   10 +++++++++-
+ drivers/usb/typec/ucsi/ucsi_glink.c |   27 ++++++++++++++++++++++-----
+ 2 files changed, 31 insertions(+), 6 deletions(-)
+
+--- a/drivers/soc/qcom/pmic_glink.c
++++ b/drivers/soc/qcom/pmic_glink.c
+@@ -115,8 +115,16 @@ EXPORT_SYMBOL_GPL(pmic_glink_client_regi
+ int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len)
+ {
+       struct pmic_glink *pg = client->pg;
++      int ret;
+-      return rpmsg_send(pg->ept, data, len);
++      mutex_lock(&pg->state_lock);
++      if (!pg->ept)
++              ret = -ECONNRESET;
++      else
++              ret = rpmsg_send(pg->ept, data, len);
++      mutex_unlock(&pg->state_lock);
++
++      return ret;
+ }
+ EXPORT_SYMBOL_GPL(pmic_glink_send);
+--- a/drivers/usb/typec/ucsi/ucsi_glink.c
++++ b/drivers/usb/typec/ucsi/ucsi_glink.c
+@@ -72,6 +72,9 @@ struct pmic_glink_ucsi {
+       struct work_struct notify_work;
+       struct work_struct register_work;
++      spinlock_t state_lock;
++      bool ucsi_registered;
++      bool pd_running;
+       u8 read_buf[UCSI_BUF_SIZE];
+ };
+@@ -270,8 +273,20 @@ static void pmic_glink_ucsi_notify(struc
+ static void pmic_glink_ucsi_register(struct work_struct *work)
+ {
+       struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
++      unsigned long flags;
++      bool pd_running;
+-      ucsi_register(ucsi->ucsi);
++      spin_lock_irqsave(&ucsi->state_lock, flags);
++      pd_running = ucsi->pd_running;
++      spin_unlock_irqrestore(&ucsi->state_lock, flags);
++
++      if (!ucsi->ucsi_registered && pd_running) {
++              ucsi_register(ucsi->ucsi);
++              ucsi->ucsi_registered = true;
++      } else if (ucsi->ucsi_registered && !pd_running) {
++              ucsi_unregister(ucsi->ucsi);
++              ucsi->ucsi_registered = false;
++      }
+ }
+ static void pmic_glink_ucsi_callback(const void *data, size_t len, void *priv)
+@@ -295,11 +310,12 @@ static void pmic_glink_ucsi_callback(con
+ static void pmic_glink_ucsi_pdr_notify(void *priv, int state)
+ {
+       struct pmic_glink_ucsi *ucsi = priv;
++      unsigned long flags;
+-      if (state == SERVREG_SERVICE_STATE_UP)
+-              schedule_work(&ucsi->register_work);
+-      else if (state == SERVREG_SERVICE_STATE_DOWN)
+-              ucsi_unregister(ucsi->ucsi);
++      spin_lock_irqsave(&ucsi->state_lock, flags);
++      ucsi->pd_running = (state == SERVREG_SERVICE_STATE_UP);
++      spin_unlock_irqrestore(&ucsi->state_lock, flags);
++      schedule_work(&ucsi->register_work);
+ }
+ static void pmic_glink_ucsi_destroy(void *data)
+@@ -332,6 +348,7 @@ static int pmic_glink_ucsi_probe(struct
+       init_completion(&ucsi->read_ack);
+       init_completion(&ucsi->write_ack);
+       init_completion(&ucsi->sync_ack);
++      spin_lock_init(&ucsi->state_lock);
+       mutex_init(&ucsi->lock);
+       ucsi->ucsi = ucsi_create(dev, &pmic_glink_ucsi_ops);