--- /dev/null
+From 49c87f7677746f3c5bd16c81b23700bb6b88bfd4 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Wed, 26 Feb 2025 12:06:52 +0000
+Subject: arm64: hugetlb: Fix huge_ptep_get_and_clear() for non-present ptes
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+commit 49c87f7677746f3c5bd16c81b23700bb6b88bfd4 upstream.
+
+arm64 supports multiple huge_pte sizes. Some of the sizes are covered by
+a single pte entry at a particular level (PMD_SIZE, PUD_SIZE), and some
+are covered by multiple ptes at a particular level (CONT_PTE_SIZE,
+CONT_PMD_SIZE). So huge_ptep_get_and_clear() has to figure out the size
+from the huge_pte pointer. This was previously done by walking the
+pgtable to determine the level and by using the PTE_CONT bit to
+determine the number of ptes at the level.
+
+But the PTE_CONT bit is only valid when the pte is present. For
+non-present pte values (e.g. markers, migration entries), the previous
+implementation was therefore erroneously determining the size. There is
+at least one known caller in core-mm, move_huge_pte(), which may call
+huge_ptep_get_and_clear() for a non-present pte. So we must be robust to
+this case. Additionally the "regular" ptep_get_and_clear() is robust to
+being called for non-present ptes so it makes sense to follow the
+behavior.
+
+Fix this by using the new sz parameter which is now provided to the
+function. Additionally when clearing each pte in a contig range, don't
+gather the access and dirty bits if the pte is not present.
+
+An alternative approach that would not require API changes would be to
+store the PTE_CONT bit in a spare bit in the swap entry pte for the
+non-present case. But it felt cleaner to follow other APIs' lead and
+just pass in the size.
+
+As an aside, PTE_CONT is bit 52, which corresponds to bit 40 in the swap
+entry offset field (layout of non-present pte). Since hugetlb is never
+swapped to disk, this field will only be populated for markers, which
+always set this bit to 0 and hwpoison swap entries, which set the offset
+field to a PFN; so it would only ever be 1 for a 52-bit PVA system where
+memory in that high half was poisoned (I think!). So in practice, this
+bit would almost always be zero for non-present ptes and we would only
+clear the first entry if it was actually a contiguous block. That's
+probably a less severe symptom than if it was always interpreted as 1
+and cleared out potentially-present neighboring PTEs.
+
+Cc: stable@vger.kernel.org
+Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Link: https://lore.kernel.org/r/20250226120656.2400136-3-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/hugetlbpage.c | 51 ++++++++++++++++----------------------------
+ 1 file changed, 19 insertions(+), 32 deletions(-)
+
+--- a/arch/arm64/mm/hugetlbpage.c
++++ b/arch/arm64/mm/hugetlbpage.c
+@@ -100,20 +100,11 @@ static int find_num_contig(struct mm_str
+
+ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
+ {
+- int contig_ptes = 0;
++ int contig_ptes = 1;
+
+ *pgsize = size;
+
+ switch (size) {
+-#ifndef __PAGETABLE_PMD_FOLDED
+- case PUD_SIZE:
+- if (pud_sect_supported())
+- contig_ptes = 1;
+- break;
+-#endif
+- case PMD_SIZE:
+- contig_ptes = 1;
+- break;
+ case CONT_PMD_SIZE:
+ *pgsize = PMD_SIZE;
+ contig_ptes = CONT_PMDS;
+@@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsign
+ *pgsize = PAGE_SIZE;
+ contig_ptes = CONT_PTES;
+ break;
++ default:
++ WARN_ON(!__hugetlb_valid_size(size));
+ }
+
+ return contig_ptes;
+@@ -163,24 +156,23 @@ static pte_t get_clear_contig(struct mm_
+ unsigned long pgsize,
+ unsigned long ncontig)
+ {
+- pte_t orig_pte = __ptep_get(ptep);
+- unsigned long i;
++ pte_t pte, tmp_pte;
++ bool present;
+
+- for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
+- pte_t pte = __ptep_get_and_clear(mm, addr, ptep);
+-
+- /*
+- * If HW_AFDBM is enabled, then the HW could turn on
+- * the dirty or accessed bit for any page in the set,
+- * so check them all.
+- */
+- if (pte_dirty(pte))
+- orig_pte = pte_mkdirty(orig_pte);
+-
+- if (pte_young(pte))
+- orig_pte = pte_mkyoung(orig_pte);
++ pte = __ptep_get_and_clear(mm, addr, ptep);
++ present = pte_present(pte);
++ while (--ncontig) {
++ ptep++;
++ addr += pgsize;
++ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
++ if (present) {
++ if (pte_dirty(tmp_pte))
++ pte = pte_mkdirty(pte);
++ if (pte_young(tmp_pte))
++ pte = pte_mkyoung(pte);
++ }
+ }
+- return orig_pte;
++ return pte;
+ }
+
+ static pte_t get_clear_contig_flush(struct mm_struct *mm,
+@@ -401,13 +393,8 @@ pte_t huge_ptep_get_and_clear(struct mm_
+ {
+ int ncontig;
+ size_t pgsize;
+- pte_t orig_pte = __ptep_get(ptep);
+-
+- if (!pte_cont(orig_pte))
+- return __ptep_get_and_clear(mm, addr, ptep);
+-
+- ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+
++ ncontig = num_contig_ptes(sz, &pgsize);
+ return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
+ }
+
--- /dev/null
+From 02410ac72ac3707936c07ede66e94360d0d65319 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Wed, 26 Feb 2025 12:06:51 +0000
+Subject: mm: hugetlb: Add huge page size param to huge_ptep_get_and_clear()
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+commit 02410ac72ac3707936c07ede66e94360d0d65319 upstream.
+
+In order to fix a bug, arm64 needs to be told the size of the huge page
+for which the huge_pte is being cleared in huge_ptep_get_and_clear().
+Provide for this by adding an `unsigned long sz` parameter to the
+function. This follows the same pattern as huge_pte_clear() and
+set_huge_pte_at().
+
+This commit makes the required interface modifications to the core mm as
+well as all arches that implement this function (arm64, loongarch, mips,
+parisc, powerpc, riscv, s390, sparc). The actual arm64 bug will be fixed
+in a separate commit.
+
+Cc: stable@vger.kernel.org
+Fixes: 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> # riscv
+Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: Alexander Gordeev <agordeev@linux.ibm.com> # s390
+Link: https://lore.kernel.org/r/20250226120656.2400136-2-ryan.roberts@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/hugetlb.h | 4 ++--
+ arch/arm64/mm/hugetlbpage.c | 8 +++++---
+ arch/loongarch/include/asm/hugetlb.h | 6 ++++--
+ arch/mips/include/asm/hugetlb.h | 6 ++++--
+ arch/parisc/include/asm/hugetlb.h | 2 +-
+ arch/parisc/mm/hugetlbpage.c | 2 +-
+ arch/powerpc/include/asm/hugetlb.h | 6 ++++--
+ arch/riscv/include/asm/hugetlb.h | 3 ++-
+ arch/riscv/mm/hugetlbpage.c | 2 +-
+ arch/s390/include/asm/hugetlb.h | 18 +++++++++++++-----
+ arch/s390/mm/hugetlbpage.c | 4 ++--
+ arch/sparc/include/asm/hugetlb.h | 2 +-
+ arch/sparc/mm/hugetlbpage.c | 2 +-
+ include/asm-generic/hugetlb.h | 2 +-
+ include/linux/hugetlb.h | 4 +++-
+ mm/hugetlb.c | 4 ++--
+ 16 files changed, 47 insertions(+), 28 deletions(-)
+
+--- a/arch/arm64/include/asm/hugetlb.h
++++ b/arch/arm64/include/asm/hugetlb.h
+@@ -42,8 +42,8 @@ extern int huge_ptep_set_access_flags(st
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty);
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+-extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep);
++extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep, unsigned long sz);
+ #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+ extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+--- a/arch/arm64/mm/hugetlbpage.c
++++ b/arch/arm64/mm/hugetlbpage.c
+@@ -396,8 +396,8 @@ void huge_pte_clear(struct mm_struct *mm
+ __pte_clear(mm, addr, ptep);
+ }
+
+-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep)
++pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep, unsigned long sz)
+ {
+ int ncontig;
+ size_t pgsize;
+@@ -549,6 +549,8 @@ bool __init arch_hugetlb_valid_size(unsi
+
+ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+ {
++ unsigned long psize = huge_page_size(hstate_vma(vma));
++
+ if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
+ /*
+ * Break-before-make (BBM) is required for all user space mappings
+@@ -558,7 +560,7 @@ pte_t huge_ptep_modify_prot_start(struct
+ if (pte_user_exec(__ptep_get(ptep)))
+ return huge_ptep_clear_flush(vma, addr, ptep);
+ }
+- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
+ }
+
+ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+--- a/arch/loongarch/include/asm/hugetlb.h
++++ b/arch/loongarch/include/asm/hugetlb.h
+@@ -36,7 +36,8 @@ static inline void huge_pte_clear(struct
+
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep)
++ unsigned long addr, pte_t *ptep,
++ unsigned long sz)
+ {
+ pte_t clear;
+ pte_t pte = ptep_get(ptep);
+@@ -51,8 +52,9 @@ static inline pte_t huge_ptep_clear_flus
+ unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte;
++ unsigned long sz = huge_page_size(hstate_vma(vma));
+
+- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
+ flush_tlb_page(vma, addr);
+ return pte;
+ }
+--- a/arch/mips/include/asm/hugetlb.h
++++ b/arch/mips/include/asm/hugetlb.h
+@@ -27,7 +27,8 @@ static inline int prepare_hugepage_range
+
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep)
++ unsigned long addr, pte_t *ptep,
++ unsigned long sz)
+ {
+ pte_t clear;
+ pte_t pte = *ptep;
+@@ -42,13 +43,14 @@ static inline pte_t huge_ptep_clear_flus
+ unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte;
++ unsigned long sz = huge_page_size(hstate_vma(vma));
+
+ /*
+ * clear the huge pte entry firstly, so that the other smp threads will
+ * not get old pte entry after finishing flush_tlb_page and before
+ * setting new huge pte entry
+ */
+- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
+ flush_tlb_page(vma, addr);
+ return pte;
+ }
+--- a/arch/parisc/include/asm/hugetlb.h
++++ b/arch/parisc/include/asm/hugetlb.h
+@@ -10,7 +10,7 @@ void set_huge_pte_at(struct mm_struct *m
+
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+- pte_t *ptep);
++ pte_t *ptep, unsigned long sz);
+
+ #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+--- a/arch/parisc/mm/hugetlbpage.c
++++ b/arch/parisc/mm/hugetlbpage.c
+@@ -126,7 +126,7 @@ void set_huge_pte_at(struct mm_struct *m
+
+
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+- pte_t *ptep)
++ pte_t *ptep, unsigned long sz)
+ {
+ pte_t entry;
+
+--- a/arch/powerpc/include/asm/hugetlb.h
++++ b/arch/powerpc/include/asm/hugetlb.h
+@@ -45,7 +45,8 @@ void set_huge_pte_at(struct mm_struct *m
+
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep)
++ unsigned long addr, pte_t *ptep,
++ unsigned long sz)
+ {
+ return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
+ }
+@@ -55,8 +56,9 @@ static inline pte_t huge_ptep_clear_flus
+ unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte;
++ unsigned long sz = huge_page_size(hstate_vma(vma));
+
+- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
+ flush_hugetlb_page(vma, addr);
+ return pte;
+ }
+--- a/arch/riscv/include/asm/hugetlb.h
++++ b/arch/riscv/include/asm/hugetlb.h
+@@ -28,7 +28,8 @@ void set_huge_pte_at(struct mm_struct *m
+
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep);
++ unsigned long addr, pte_t *ptep,
++ unsigned long sz);
+
+ #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+--- a/arch/riscv/mm/hugetlbpage.c
++++ b/arch/riscv/mm/hugetlbpage.c
+@@ -293,7 +293,7 @@ int huge_ptep_set_access_flags(struct vm
+
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+- pte_t *ptep)
++ pte_t *ptep, unsigned long sz)
+ {
+ pte_t orig_pte = ptep_get(ptep);
+ int pte_num;
+--- a/arch/s390/include/asm/hugetlb.h
++++ b/arch/s390/include/asm/hugetlb.h
+@@ -23,9 +23,17 @@ void __set_huge_pte_at(struct mm_struct
+ pte_t *ptep, pte_t pte);
+ #define __HAVE_ARCH_HUGE_PTEP_GET
+ extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
++
++pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
++ pte_t *ptep);
++
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+-extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep);
++static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
++ unsigned long addr, pte_t *ptep,
++ unsigned long sz)
++{
++ return __huge_ptep_get_and_clear(mm, addr, ptep);
++}
+
+ static inline void arch_clear_hugetlb_flags(struct folio *folio)
+ {
+@@ -47,7 +55,7 @@ static inline void huge_pte_clear(struct
+ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+ {
+- return huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
++ return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+ }
+
+ #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+@@ -57,7 +65,7 @@ static inline int huge_ptep_set_access_f
+ {
+ int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
+ if (changed) {
+- huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ __set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ }
+ return changed;
+@@ -67,7 +75,7 @@ static inline int huge_ptep_set_access_f
+ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+ {
+- pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
++ pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep);
+ __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+ }
+
+--- a/arch/s390/mm/hugetlbpage.c
++++ b/arch/s390/mm/hugetlbpage.c
+@@ -188,8 +188,8 @@ pte_t huge_ptep_get(struct mm_struct *mm
+ return __rste_to_pte(pte_val(*ptep));
+ }
+
+-pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep)
++pte_t __huge_ptep_get_and_clear(struct mm_struct *mm,
++ unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = huge_ptep_get(mm, addr, ptep);
+ pmd_t *pmdp = (pmd_t *) ptep;
+--- a/arch/sparc/include/asm/hugetlb.h
++++ b/arch/sparc/include/asm/hugetlb.h
+@@ -20,7 +20,7 @@ void __set_huge_pte_at(struct mm_struct
+
+ #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+- pte_t *ptep);
++ pte_t *ptep, unsigned long sz);
+
+ #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -260,7 +260,7 @@ void set_huge_pte_at(struct mm_struct *m
+ }
+
+ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+- pte_t *ptep)
++ pte_t *ptep, unsigned long sz)
+ {
+ unsigned int i, nptes, orig_shift, shift;
+ unsigned long size;
+--- a/include/asm-generic/hugetlb.h
++++ b/include/asm-generic/hugetlb.h
+@@ -90,7 +90,7 @@ static inline void set_huge_pte_at(struc
+
+ #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+- unsigned long addr, pte_t *ptep)
++ unsigned long addr, pte_t *ptep, unsigned long sz)
+ {
+ return ptep_get_and_clear(mm, addr, ptep);
+ }
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -1003,7 +1003,9 @@ static inline void hugetlb_count_sub(lon
+ static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+ {
+- return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ unsigned long psize = huge_page_size(hstate_vma(vma));
++
++ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
+ }
+ #endif
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5402,7 +5402,7 @@ static void move_huge_pte(struct vm_area
+ if (src_ptl != dst_ptl)
+ spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+
+- pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
++ pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
+
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ huge_pte_clear(mm, new_addr, dst_pte, sz);
+@@ -5577,7 +5577,7 @@ void __unmap_hugepage_range(struct mmu_g
+ set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED);
+ }
+
+- pte = huge_ptep_get_and_clear(mm, address, ptep);
++ pte = huge_ptep_get_and_clear(mm, address, ptep, sz);
+ tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
+ if (huge_pte_dirty(pte))
+ set_page_dirty(page);