+++ /dev/null
-From fac35ba763ed07ba93154c95ffc0c4a55023707f Mon Sep 17 00:00:00 2001
-From: Baolin Wang <baolin.wang@linux.alibaba.com>
-Date: Thu, 1 Sep 2022 18:41:31 +0800
-Subject: mm/hugetlb: fix races when looking up a CONT-PTE/PMD size hugetlb page
-
-From: Baolin Wang <baolin.wang@linux.alibaba.com>
-
-commit fac35ba763ed07ba93154c95ffc0c4a55023707f upstream.
-
-Some architectures (like ARM64) support CONT-PTE/PMD size hugetlb pages,
-meaning they support not only PMD/PUD size hugetlb (2M and 1G), but also
-CONT-PTE/PMD size hugetlb (64K and 32M) when a 4K base page size is used.
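-
-Concretely, with a 4K base page size on ARM64:
-
-  PMD hugetlb:        2M  (a single block mapping)
-  PUD hugetlb:        1G  (a single block mapping)
-  CONT-PTE hugetlb:  64K  (16 contiguous 4K ptes, contiguous bit set)
-  CONT-PMD hugetlb:  32M  (16 contiguous 2M pmds, contiguous bit set)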
-
-So when looking up a CONT-PTE size hugetlb page via follow_page(),
-follow_page_pte() uses pte_offset_map_lock() to take the pte entry lock.
-However, that lock is the wrong one for a CONT-PTE size hugetlb page: the
-correct lock, which huge_pte_lock() would return, is mm->page_table_lock.
-
-That means the pte entry of a CONT-PTE size hugetlb page is unstable under
-the lock taken in follow_page_pte(): another path can still migrate or
-poison that pte entry concurrently, which can cause potential races even
-though the caller holds the 'pte lock'.
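-
-For reference, huge_pte_lock() only uses the split pmd lock for genuinely
-PMD-sized hugetlb; every other size (including CONT-PTE and CONT-PMD) falls
-back to mm->page_table_lock. A simplified sketch of the helpers in
-include/linux/hugetlb.h:
-
-  static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
-                                             struct mm_struct *mm, pte_t *pte)
-  {
-          /* Only a real PMD-sized hugetlb can use the split pmd lock. */
-          if (huge_page_size(h) == PMD_SIZE)
-                  return pmd_lockptr(mm, (pmd_t *) pte);
-          VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
-          /* CONT-PTE/PMD and PUD sizes are serialized by this lock. */
-          return &mm->page_table_lock;
-  }
-
-  static inline spinlock_t *huge_pte_lock(struct hstate *h,
-                                          struct mm_struct *mm, pte_t *pte)
-  {
-          spinlock_t *ptl = huge_pte_lockptr(h, mm, pte);
-
-          spin_lock(ptl);
-          return ptl;
-  }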
-
-For example, suppose thread A looks up a CONT-PTE size hugetlb page via the
-move_pages() syscall while holding the lock; another thread B can still
-migrate the CONT-PTE hugetlb page at the same time, so thread A gets a
-stale page. If thread A then goes on to migrate that page itself, a data
-inconsistency error occurs.
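-
-Schematically, with the pre-fix locking (illustrative):
-
-  Thread A (move_pages)             Thread B (migration)
-  ---------------------             --------------------
-  follow_page_pte()
-    pte_offset_map_lock()           huge_pte_lock()
-    /* split pte lock */            /* mm->page_table_lock */
-    reads pte entry                 replaces pte with migration entry
-    returns a stale page
-
-Because the two threads take different locks, neither excludes the other.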
-
-Moreover, we have the same issue for CONT-PMD size hugetlb in
-follow_huge_pmd().
-
-To fix the above issues, rename follow_huge_pmd() to follow_huge_pmd_pte(),
-which handles both PMD and PTE level hugetlb and uses huge_pte_lock() to
-take the correct pte entry lock, making the pte entry stable.
-
-Mike said:
-
-Support for CONT_PMD/_PTE was added with bb9dd3df8ee9 ("arm64: hugetlb:
-refactor find_num_contig()"), part of the patch series "Support for
-contiguous pte hugepages", v4. However, I do not believe these code paths
-were executed until migration support was added with 5480280d3f2d
-("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages").
-I would go with 5480280d3f2d for the Fixes: target.
-
-Link: https://lkml.kernel.org/r/635f43bdd85ac2615a58405da82b4d33c6e5eb05.1662017562.git.baolin.wang@linux.alibaba.com
-Fixes: 5480280d3f2d ("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages")
-Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
-Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
-Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
-Cc: David Hildenbrand <david@redhat.com>
-Cc: Muchun Song <songmuchun@bytedance.com>
-Cc: <stable@vger.kernel.org>
-Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
-[5.4: Fixup contextual diffs before pin_user_pages()]
-Signed-off-by: Samuel Mendoza-Jonas <samjonas@amazon.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/hugetlb.h | 6 +++---
- mm/gup.c | 13 ++++++++++++-
- mm/hugetlb.c | 28 ++++++++++++++--------------
- 3 files changed, 29 insertions(+), 18 deletions(-)
-
---- a/include/linux/hugetlb.h
-+++ b/include/linux/hugetlb.h
-@@ -127,8 +127,8 @@ struct page *follow_huge_addr(struct mm_
- struct page *follow_huge_pd(struct vm_area_struct *vma,
- unsigned long address, hugepd_t hpd,
- int flags, int pdshift);
--struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-- pmd_t *pmd, int flags);
-+struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address,
-+ int flags);
- struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int flags);
- struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
-@@ -175,7 +175,7 @@ static inline void hugetlb_show_meminfo(
- {
- }
- #define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL
--#define follow_huge_pmd(mm, addr, pmd, flags) NULL
-+#define follow_huge_pmd_pte(vma, addr, flags) NULL
- #define follow_huge_pud(mm, addr, pud, flags) NULL
- #define follow_huge_pgd(mm, addr, pgd, flags) NULL
- #define prepare_hugepage_range(file, addr, len) (-EINVAL)
---- a/mm/gup.c
-+++ b/mm/gup.c
-@@ -188,6 +188,17 @@ static struct page *follow_page_pte(stru
- spinlock_t *ptl;
- pte_t *ptep, pte;
-
-+ /*
-+ * Considering PTE level hugetlb, like continuous-PTE hugetlb on
-+ * ARM64 architecture.
-+ */
-+ if (is_vm_hugetlb_page(vma)) {
-+ page = follow_huge_pmd_pte(vma, address, flags);
-+ if (page)
-+ return page;
-+ return no_page_table(vma, flags);
-+ }
-+
- retry:
- if (unlikely(pmd_bad(*pmd)))
- return no_page_table(vma, flags);
-@@ -333,7 +344,7 @@ static struct page *follow_pmd_mask(stru
- if (pmd_none(pmdval))
- return no_page_table(vma, flags);
- if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
-- page = follow_huge_pmd(mm, address, pmd, flags);
-+ page = follow_huge_pmd_pte(vma, address, flags);
- if (page)
- return page;
- return no_page_table(vma, flags);
---- a/mm/hugetlb.c
-+++ b/mm/hugetlb.c
-@@ -5157,30 +5157,30 @@ follow_huge_pd(struct vm_area_struct *vm
- }
-
- struct page * __weak
--follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-- pmd_t *pmd, int flags)
-+follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
- {
-+ struct hstate *h = hstate_vma(vma);
-+ struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
- spinlock_t *ptl;
-- pte_t pte;
-+ pte_t *ptep, pte;
-+
- retry:
-- ptl = pmd_lockptr(mm, pmd);
-- spin_lock(ptl);
-- /*
-- * make sure that the address range covered by this pmd is not
-- * unmapped from other threads.
-- */
-- if (!pmd_huge(*pmd))
-- goto out;
-- pte = huge_ptep_get((pte_t *)pmd);
-+ ptep = huge_pte_offset(mm, address, huge_page_size(h));
-+ if (!ptep)
-+ return NULL;
-+
-+ ptl = huge_pte_lock(h, mm, ptep);
-+ pte = huge_ptep_get(ptep);
- if (pte_present(pte)) {
-- page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
-+ page = pte_page(pte) +
-+ ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
- if (flags & FOLL_GET)
- get_page(page);
- } else {
- if (is_hugetlb_entry_migration(pte)) {
- spin_unlock(ptl);
-- __migration_entry_wait(mm, (pte_t *)pmd, ptl);
-+ __migration_entry_wait(mm, ptep, ptl);
- goto retry;
- }
- /*