From: Sasha Levin Date: Thu, 1 Jul 2021 11:27:20 +0000 (-0400) Subject: Fixes for 4.14 X-Git-Tag: v5.13.1~18 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9eb5ae625e6d3dff5d579c2b84c11b3f4cca08ae;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 4.14 Signed-off-by: Sasha Levin --- diff --git a/queue-4.14/include-linux-mmdebug.h-make-vm_warn-non-rvals.patch b/queue-4.14/include-linux-mmdebug.h-make-vm_warn-non-rvals.patch new file mode 100644 index 00000000000..60aa03b62b9 --- /dev/null +++ b/queue-4.14/include-linux-mmdebug.h-make-vm_warn-non-rvals.patch @@ -0,0 +1,55 @@ +From 03af020be6c1b36fcf922898f75111ec55ea13e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Apr 2018 16:25:30 -0700 +Subject: include/linux/mmdebug.h: make VM_WARN* non-rvals + +From: Michal Hocko + +[ Upstream commit 91241681c62a5a690c88eb2aca027f094125eaac ] + +At present the construct + + if (VM_WARN(...)) + +will compile OK with CONFIG_DEBUG_VM=y and will fail with +CONFIG_DEBUG_VM=n. The reason is that VM_{WARN,BUG}* have always been +special wrt. {WARN/BUG}* and never generate any code when DEBUG_VM is +disabled. So we cannot really use it in conditionals. + +We considered changing things so that this construct works in both cases +but that might cause unwanted code generation with CONFIG_DEBUG_VM=n. +It is safer and simpler to make the build fail in both cases. + +[akpm@linux-foundation.org: changelog] +Signed-off-by: Michal Hocko +Reviewed-by: Andrew Morton +Cc: Stephen Rothwell +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + include/linux/mmdebug.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h +index 57b0030d3800..2ad72d2c8cc5 100644 +--- a/include/linux/mmdebug.h ++++ b/include/linux/mmdebug.h +@@ -37,10 +37,10 @@ void dump_mm(const struct mm_struct *mm); + BUG(); \ + } \ + } while (0) +-#define VM_WARN_ON(cond) WARN_ON(cond) +-#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) +-#define VM_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) +-#define VM_WARN(cond, format...) WARN(cond, format) ++#define VM_WARN_ON(cond) (void)WARN_ON(cond) ++#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) ++#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) ++#define VM_WARN(cond, format...) (void)WARN(cond, format) + #else + #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) + #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) +-- +2.30.2 + diff --git a/queue-4.14/mm-add-vm_warn_on_once_page-macro.patch b/queue-4.14/mm-add-vm_warn_on_once_page-macro.patch new file mode 100644 index 00000000000..0fc1ddc999f --- /dev/null +++ b/queue-4.14/mm-add-vm_warn_on_once_page-macro.patch @@ -0,0 +1,64 @@ +From 8dded92b9c871ff31bc86a6988cb2c658a781732 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Dec 2020 14:01:31 -0800 +Subject: mm: add VM_WARN_ON_ONCE_PAGE() macro + +From: Alex Shi + +[ Upstream commit a4055888629bc0467d12d912cd7c90acdf3d9b12 part ] + +Add VM_WARN_ON_ONCE_PAGE() macro. + +Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.alibaba.com +Signed-off-by: Alex Shi +Acked-by: Michal Hocko +Acked-by: Hugh Dickins +Acked-by: Johannes Weiner +Cc: Vladimir Davydov +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +Note on stable backport: original commit was titled +mm/memcg: warning on !memcg after readahead page charged +which included uses of this macro in mm/memcontrol.c: here omitted. 
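+
+For illustration only (a hypothetical call site, not taken from the
+omitted mm/memcontrol.c hunks), the new macro is used like the other
+VM_WARN* macros, as a statement:
+
+	VM_WARN_ON_ONCE_PAGE(!page->mapping, page);
+
+Like the VM_WARN* macros made non-rvals earlier in this series, it
+cannot be relied on in a conditional: with CONFIG_DEBUG_VM=n it
+expands to BUILD_BUG_ON_INVALID(cond), which yields no value.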
+ +Signed-off-by: Hugh Dickins +Signed-off-by: Sasha Levin +--- + include/linux/mmdebug.h | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h +index 2ad72d2c8cc5..5d0767cb424a 100644 +--- a/include/linux/mmdebug.h ++++ b/include/linux/mmdebug.h +@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm); + BUG(); \ + } \ + } while (0) ++#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ ++ static bool __section(".data.once") __warned; \ ++ int __ret_warn_once = !!(cond); \ ++ \ ++ if (unlikely(__ret_warn_once && !__warned)) { \ ++ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ ++ __warned = true; \ ++ WARN_ON(1); \ ++ } \ ++ unlikely(__ret_warn_once); \ ++}) ++ + #define VM_WARN_ON(cond) (void)WARN_ON(cond) + #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) + #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) +@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm); + #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) + #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) + #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) ++#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) + #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) + #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) + #endif +-- +2.30.2 + diff --git a/queue-4.14/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch b/queue-4.14/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch new file mode 100644 index 00000000000..f7867df919e --- /dev/null +++ b/queue-4.14/mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch @@ -0,0 +1,165 @@ +From 282ef53c0d02a97151131fc9573c122a85de2b59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:52 -0700 +Subject: mm, futex: fix shared futex pgoff on shmem huge page + +From: Hugh Dickins + +[ Upstream commit fe19bd3dae3d15d2fbfdb3de8839a6ea0fe94264 ] + +If more than one futex is placed on a shmem huge page, it can happen +that waking the second wakes the first instead, and leaves the second +waiting: the key's shared.pgoff is wrong. + +When 3.11 commit 13d60f4b6ab5 ("futex: Take hugepages into account when +generating futex_key"), the only shared huge pages came from hugetlbfs, +and the code added to deal with its exceptional page->index was put into +hugetlb source. Then that was missed when 4.8 added shmem huge pages. + +page_to_pgoff() is what others use for this nowadays: except that, as +currently written, it gives the right answer on hugetlbfs head, but +nonsense on hugetlbfs tails. Fix that by calling hugetlbfs-specific +hugetlb_basepage_index() on PageHuge tails as well as on head. + +Yes, it's unconventional to declare hugetlb_basepage_index() there in +pagemap.h, rather than in hugetlb.h; but I do not expect anything but +page_to_pgoff() ever to need it. + +[akpm@linux-foundation.org: give hugetlb_basepage_index() prototype the correct scope] + +Link: https://lkml.kernel.org/r/b17d946b-d09-326e-b42a-52884c36df32@google.com +Fixes: 800d8c63b2e9 ("shmem: add huge pages support") +Reported-by: Neel Natu +Signed-off-by: Hugh Dickins +Reviewed-by: Matthew Wilcox (Oracle) +Acked-by: Thomas Gleixner +Cc: "Kirill A. 
Shutemov" +Cc: Zhang Yi +Cc: Mel Gorman +Cc: Mike Kravetz +Cc: Ingo Molnar +Cc: Peter Zijlstra +Cc: Darren Hart +Cc: Davidlohr Bueso +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +Note on stable backport: leave redundant #include +in kernel/futex.c, to avoid conflict over the header files included. + +Signed-off-by: Hugh Dickins +Signed-off-by: Sasha Levin +--- + include/linux/hugetlb.h | 16 ---------------- + include/linux/pagemap.h | 13 +++++++------ + kernel/futex.c | 2 +- + mm/hugetlb.c | 5 +---- + 4 files changed, 9 insertions(+), 27 deletions(-) + +diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h +index fe0ec0a29db7..d2b5cc8ce54f 100644 +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -467,17 +467,6 @@ static inline int hstate_index(struct hstate *h) + return h - hstates; + } + +-pgoff_t __basepage_index(struct page *page); +- +-/* Return page->index in PAGE_SIZE units */ +-static inline pgoff_t basepage_index(struct page *page) +-{ +- if (!PageCompound(page)) +- return page->index; +- +- return __basepage_index(page); +-} +- + extern int dissolve_free_huge_page(struct page *page); + extern int dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn); +@@ -572,11 +561,6 @@ static inline int hstate_index(struct hstate *h) + return 0; + } + +-static inline pgoff_t basepage_index(struct page *page) +-{ +- return page->index; +-} +- + static inline int dissolve_free_huge_page(struct page *page) + { + return 0; +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index e08b5339023c..84c7fc7f63e7 100644 +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -399,7 +399,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping, + } + + /* +- * Get index of the page with in radix-tree ++ * Get index of the page within radix-tree (but not for hugetlb pages). + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) + */ + static inline pgoff_t page_to_index(struct page *page) +@@ -418,15 +418,16 @@ static inline pgoff_t page_to_index(struct page *page) + return pgoff; + } + ++extern pgoff_t hugetlb_basepage_index(struct page *page); ++ + /* +- * Get the offset in PAGE_SIZE. +- * (TODO: hugepage should have ->index in PAGE_SIZE) ++ * Get the offset in PAGE_SIZE (even for hugetlb pages). 
++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE) + */ + static inline pgoff_t page_to_pgoff(struct page *page) + { +- if (unlikely(PageHeadHuge(page))) +- return page->index << compound_order(page); +- ++ if (unlikely(PageHuge(page))) ++ return hugetlb_basepage_index(page); + return page_to_index(page); + } + +diff --git a/kernel/futex.c b/kernel/futex.c +index af1d9a993988..e282c083df59 100644 +--- a/kernel/futex.c ++++ b/kernel/futex.c +@@ -719,7 +719,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) + + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ + key->shared.i_seq = get_inode_sequence_number(inode); +- key->shared.pgoff = basepage_index(tail); ++ key->shared.pgoff = page_to_pgoff(tail); + rcu_read_unlock(); + } + +diff --git a/mm/hugetlb.c b/mm/hugetlb.c +index 0dc181290d1f..c765fd01f0aa 100644 +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1403,15 +1403,12 @@ int PageHeadHuge(struct page *page_head) + return get_compound_page_dtor(page_head) == free_huge_page; + } + +-pgoff_t __basepage_index(struct page *page) ++pgoff_t hugetlb_basepage_index(struct page *page) + { + struct page *page_head = compound_head(page); + pgoff_t index = page_index(page_head); + unsigned long compound_idx; + +- if (!PageHuge(page_head)) +- return page_index(page); +- + if (compound_order(page_head) >= MAX_ORDER) + compound_idx = page_to_pfn(page) - page_to_pfn(page_head); + else +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch b/queue-4.14/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch new file mode 100644 index 00000000000..e9a3d4b80a9 --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-add-a-level-of-indentation.patch @@ -0,0 +1,160 @@ +From 7d740deb578880c6db6feb6b8f87e7816a295011 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:17 -0700 +Subject: mm: page_vma_mapped_walk(): add a level of indentation + +From: Hugh Dickins + +[ Upstream commit b3807a91aca7d21c05d5790612e49969117a72b9 ] + +page_vma_mapped_walk() cleanup: add a level of indentation to much of +the body, making no functional change in this commit, but reducing the +later diff when this is all converted to a loop. + +[hughd@google.com: : page_vma_mapped_walk(): add a level of indentation fix] + Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com + +Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 105 ++++++++++++++++++++++--------------------- + 1 file changed, 55 insertions(+), 50 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 2463ba78959b..911c6dbe85f9 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -168,62 +168,67 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pvmw->pte) + goto next_pte; + restart: +- pgd = pgd_offset(mm, pvmw->address); +- if (!pgd_present(*pgd)) +- return false; +- p4d = p4d_offset(pgd, pvmw->address); +- if (!p4d_present(*p4d)) +- return false; +- pud = pud_offset(p4d, pvmw->address); +- if (!pud_present(*pud)) +- return false; +- pvmw->pmd = pmd_offset(pud, pvmw->address); +- /* +- * Make sure the pmd value isn't cached in a register by the +- * compiler and used as a stale value after we've observed a +- * subsequent update. +- */ +- pmde = READ_ONCE(*pvmw->pmd); +- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { +- pvmw->ptl = pmd_lock(mm, pvmw->pmd); +- pmde = *pvmw->pmd; +- if (likely(pmd_trans_huge(pmde))) { +- if (pvmw->flags & PVMW_MIGRATION) +- return not_found(pvmw); +- if (pmd_page(pmde) != page) +- return not_found(pvmw); +- return true; +- } +- if (!pmd_present(pmde)) { +- swp_entry_t entry; ++ { ++ pgd = pgd_offset(mm, pvmw->address); ++ if (!pgd_present(*pgd)) ++ return false; ++ p4d = p4d_offset(pgd, pvmw->address); ++ if (!p4d_present(*p4d)) ++ return false; ++ pud = pud_offset(p4d, pvmw->address); ++ if (!pud_present(*pud)) ++ return false; + +- if (!thp_migration_supported() || +- !(pvmw->flags & PVMW_MIGRATION)) +- return not_found(pvmw); +- entry = pmd_to_swp_entry(pmde); +- if (!is_migration_entry(entry) || +- migration_entry_to_page(entry) != page) +- return not_found(pvmw); +- return true; +- } +- /* THP pmd was split under us: handle on pte level */ +- spin_unlock(pvmw->ptl); +- pvmw->ptl = NULL; +- } else if (!pmd_present(pmde)) { ++ pvmw->pmd = pmd_offset(pud, pvmw->address); + /* +- * If PVMW_SYNC, take and drop THP pmd lock so that we +- * cannot return prematurely, while zap_huge_pmd() has +- * cleared *pmd but not decremented compound_mapcount(). ++ * Make sure the pmd value isn't cached in a register by the ++ * compiler and used as a stale value after we've observed a ++ * subsequent update. 
+ */ +- if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) { +- spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); ++ pmde = READ_ONCE(*pvmw->pmd); ++ ++ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { ++ pvmw->ptl = pmd_lock(mm, pvmw->pmd); ++ pmde = *pvmw->pmd; ++ if (likely(pmd_trans_huge(pmde))) { ++ if (pvmw->flags & PVMW_MIGRATION) ++ return not_found(pvmw); ++ if (pmd_page(pmde) != page) ++ return not_found(pvmw); ++ return true; ++ } ++ if (!pmd_present(pmde)) { ++ swp_entry_t entry; ++ ++ if (!thp_migration_supported() || ++ !(pvmw->flags & PVMW_MIGRATION)) ++ return not_found(pvmw); ++ entry = pmd_to_swp_entry(pmde); ++ if (!is_migration_entry(entry) || ++ migration_entry_to_page(entry) != page) ++ return not_found(pvmw); ++ return true; ++ } ++ /* THP pmd was split under us: handle on pte level */ ++ spin_unlock(pvmw->ptl); ++ pvmw->ptl = NULL; ++ } else if (!pmd_present(pmde)) { ++ /* ++ * If PVMW_SYNC, take and drop THP pmd lock so that we ++ * cannot return prematurely, while zap_huge_pmd() has ++ * cleared *pmd but not decremented compound_mapcount(). ++ */ ++ if ((pvmw->flags & PVMW_SYNC) && ++ PageTransCompound(page)) { ++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + +- spin_unlock(ptl); ++ spin_unlock(ptl); ++ } ++ return false; + } +- return false; ++ if (!map_pte(pvmw)) ++ goto next_pte; + } +- if (!map_pte(pvmw)) +- goto next_pte; + while (1) { + unsigned long end; + +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch b/queue-4.14/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch new file mode 100644 index 00000000000..6e3bcf62a47 --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-crossing-page-table-boundary.patch @@ -0,0 +1,61 @@ +From a3934d9ec623b8dea7f1ecc86c8aac617b990cd8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:14 -0700 +Subject: mm: page_vma_mapped_walk(): crossing page table boundary + +From: Hugh Dickins + +[ Upstream commit 448282487483d6fa5b2eeeafaa0acc681e544a9c ] + +page_vma_mapped_walk() cleanup: adjust the test for crossing page table +boundary - I believe pvmw->address is always page-aligned, but nothing +else here assumed that; and remember to reset pvmw->pte to NULL after +unmapping the page table, though I never saw any bug from that. + +Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 92d7f574b8ab..2463ba78959b 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -239,16 +239,16 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pvmw->address >= end) + return not_found(pvmw); + /* Did we cross page table boundary? 
*/ +- if (pvmw->address % PMD_SIZE == 0) { +- pte_unmap(pvmw->pte); ++ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { + if (pvmw->ptl) { + spin_unlock(pvmw->ptl); + pvmw->ptl = NULL; + } ++ pte_unmap(pvmw->pte); ++ pvmw->pte = NULL; + goto restart; +- } else { +- pvmw->pte++; + } ++ pvmw->pte++; + } while (pte_none(*pvmw->pte)); + + if (!pvmw->ptl) { +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch b/queue-4.14/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch new file mode 100644 index 00000000000..b5bc6a1768c --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch @@ -0,0 +1,68 @@ +From f7be95c11847d4399325bb317b16dc50daeb962a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:23 -0700 +Subject: mm: page_vma_mapped_walk(): get vma_address_end() earlier + +From: Hugh Dickins + +[ Upstream commit a765c417d876cc635f628365ec9aa6f09470069a ] + +page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the +start, rather than later at next_pte. + +It's a little unnecessary overhead on the first call, but makes for a +simpler loop in the following commit. + +Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index f6c750539a6b..96d4c4738590 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -166,6 +166,15 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + return true; + } + ++ /* ++ * Seek to next pte only makes sense for THP. ++ * But more important than that optimization, is to filter out ++ * any PageKsm page: whose page->index misleads vma_address() ++ * and vma_address_end() to disaster. ++ */ ++ end = PageTransCompound(page) ? ++ vma_address_end(page, pvmw->vma) : ++ pvmw->address + PAGE_SIZE; + if (pvmw->pte) + goto next_pte; + restart: +@@ -233,10 +242,6 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (check_pte(pvmw)) + return true; + next_pte: +- /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(page)) +- return not_found(pvmw); +- end = vma_address_end(page, pvmw->vma); + do { + pvmw->address += PAGE_SIZE; + if (pvmw->address >= end) +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch b/queue-4.14/mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch new file mode 100644 index 00000000000..748a2dadd9e --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch @@ -0,0 +1,80 @@ +From d4e20c7bea4b602253be8e27d034a307bc70aa51 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:10 -0700 +Subject: mm: page_vma_mapped_walk(): prettify PVMW_MIGRATION block + +From: Hugh Dickins + +[ Upstream commit e2e1d4076c77b3671cf8ce702535ae7dee3acf89 ] + +page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to +follow the same "return not_found, return not_found, return true" +pattern as the block above it (note: returning not_found there is never +premature, since existence or prior existence of huge pmd guarantees +good alignment). 
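+
+Schematically (a condensed sketch of the two control-flow shapes, not
+the full code), the change is from the nested form:
+
+	if (thp_migration_supported()) {
+		if (!(pvmw->flags & PVMW_MIGRATION))
+			return not_found(pvmw);
+		...
+	}
+	return not_found(pvmw);
+
+to the flat form used by the pmd_trans_huge() block above it:
+
+	if (!thp_migration_supported() ||
+	    !(pvmw->flags & PVMW_MIGRATION))
+		return not_found(pvmw);
+	...
+	return true;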
+ +Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Reviewed-by: Peter Xu +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 30 ++++++++++++++---------------- + 1 file changed, 14 insertions(+), 16 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 8a6af4007c7e..92d7f574b8ab 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -193,24 +193,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pmd_page(pmde) != page) + return not_found(pvmw); + return true; +- } else if (!pmd_present(pmde)) { +- if (thp_migration_supported()) { +- if (!(pvmw->flags & PVMW_MIGRATION)) +- return not_found(pvmw); +- if (is_migration_entry(pmd_to_swp_entry(pmde))) { +- swp_entry_t entry = pmd_to_swp_entry(pmde); ++ } ++ if (!pmd_present(pmde)) { ++ swp_entry_t entry; + +- if (migration_entry_to_page(entry) != page) +- return not_found(pvmw); +- return true; +- } +- } +- return not_found(pvmw); +- } else { +- /* THP pmd was split under us: handle on pte level */ +- spin_unlock(pvmw->ptl); +- pvmw->ptl = NULL; ++ if (!thp_migration_supported() || ++ !(pvmw->flags & PVMW_MIGRATION)) ++ return not_found(pvmw); ++ entry = pmd_to_swp_entry(pmde); ++ if (!is_migration_entry(entry) || ++ migration_entry_to_page(entry) != page) ++ return not_found(pvmw); ++ return true; + } ++ /* THP pmd was split under us: handle on pte level */ ++ spin_unlock(pvmw->ptl); ++ pvmw->ptl = NULL; + } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch b/queue-4.14/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch new file mode 100644 index 00000000000..e9b8967d1ff --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch @@ -0,0 +1,73 @@ +From e91dff2640beec5309275d7933848bac8c32c101 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:04 -0700 +Subject: mm: page_vma_mapped_walk(): settle PageHuge on entry + +From: Hugh Dickins + +[ Upstream commit 6d0fd5987657cb0c9756ce684e3a74c0f6351728 ] + +page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of +the way at the start, so no need to worry about it later. + +Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Reviewed-by: Peter Xu +Cc: Alistair Popple +Cc: "Kirill A. 
Shutemov" +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 3cff784019c1..bdb63aafc737 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -147,10 +147,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pvmw->pmd && !pvmw->pte) + return not_found(pvmw); + +- if (pvmw->pte) +- goto next_pte; +- + if (unlikely(PageHuge(page))) { ++ /* The only possible mapping was handled on last iteration */ ++ if (pvmw->pte) ++ return not_found(pvmw); ++ + /* when pud is not present, pte will be NULL */ + pvmw->pte = huge_pte_offset(mm, pvmw->address, + PAGE_SIZE << compound_order(page)); +@@ -163,6 +164,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + return not_found(pvmw); + return true; + } ++ ++ if (pvmw->pte) ++ goto next_pte; + restart: + pgd = pgd_offset(mm, pvmw->address); + if (!pgd_present(*pgd)) +@@ -228,7 +232,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + return true; + next_pte: + /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(page) || PageHuge(page)) ++ if (!PageTransHuge(page)) + return not_found(pvmw); + end = vma_address_end(page, pvmw->vma); + do { +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch b/queue-4.14/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch new file mode 100644 index 00000000000..5d0c9492047 --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch @@ -0,0 +1,66 @@ +From 2aab14fa4f5c330a41c5dfe18bc83d18b6a02063 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:20 -0700 +Subject: mm: page_vma_mapped_walk(): use goto instead of while (1) + +From: Hugh Dickins + +[ Upstream commit 474466301dfd8b39a10c01db740645f3f7ae9a28 ] + +page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte, +and use "goto this_pte", in place of the "while (1)" loop at the end. + +Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 911c6dbe85f9..f6c750539a6b 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -138,6 +138,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + { + struct mm_struct *mm = pvmw->vma->vm_mm; + struct page *page = pvmw->page; ++ unsigned long end; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; +@@ -228,10 +229,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + } + if (!map_pte(pvmw)) + goto next_pte; +- } +- while (1) { +- unsigned long end; +- ++this_pte: + if (check_pte(pvmw)) + return true; + next_pte: +@@ -260,6 +258,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + pvmw->ptl = pte_lockptr(mm, pvmw->pmd); + spin_lock(pvmw->ptl); + } ++ goto this_pte; + } + } + +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch b/queue-4.14/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch new file mode 100644 index 00000000000..25981d43292 --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch @@ -0,0 +1,78 @@ +From aa84a964e28e7feed6e5191876ad4526c9fe6aea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:01 -0700 +Subject: mm: page_vma_mapped_walk(): use page for pvmw->page + +From: Hugh Dickins + +[ Upstream commit f003c03bd29e6f46fef1b9a8e8d636ac732286d5 ] + +Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes". + +I've marked all of these for stable: many are merely cleanups, but I +think they are much better before the main fix than after. + +This patch (of 11): + +page_vma_mapped_walk() cleanup: sometimes the local copy of pvwm->page +was used, sometimes pvmw->page itself: use the local copy "page" +throughout. + +Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com +Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com +Signed-off-by: Hugh Dickins +Reviewed-by: Alistair Popple +Acked-by: Kirill A. Shutemov +Reviewed-by: Peter Xu +Cc: Yang Shi +Cc: Wang Yugui +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Zi Yan +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 340207ba3743..3cff784019c1 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -150,7 +150,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pvmw->pte) + goto next_pte; + +- if (unlikely(PageHuge(pvmw->page))) { ++ if (unlikely(PageHuge(page))) { + /* when pud is not present, pte will be NULL */ + pvmw->pte = huge_pte_offset(mm, pvmw->address, + PAGE_SIZE << compound_order(page)); +@@ -212,8 +212,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount(). 
+ */ +- if ((pvmw->flags & PVMW_SYNC) && +- PageTransCompound(pvmw->page)) { ++ if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); +@@ -229,9 +228,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + return true; + next_pte: + /* Seek to next pte only makes sense for THP */ +- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) ++ if (!PageTransHuge(page) || PageHuge(page)) + return not_found(pvmw); +- end = vma_address_end(pvmw->page, pvmw->vma); ++ end = vma_address_end(page, pvmw->vma); + do { + pvmw->address += PAGE_SIZE; + if (pvmw->address >= end) +-- +2.30.2 + diff --git a/queue-4.14/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch b/queue-4.14/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch new file mode 100644 index 00000000000..134696ede03 --- /dev/null +++ b/queue-4.14/mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch @@ -0,0 +1,63 @@ +From f60a41a44158e5e3a7826a9da9dc2a416dd2f07b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:07 -0700 +Subject: mm: page_vma_mapped_walk(): use pmde for *pvmw->pmd + +From: Hugh Dickins + +[ Upstream commit 3306d3119ceacc43ea8b141a73e21fea68eec30c ] + +page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then +use it in subsequent tests, instead of repeatedly dereferencing pointer. + +Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Reviewed-by: Peter Xu +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index bdb63aafc737..8a6af4007c7e 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -186,18 +186,19 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + pmde = READ_ONCE(*pvmw->pmd); + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { + pvmw->ptl = pmd_lock(mm, pvmw->pmd); +- if (likely(pmd_trans_huge(*pvmw->pmd))) { ++ pmde = *pvmw->pmd; ++ if (likely(pmd_trans_huge(pmde))) { + if (pvmw->flags & PVMW_MIGRATION) + return not_found(pvmw); +- if (pmd_page(*pvmw->pmd) != page) ++ if (pmd_page(pmde) != page) + return not_found(pvmw); + return true; +- } else if (!pmd_present(*pvmw->pmd)) { ++ } else if (!pmd_present(pmde)) { + if (thp_migration_supported()) { + if (!(pvmw->flags & PVMW_MIGRATION)) + return not_found(pvmw); +- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { +- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); ++ if (is_migration_entry(pmd_to_swp_entry(pmde))) { ++ swp_entry_t entry = pmd_to_swp_entry(pmde); + + if (migration_entry_to_page(entry) != page) + return not_found(pvmw); +-- +2.30.2 + diff --git a/queue-4.14/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch b/queue-4.14/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch new file mode 100644 index 00000000000..32c1817cf4d --- /dev/null +++ b/queue-4.14/mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch @@ -0,0 +1,36 @@ +From 4b798265f1b86f30c79f18201638e845803ef085 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Feb 2021 17:17:56 -0800 +Subject: mm/rmap: remove unneeded semicolon in page_not_mapped() + +From: Miaohe Lin + +[ Upstream commit 
e0af87ff7afcde2660be44302836d2d5618185af ] + +Remove extra semicolon without any functional change intended. + +Link: https://lkml.kernel.org/r/20210127093425.39640-1-linmiaohe@huawei.com +Signed-off-by: Miaohe Lin +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/rmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/rmap.c b/mm/rmap.c +index 8bd2ddd8febd..e2506b6adb6a 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1671,7 +1671,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) + static int page_not_mapped(struct page *page) + { + return !page_mapped(page); +-}; ++} + + /** + * try_to_munlock - try to munlock a page +-- +2.30.2 + diff --git a/queue-4.14/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch b/queue-4.14/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch new file mode 100644 index 00000000000..51b4d11fa82 --- /dev/null +++ b/queue-4.14/mm-rmap-use-page_not_mapped-in-try_to_unmap.patch @@ -0,0 +1,65 @@ +From f77a2f7ab2a465fd416dcd53d23326d776e13131 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 25 Feb 2021 17:18:03 -0800 +Subject: mm/rmap: use page_not_mapped in try_to_unmap() + +From: Miaohe Lin + +[ Upstream commit b7e188ec98b1644ff70a6d3624ea16aadc39f5e0 ] + +page_mapcount_is_zero() calculates accurately how many mappings a hugepage +has in order to check against 0 only. This is a waste of cpu time. We +can do this via page_not_mapped() to save some possible atomic_read +cycles. Remove the function page_mapcount_is_zero() as it's not used +anymore and move page_not_mapped() above try_to_unmap() to avoid +identifier undeclared compilation error. + +Link: https://lkml.kernel.org/r/20210130084904.35307-1-linmiaohe@huawei.com +Signed-off-by: Miaohe Lin +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/rmap.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/mm/rmap.c b/mm/rmap.c +index e2506b6adb6a..e6a556fec9d1 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -1624,9 +1624,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) + return is_vma_temporary_stack(vma); + } + +-static int page_mapcount_is_zero(struct page *page) ++static int page_not_mapped(struct page *page) + { +- return !total_mapcount(page); ++ return !page_mapped(page); + } + + /** +@@ -1644,7 +1644,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) + struct rmap_walk_control rwc = { + .rmap_one = try_to_unmap_one, + .arg = (void *)flags, +- .done = page_mapcount_is_zero, ++ .done = page_not_mapped, + .anon_lock = page_lock_anon_vma_read, + }; + +@@ -1668,11 +1668,6 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) + return !page_mapcount(page) ? 
true : false; + } + +-static int page_not_mapped(struct page *page) +-{ +- return !page_mapped(page); +-} +- + /** + * try_to_munlock - try to munlock a page + * @page: the page to be munlocked +-- +2.30.2 + diff --git a/queue-4.14/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch b/queue-4.14/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch new file mode 100644 index 00000000000..561f7f205e4 --- /dev/null +++ b/queue-4.14/mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch @@ -0,0 +1,53 @@ +From 4cd201ad0ceb1facb60176b5df0ddd52869812dc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:30 -0700 +Subject: mm/thp: another PVMW_SYNC fix in page_vma_mapped_walk() + +From: Hugh Dickins + +[ Upstream commit a7a69d8ba88d8dcee7ef00e91d413a4bd003a814 ] + +Aha! Shouldn't that quick scan over pte_none()s make sure that it holds +ptlock in the PVMW_SYNC case? That too might have been responsible for +BUGs or WARNs in split_huge_page_to_list() or its unmap_page(), though +I've never seen any. + +Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com +Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/ +Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Tested-by: Wang Yugui +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 16adeef76d00..a612daef5f00 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -271,6 +271,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + goto restart; + } + pvmw->pte++; ++ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) { ++ pvmw->ptl = pte_lockptr(mm, pvmw->pmd); ++ spin_lock(pvmw->ptl); ++ } + } while (pte_none(*pvmw->pte)); + + if (!pvmw->ptl) { +-- +2.30.2 + diff --git a/queue-4.14/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch b/queue-4.14/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch new file mode 100644 index 00000000000..96f8b6a8e2f --- /dev/null +++ b/queue-4.14/mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch @@ -0,0 +1,66 @@ +From 8ac5455b753eeb27f78f0391626ad9c2f2cb2f37 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Jun 2021 18:24:00 -0700 +Subject: mm/thp: fix page_address_in_vma() on file THP tails + +From: Jue Wang + +[ Upstream commit 31657170deaf1d8d2f6a1955fbc6fa9d228be036 ] + +Anon THP tails were already supported, but memory-failure may need to +use page_address_in_vma() on file THP tails, which its page->mapping +check did not permit: fix it. + +hughd adds: no current usage is known to hit the issue, but this does +fix a subtle trap in a general helper: best fixed in stable sooner than +later. + +Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com +Fixes: 800d8c63b2e9 ("shmem: add huge pages support") +Signed-off-by: Jue Wang +Signed-off-by: Hugh Dickins +Reviewed-by: Matthew Wilcox (Oracle) +Reviewed-by: Yang Shi +Acked-by: Kirill A. 
Shutemov +Cc: Alistair Popple +Cc: Jan Kara +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/rmap.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/mm/rmap.c b/mm/rmap.c +index bebe29a2c5f2..8ed8ec113d5a 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -695,11 +695,11 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) + if (!vma->anon_vma || !page__anon_vma || + vma->anon_vma->root != page__anon_vma->root) + return -EFAULT; +- } else if (page->mapping) { +- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) +- return -EFAULT; +- } else ++ } else if (!vma->vm_file) { ++ return -EFAULT; ++ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { + return -EFAULT; ++ } + + return vma_address(page, vma); + } +-- +2.30.2 + diff --git a/queue-4.14/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch b/queue-4.14/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch new file mode 100644 index 00000000000..a5a79ff0c3d --- /dev/null +++ b/queue-4.14/mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch @@ -0,0 +1,126 @@ +From 8c4e927453029fc06914f8984bc2e4f9c093670f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Jun 2021 18:39:26 -0700 +Subject: mm/thp: fix page_vma_mapped_walk() if THP mapped by ptes + +From: Hugh Dickins + +[ Upstream commit a9a7504d9beaf395481faa91e70e2fd08f7a3dde ] + +Running certain tests with a DEBUG_VM kernel would crash within hours, +on the total_mapcount BUG() in split_huge_page_to_list(), while trying +to free up some memory by punching a hole in a shmem huge page: split's +try_to_unmap() was unable to find all the mappings of the page (which, +on a !DEBUG_VM kernel, would then keep the huge page pinned in memory). + +Crash dumps showed two tail pages of a shmem huge page remained mapped +by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end +of a long unmapped range; and no page table had yet been allocated for +the head of the huge page to be mapped into. + +Although designed to handle these odd misaligned huge-page-mapped-by-pte +cases, page_vma_mapped_walk() falls short by returning false prematurely +when !pmd_present or !pud_present or !p4d_present or !pgd_present: there +are cases when a huge page may span the boundary, with ptes present in +the next. + +Restructure page_vma_mapped_walk() as a loop to continue in these cases, +while keeping its layout much as before. Add a step_forward() helper to +advance pvmw->address across those boundaries: originally I tried to use +mm's standard p?d_addr_end() macros, but hit the same crash 512 times +less often: because of the way redundant levels are folded together, but +folded differently in different configurations, it was just too +difficult to use them correctly; and step_forward() is simpler anyway. + +Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com +Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. 
Shutemov +Cc: Alistair Popple +Cc: Matthew Wilcox +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Wang Yugui +Cc: Will Deacon +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + mm/page_vma_mapped.c | 34 +++++++++++++++++++++++++--------- + 1 file changed, 25 insertions(+), 9 deletions(-) + +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 96d4c4738590..16adeef76d00 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -110,6 +110,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) + return true; + } + ++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) ++{ ++ pvmw->address = (pvmw->address + size) & ~(size - 1); ++ if (!pvmw->address) ++ pvmw->address = ULONG_MAX; ++} ++ + /** + * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at + * @pvmw->address +@@ -178,16 +185,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (pvmw->pte) + goto next_pte; + restart: +- { ++ do { + pgd = pgd_offset(mm, pvmw->address); +- if (!pgd_present(*pgd)) +- return false; ++ if (!pgd_present(*pgd)) { ++ step_forward(pvmw, PGDIR_SIZE); ++ continue; ++ } + p4d = p4d_offset(pgd, pvmw->address); +- if (!p4d_present(*p4d)) +- return false; ++ if (!p4d_present(*p4d)) { ++ step_forward(pvmw, P4D_SIZE); ++ continue; ++ } + pud = pud_offset(p4d, pvmw->address); +- if (!pud_present(*pud)) +- return false; ++ if (!pud_present(*pud)) { ++ step_forward(pvmw, PUD_SIZE); ++ continue; ++ } + + pvmw->pmd = pmd_offset(pud, pvmw->address); + /* +@@ -234,7 +247,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + + spin_unlock(ptl); + } +- return false; ++ step_forward(pvmw, PMD_SIZE); ++ continue; + } + if (!map_pte(pvmw)) + goto next_pte; +@@ -264,7 +278,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + spin_lock(pvmw->ptl); + } + goto this_pte; +- } ++ } while (pvmw->address < end); ++ ++ return false; + } + + /** +-- +2.30.2 + diff --git a/queue-4.14/mm-thp-fix-vma_address-if-virtual-address-below-file.patch b/queue-4.14/mm-thp-fix-vma_address-if-virtual-address-below-file.patch new file mode 100644 index 00000000000..6ecb2bf878e --- /dev/null +++ b/queue-4.14/mm-thp-fix-vma_address-if-virtual-address-below-file.patch @@ -0,0 +1,259 @@ +From 32ee0628da926ae1cadb95196361e702f2a9122c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Jun 2021 18:23:56 -0700 +Subject: mm/thp: fix vma_address() if virtual address below file offset + +From: Hugh Dickins + +[ Upstream commit 494334e43c16d63b878536a26505397fce6ff3a2 ] + +Running certain tests with a DEBUG_VM kernel would crash within hours, +on the total_mapcount BUG() in split_huge_page_to_list(), while trying +to free up some memory by punching a hole in a shmem huge page: split's +try_to_unmap() was unable to find all the mappings of the page (which, +on a !DEBUG_VM kernel, would then keep the huge page pinned in memory). + +When that BUG() was changed to a WARN(), it would later crash on the +VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma) in +mm/internal.h:vma_address(), used by rmap_walk_file() for +try_to_unmap(). + +vma_address() is usually correct, but there's a wraparound case when the +vm_start address is unusually low, but vm_pgoff not so low: +vma_address() chooses max(start, vma->vm_start), but that decides on the +wrong address, because start has become almost ULONG_MAX. 
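+
+A worked example with invented numbers (4K pages, 64-bit): suppose
+vma->vm_start is 0x2000 and vma->vm_pgoff is 0x200, but the THP head
+has pgoff 0x1f0, straddling the start of the vma. Then
+
+	start = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT)
+	      = 0x2000 + ((0x1f0 - 0x200) << 12)	/* wraps unsigned */
+	      = ULONG_MAX - 0xdfff
+
+so max(start, vma->vm_start) picks the wrapped-around start, and the
+VM_BUG_ON_VMA() fires on start >= vma->vm_end.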
+ +Rewrite vma_address() to be more careful about vm_pgoff; move the +VM_BUG_ON_VMA() out of it, returning -EFAULT for errors, so that it can +be safely used from page_mapped_in_vma() and page_address_in_vma() too. + +Add vma_address_end() to apply similar care to end address calculation, +in page_vma_mapped_walk() and page_mkclean_one() and try_to_unmap_one(); +though it raises a question of whether callers would do better to supply +pvmw->end to page_vma_mapped_walk() - I chose not, for a smaller patch. + +An irritation is that their apparent generality breaks down on KSM +pages, which cannot be located by the page->index that page_to_pgoff() +uses: as commit 4b0ece6fa016 ("mm: migrate: fix remove_migration_pte() +for ksm pages") once discovered. I dithered over the best thing to do +about that, and have ended up with a VM_BUG_ON_PAGE(PageKsm) in both +vma_address() and vma_address_end(); though the only place in danger of +using it on them was try_to_unmap_one(). + +Sidenote: vma_address() and vma_address_end() now use compound_nr() on a +head page, instead of thp_size(): to make the right calculation on a +hugetlbfs page, whether or not THPs are configured. try_to_unmap() is +used on hugetlbfs pages, but perhaps the wrong calculation never +mattered. + +Link: https://lkml.kernel.org/r/caf1c1a3-7cfb-7f8f-1beb-ba816e932825@google.com +Fixes: a8fa41ad2f6f ("mm, rmap: check all VMAs that PTE-mapped THP can be part of") +Signed-off-by: Hugh Dickins +Acked-by: Kirill A. Shutemov +Cc: Alistair Popple +Cc: Jan Kara +Cc: Jue Wang +Cc: "Matthew Wilcox (Oracle)" +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +Note on stable backport: fixed up conflicts on intervening thp_size(), +and mmu_notifier_range initializations; substitute for compound_nr(). + +Signed-off-by: Hugh Dickins +Signed-off-by: Sasha Levin +--- + mm/internal.h | 53 ++++++++++++++++++++++++++++++++------------ + mm/page_vma_mapped.c | 16 +++++-------- + mm/rmap.c | 14 ++++++------ + 3 files changed, 52 insertions(+), 31 deletions(-) + +diff --git a/mm/internal.h b/mm/internal.h +index a182506242c4..97c8e896cd2f 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -330,27 +330,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) + extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); + + /* +- * At what user virtual address is page expected in @vma? ++ * At what user virtual address is page expected in vma? ++ * Returns -EFAULT if all of the page is outside the range of vma. ++ * If page is a compound head, the entire compound page is considered. + */ + static inline unsigned long +-__vma_address(struct page *page, struct vm_area_struct *vma) ++vma_address(struct page *page, struct vm_area_struct *vma) + { +- pgoff_t pgoff = page_to_pgoff(page); +- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ pgoff_t pgoff; ++ unsigned long address; ++ ++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ ++ pgoff = page_to_pgoff(page); ++ if (pgoff >= vma->vm_pgoff) { ++ address = vma->vm_start + ++ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ /* Check for address beyond vma (or wrapped through 0?) 
*/ ++ if (address < vma->vm_start || address >= vma->vm_end) ++ address = -EFAULT; ++ } else if (PageHead(page) && ++ pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) { ++ /* Test above avoids possibility of wrap to 0 on 32-bit */ ++ address = vma->vm_start; ++ } else { ++ address = -EFAULT; ++ } ++ return address; + } + ++/* ++ * Then at what user virtual address will none of the page be found in vma? ++ * Assumes that vma_address() already returned a good starting address. ++ * If page is a compound head, the entire compound page is considered. ++ */ + static inline unsigned long +-vma_address(struct page *page, struct vm_area_struct *vma) ++vma_address_end(struct page *page, struct vm_area_struct *vma) + { +- unsigned long start, end; +- +- start = __vma_address(page, vma); +- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1); +- +- /* page should be within @vma mapping range */ +- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); +- +- return max(start, vma->vm_start); ++ pgoff_t pgoff; ++ unsigned long address; ++ ++ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ ++ pgoff = page_to_pgoff(page) + (1UL << compound_order(page)); ++ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ++ /* Check for address beyond vma (or wrapped through 0?) */ ++ if (address < vma->vm_start || address > vma->vm_end) ++ address = vma->vm_end; ++ return address; + } + + #else /* !CONFIG_MMU */ +diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c +index 31879f2175d0..340207ba3743 100644 +--- a/mm/page_vma_mapped.c ++++ b/mm/page_vma_mapped.c +@@ -223,18 +223,18 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) + if (!map_pte(pvmw)) + goto next_pte; + while (1) { ++ unsigned long end; ++ + if (check_pte(pvmw)) + return true; + next_pte: + /* Seek to next pte only makes sense for THP */ + if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) + return not_found(pvmw); ++ end = vma_address_end(pvmw->page, pvmw->vma); + do { + pvmw->address += PAGE_SIZE; +- if (pvmw->address >= pvmw->vma->vm_end || +- pvmw->address >= +- __vma_address(pvmw->page, pvmw->vma) + +- hpage_nr_pages(pvmw->page) * PAGE_SIZE) ++ if (pvmw->address >= end) + return not_found(pvmw); + /* Did we cross page table boundary? 
*/ + if (pvmw->address % PMD_SIZE == 0) { +@@ -272,14 +272,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) + .vma = vma, + .flags = PVMW_SYNC, + }; +- unsigned long start, end; +- +- start = __vma_address(page, vma); +- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1); + +- if (unlikely(end < vma->vm_start || start >= vma->vm_end)) ++ pvmw.address = vma_address(page, vma); ++ if (pvmw.address == -EFAULT) + return 0; +- pvmw.address = max(start, vma->vm_start); + if (!page_vma_mapped_walk(&pvmw)) + return 0; + page_vma_mapped_walk_done(&pvmw); +diff --git a/mm/rmap.c b/mm/rmap.c +index b6571c739723..bebe29a2c5f2 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -686,7 +686,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) + */ + unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) + { +- unsigned long address; + if (PageAnon(page)) { + struct anon_vma *page__anon_vma = page_anon_vma(page); + /* +@@ -701,10 +700,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) + return -EFAULT; + } else + return -EFAULT; +- address = __vma_address(page, vma); +- if (unlikely(address < vma->vm_start || address >= vma->vm_end)) +- return -EFAULT; +- return address; ++ ++ return vma_address(page, vma); + } + + pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) +@@ -896,7 +893,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, + * We have to assume the worse case ie pmd for invalidation. Note that + * the page can not be free from this function. + */ +- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); ++ end = vma_address_end(page, vma); + mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); + + while (page_vma_mapped_walk(&pvmw)) { +@@ -1374,7 +1371,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, + * Note that the page can not be free in this function as call of + * try_to_unmap() must hold a reference on the page. + */ +- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); ++ end = PageKsm(page) ? 
++ address + PAGE_SIZE : vma_address_end(page, vma); + if (PageHuge(page)) { + /* + * If sharing is possible, start and end will be adjusted +@@ -1777,6 +1775,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, + struct vm_area_struct *vma = avc->vma; + unsigned long address = vma_address(page, vma); + ++ VM_BUG_ON_VMA(address == -EFAULT, vma); + cond_resched(); + + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) +@@ -1831,6 +1830,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, + pgoff_start, pgoff_end) { + unsigned long address = vma_address(page, vma); + ++ VM_BUG_ON_VMA(address == -EFAULT, vma); + cond_resched(); + + if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) +-- +2.30.2 + diff --git a/queue-4.14/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch b/queue-4.14/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch new file mode 100644 index 00000000000..fdc95d73846 --- /dev/null +++ b/queue-4.14/mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch @@ -0,0 +1,123 @@ +From 7175ca692c3b7e191de52a2f757d0f0f8336be7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Jun 2021 18:24:07 -0700 +Subject: mm: thp: replace DEBUG_VM BUG with VM_WARN when unmap fails for split + +From: Yang Shi + +[ Upstream commit 504e070dc08f757bccaed6d05c0f53ecbfac8a23 ] + +When debugging the bug reported by Wang Yugui [1], try_to_unmap() may +fail, but the first VM_BUG_ON_PAGE() just checks page_mapcount() however +it may miss the failure when head page is unmapped but other subpage is +mapped. Then the second DEBUG_VM BUG() that check total mapcount would +catch it. This may incur some confusion. + +As this is not a fatal issue, so consolidate the two DEBUG_VM checks +into one VM_WARN_ON_ONCE_PAGE(). + +[1] https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/ + +Link: https://lkml.kernel.org/r/d0f0db68-98b8-ebfb-16dc-f29df24cf012@google.com +Signed-off-by: Yang Shi +Reviewed-by: Zi Yan +Acked-by: Kirill A. Shutemov +Signed-off-by: Hugh Dickins +Cc: Alistair Popple +Cc: Jan Kara +Cc: Jue Wang +Cc: "Matthew Wilcox (Oracle)" +Cc: Miaohe Lin +Cc: Minchan Kim +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: Peter Xu +Cc: Ralph Campbell +Cc: Shakeel Butt +Cc: Wang Yugui +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +Note on stable backport: fixed up variables, split_queue_lock, tree_lock +in split_huge_page_to_list(), and conflict on ttu_flags in unmap_page(). 
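+
+A sketch of the distinction relied on here (simplified; see the real
+page_mapcount(), total_mapcount() and page_mapped() for the details):
+
+	page_mapcount(head)	/* head/compound mappings only: may be 0 */
+	total_mapcount(head)	/* sums all subpages: exact but costly   */
+	page_mapped(head)	/* any subpage mapped at all: cheap test */
+
+which is why the single VM_WARN_ON_ONCE_PAGE(page_mapped(page), page)
+covers both of the old DEBUG_VM checks.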
+ +Signed-off-by: Hugh Dickins +Signed-off-by: Sasha Levin +--- + mm/huge_memory.c | 24 +++++++----------------- + 1 file changed, 7 insertions(+), 17 deletions(-) + +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 5705ccff3e7f..972893908bcd 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2325,15 +2325,15 @@ static void unmap_page(struct page *page) + { + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS | + TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC; +- bool unmap_success; + + VM_BUG_ON_PAGE(!PageHead(page), page); + + if (PageAnon(page)) + ttu_flags |= TTU_SPLIT_FREEZE; + +- unmap_success = try_to_unmap(page, ttu_flags); +- VM_BUG_ON_PAGE(!unmap_success, page); ++ try_to_unmap(page, ttu_flags); ++ ++ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); + } + + static void remap_page(struct page *page) +@@ -2586,7 +2586,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); + struct anon_vma *anon_vma = NULL; + struct address_space *mapping = NULL; +- int count, mapcount, extra_pins, ret; ++ int extra_pins, ret; + bool mlocked; + unsigned long flags; + pgoff_t end; +@@ -2648,7 +2648,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + + mlocked = PageMlocked(page); + unmap_page(head); +- VM_BUG_ON_PAGE(compound_mapcount(head), head); + + /* Make sure the page is not on per-CPU pagevec as it takes pin */ + if (mlocked) +@@ -2674,9 +2673,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + + /* Prevent deferred_split_scan() touching ->_refcount */ + spin_lock(&pgdata->split_queue_lock); +- count = page_count(head); +- mapcount = total_mapcount(head); +- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { ++ if (page_ref_freeze(head, 1 + extra_pins)) { + if (!list_empty(page_deferred_list(head))) { + pgdata->split_queue_len--; + list_del(page_deferred_list(head)); +@@ -2692,16 +2689,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) + } else + ret = 0; + } else { +- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { +- pr_alert("total_mapcount: %u, page_count(): %u\n", +- mapcount, count); +- if (PageTail(page)) +- dump_page(head, NULL); +- dump_page(page, "total_mapcount(head) > 0"); +- BUG(); +- } + spin_unlock(&pgdata->split_queue_lock); +-fail: if (mapping) ++fail: ++ if (mapping) + spin_unlock(&mapping->tree_lock); + spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags); + remap_page(head); +-- +2.30.2 + diff --git a/queue-4.14/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch b/queue-4.14/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch new file mode 100644 index 00000000000..6196ee0533f --- /dev/null +++ b/queue-4.14/mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch @@ -0,0 +1,159 @@ +From dae6673b7a92395057695e7fd2fd3600e7644aba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Jun 2021 18:23:53 -0700 +Subject: mm/thp: try_to_unmap() use TTU_SYNC for safe splitting + +From: Hugh Dickins + +[ Upstream commit 732ed55823fc3ad998d43b86bf771887bcc5ec67 ] + +Stressing huge tmpfs often crashed on unmap_page()'s VM_BUG_ON_PAGE +(!unmap_success): with dump_page() showing mapcount:1, but then its raw +struct page output showing _mapcount ffffffff i.e. mapcount 0. 
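+
+(Decoding note: _mapcount is stored with a -1 bias, i.e. roughly
+
+	mapcount = atomic_read(&page->_mapcount) + 1;	/* 0xffffffff -> 0 */
+
+so dump_page()'s "mapcount:1" against a raw _mapcount of ffffffff
+suggests the two values were sampled either side of a racing
+decrement, as described below.)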
+
+And even if that particular VM_BUG_ON_PAGE(!unmap_success) is removed,
+it is immediately followed by a VM_BUG_ON_PAGE(compound_mapcount(head)),
+and further down an IS_ENABLED(CONFIG_DEBUG_VM) total_mapcount BUG():
+all indicative of some mapcount difficulty in development here perhaps.
+But the !CONFIG_DEBUG_VM path handles the failures correctly and
+silently.
+
+I believe the problem is that once a racing unmap has cleared pte or
+pmd, try_to_unmap_one() may skip taking the page table lock, and emerge
+from try_to_unmap() before the racing task has reached the point of
+decrementing the mapcount.
+
+Instead of abandoning the unsafe VM_BUG_ON_PAGE(), and the ones that
+follow, use PVMW_SYNC in try_to_unmap_one() in this case: adding
+TTU_SYNC to the options, and passing that from unmap_page().
+
+Only when CONFIG_DEBUG_VM, or for non-debug builds too? Consensus is to
+do the same for both: the slight overhead added should rarely matter,
+except perhaps if splitting sparsely-populated multiply-mapped shmem.
+Once confident that the bugs are fixed, TTU_SYNC here can be removed,
+and the race tolerated.
+
+Link: https://lkml.kernel.org/r/c1e95853-8bcd-d8fd-55fa-e7f2488e78f@google.com
+Fixes: fec89c109f3a ("thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers")
+Signed-off-by: Hugh Dickins
+Cc: Alistair Popple
+Cc: Jan Kara
+Cc: Jue Wang
+Cc: Kirill A. Shutemov
+Cc: "Matthew Wilcox (Oracle)"
+Cc: Miaohe Lin
+Cc: Minchan Kim
+Cc: Naoya Horiguchi
+Cc: Oscar Salvador
+Cc: Peter Xu
+Cc: Ralph Campbell
+Cc: Shakeel Butt
+Cc: Wang Yugui
+Cc: Yang Shi
+Cc: Zi Yan
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+
+Note on stable backport: upstream TTU_SYNC 0x10 takes the value which
+5.11 commit 013339df116c ("mm/rmap: always do TTU_IGNORE_ACCESS") freed.
+It is very tempting to backport that commit (as 5.10 already did) and
+make no change here; but on reflection, good as that commit is, I'm
+reluctant to include any possible side-effect of it in this series.
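One more illustration before the diff, not part of the patch: the
PVMW_SYNC handling added below amounts to taking and immediately dropping
the lock the racer holds, which acts as a wait-for-racer barrier. A
hedged userspace analogue, with the function name wait_for_racer invented
for the sketch:

#include <pthread.h>

/* Stand-in for the pmd lock shared by the zapper and the rmap walker. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Analogue of the pmd_lock()/spin_unlock() pair below: once the lock is
 * acquired, any racer that held it (say, one clearing a pte and then
 * dropping the mapcount under this lock) must have finished, so there
 * is nothing to do while holding it. */
static void wait_for_racer(void)
{
	pthread_mutex_lock(&lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	wait_for_racer(); /* returns at once here: no racer exists */
	return 0;
}

This is the sense in which TTU_SYNC lets try_to_unmap() wait out a racing
zap that sits between clearing the page table entry and decrementing the
mapcount.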
+
+Signed-off-by: Hugh Dickins
+Signed-off-by: Sasha Levin
+---
+ include/linux/rmap.h | 3 ++-
+ mm/huge_memory.c | 2 +-
+ mm/page_vma_mapped.c | 11 +++++++++++
+ mm/rmap.c | 17 ++++++++++++++++-
+ 4 files changed, 30 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index d7d6d4eb1794..91ccae946716 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -98,7 +98,8 @@ enum ttu_flags {
+ * do a final flush if necessary */
+ TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
+ * caller holds it */
+- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
++ TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
++ TTU_SYNC = 0x200, /* avoid racy checks with PVMW_SYNC */
+ };
+
+ #ifdef CONFIG_MMU
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 513f0cf173ad..5705ccff3e7f 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2324,7 +2324,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ static void unmap_page(struct page *page)
+ {
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
++ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
+ bool unmap_success;
+
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index e00d985a51c5..31879f2175d0 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -207,6 +207,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+ pvmw->ptl = NULL;
+ }
+ } else if (!pmd_present(pmde)) {
++ /*
++ * If PVMW_SYNC, take and drop THP pmd lock so that we
++ * cannot return prematurely, while zap_huge_pmd() has
++ * cleared *pmd but not decremented compound_mapcount().
++ */
++ if ((pvmw->flags & PVMW_SYNC) &&
++ PageTransCompound(pvmw->page)) {
++ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++
++ spin_unlock(ptl);
++ }
+ return false;
+ }
+ if (!map_pte(pvmw))
+diff --git a/mm/rmap.c b/mm/rmap.c
+index e6a556fec9d1..b6571c739723 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -1344,6 +1344,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+ unsigned long start = address, end;
+ enum ttu_flags flags = (enum ttu_flags)arg;
+
++ /*
++ * When racing against e.g. zap_pte_range() on another cpu,
++ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++ * try_to_unmap() may return false when it is about to become true,
++ * if page table locking is skipped: use TTU_SYNC to wait for that.
++ */
++ if (flags & TTU_SYNC)
++ pvmw.flags = PVMW_SYNC;
++
+ /* munlock has nothing to gain from examining un-locked vmas */
+ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
+ return true;
+@@ -1665,7 +1674,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+ else
+ rmap_walk(page, &rwc);
+
+- return !page_mapcount(page) ? true : false;
++ /*
++ * When racing against e.g. zap_pte_range() on another cpu,
++ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++ * try_to_unmap() may return false when it is about to become true,
++ * if page table locking is skipped: use TTU_SYNC to wait for that.
++ */ ++ return !page_mapcount(page); + } + + /** +-- +2.30.2 + diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..2bac7bda778 --- /dev/null +++ b/queue-4.14/series @@ -0,0 +1,19 @@ +include-linux-mmdebug.h-make-vm_warn-non-rvals.patch +mm-add-vm_warn_on_once_page-macro.patch +mm-rmap-remove-unneeded-semicolon-in-page_not_mapped.patch +mm-rmap-use-page_not_mapped-in-try_to_unmap.patch +mm-thp-try_to_unmap-use-ttu_sync-for-safe-splitting.patch +mm-thp-fix-vma_address-if-virtual-address-below-file.patch +mm-thp-fix-page_address_in_vma-on-file-thp-tails.patch +mm-thp-replace-debug_vm-bug-with-vm_warn-when-unmap-.patch +mm-page_vma_mapped_walk-use-page-for-pvmw-page.patch +mm-page_vma_mapped_walk-settle-pagehuge-on-entry.patch +mm-page_vma_mapped_walk-use-pmde-for-pvmw-pmd.patch +mm-page_vma_mapped_walk-prettify-pvmw_migration-bloc.patch +mm-page_vma_mapped_walk-crossing-page-table-boundary.patch +mm-page_vma_mapped_walk-add-a-level-of-indentation.patch +mm-page_vma_mapped_walk-use-goto-instead-of-while-1.patch +mm-page_vma_mapped_walk-get-vma_address_end-earlier.patch +mm-thp-fix-page_vma_mapped_walk-if-thp-mapped-by-pte.patch +mm-thp-another-pvmw_sync-fix-in-page_vma_mapped_walk.patch +mm-futex-fix-shared-futex-pgoff-on-shmem-huge-page.patch