From 7bcc26f2d2be13b4b144a0255841dc04ca8f5d6e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 15 Nov 2024 06:32:39 +0100 Subject: [PATCH] 6.6-stable patches added patches: mm-add-page_rmappable_folio-wrapper.patch mm-always-initialise-folio-_deferred_list.patch mm-readahead-do-not-allow-order-1-folio.patch mm-refactor-folio_undo_large_rmappable.patch mm-support-order-1-folios-in-the-page-cache.patch --- .../mm-add-page_rmappable_folio-wrapper.patch | 116 ++++++++ ...ways-initialise-folio-_deferred_list.patch | 137 +++++++++ ...readahead-do-not-allow-order-1-folio.patch | 59 ++++ ...-refactor-folio_undo_large_rmappable.patch | 106 +++++++ ...ort-order-1-folios-in-the-page-cache.patch | 123 ++++++++ ...red-split-unqueue-naming-and-locking.patch | 263 +++--------------- queue-6.6/series | 5 + 7 files changed, 590 insertions(+), 219 deletions(-) create mode 100644 queue-6.6/mm-add-page_rmappable_folio-wrapper.patch create mode 100644 queue-6.6/mm-always-initialise-folio-_deferred_list.patch create mode 100644 queue-6.6/mm-readahead-do-not-allow-order-1-folio.patch create mode 100644 queue-6.6/mm-refactor-folio_undo_large_rmappable.patch create mode 100644 queue-6.6/mm-support-order-1-folios-in-the-page-cache.patch diff --git a/queue-6.6/mm-add-page_rmappable_folio-wrapper.patch b/queue-6.6/mm-add-page_rmappable_folio-wrapper.patch new file mode 100644 index 00000000000..dd80a06da40 --- /dev/null +++ b/queue-6.6/mm-add-page_rmappable_folio-wrapper.patch @@ -0,0 +1,116 @@ +From d2264c4c8809ac569893e55bb9b1945b7ed0a810 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Tue, 3 Oct 2023 02:25:33 -0700 +Subject: mm: add page_rmappable_folio() wrapper + +From: Hugh Dickins + +commit 23e4883248f0472d806c8b3422ba6257e67bf1a5 upstream. + +folio_prep_large_rmappable() is being used repeatedly along with a +conversion from page to folio, a check non-NULL, a check order > 1: wrap +it all up into struct folio *page_rmappable_folio(struct page *). + +Link: https://lkml.kernel.org/r/8d92c6cf-eebe-748-e29c-c8ab224c741@google.com +Signed-off-by: Hugh Dickins +Cc: Andi Kleen +Cc: Christoph Lameter +Cc: David Hildenbrand +Cc: Greg Kroah-Hartman +Cc: "Huang, Ying" +Cc: Kefeng Wang +Cc: Matthew Wilcox (Oracle) +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Mike Kravetz +Cc: Nhat Pham +Cc: Sidhartha Kumar +Cc: Suren Baghdasaryan +Cc: Tejun heo +Cc: Vishal Moola (Oracle) +Cc: Yang Shi +Cc: Yosry Ahmed +Signed-off-by: Andrew Morton +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + mm/internal.h | 9 +++++++++ + mm/mempolicy.c | 17 +++-------------- + mm/page_alloc.c | 8 ++------ + 3 files changed, 14 insertions(+), 20 deletions(-) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -415,6 +415,15 @@ static inline void folio_set_order(struc + + void folio_undo_large_rmappable(struct folio *folio); + ++static inline struct folio *page_rmappable_folio(struct page *page) ++{ ++ struct folio *folio = (struct folio *)page; ++ ++ if (folio && folio_order(folio) > 1) ++ folio_prep_large_rmappable(folio); ++ return folio; ++} ++ + static inline void prep_compound_head(struct page *page, unsigned int order) + { + struct folio *folio = (struct folio *)page; +--- a/mm/mempolicy.c ++++ b/mm/mempolicy.c +@@ -2200,10 +2200,7 @@ struct folio *vma_alloc_folio(gfp_t gfp, + mpol_cond_put(pol); + gfp |= __GFP_COMP; + page = alloc_page_interleave(gfp, order, nid); +- folio = (struct folio *)page; +- if (folio && order > 1) +- folio_prep_large_rmappable(folio); +- goto out; ++ return page_rmappable_folio(page); + } + + if (pol->mode == MPOL_PREFERRED_MANY) { +@@ -2213,10 +2210,7 @@ struct folio *vma_alloc_folio(gfp_t gfp, + gfp |= __GFP_COMP; + page = alloc_pages_preferred_many(gfp, order, node, pol); + mpol_cond_put(pol); +- folio = (struct folio *)page; +- if (folio && order > 1) +- folio_prep_large_rmappable(folio); +- goto out; ++ return page_rmappable_folio(page); + } + + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { +@@ -2310,12 +2304,7 @@ EXPORT_SYMBOL(alloc_pages); + + struct folio *folio_alloc(gfp_t gfp, unsigned order) + { +- struct page *page = alloc_pages(gfp | __GFP_COMP, order); +- struct folio *folio = (struct folio *)page; +- +- if (folio && order > 1) +- folio_prep_large_rmappable(folio); +- return folio; ++ return page_rmappable_folio(alloc_pages(gfp | __GFP_COMP, order)); + } + EXPORT_SYMBOL(folio_alloc); + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4464,12 +4464,8 @@ struct folio *__folio_alloc(gfp_t gfp, u + nodemask_t *nodemask) + { + struct page *page = __alloc_pages(gfp | __GFP_COMP, order, +- preferred_nid, nodemask); +- struct folio *folio = (struct folio *)page; +- +- if (folio && order > 1) +- folio_prep_large_rmappable(folio); +- return folio; ++ preferred_nid, nodemask); ++ return page_rmappable_folio(page); + } + EXPORT_SYMBOL(__folio_alloc); + diff --git a/queue-6.6/mm-always-initialise-folio-_deferred_list.patch b/queue-6.6/mm-always-initialise-folio-_deferred_list.patch new file mode 100644 index 00000000000..844b31a2ad9 --- /dev/null +++ b/queue-6.6/mm-always-initialise-folio-_deferred_list.patch @@ -0,0 +1,137 @@ +From 6de4c7b38006fed2270c1a215acc4d8e3240e970 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Thu, 21 Mar 2024 14:24:39 +0000 +Subject: mm: always initialise folio->_deferred_list + +From: "Matthew Wilcox (Oracle)" + +commit b7b098cf00a2b65d5654a86dc8edf82f125289c1 upstream. + +Patch series "Various significant MM patches". + +These patches all interact in annoying ways which make it tricky to send +them out in any way other than a big batch, even though there's not really +an overarching theme to connect them. + +The big effects of this patch series are: + + - folio_test_hugetlb() becomes reliable, even when called without a + page reference + - We free up PG_slab, and we could always use more page flags + - We no longer need to check PageSlab before calling page_mapcount() + +This patch (of 9): + +For compound pages which are at least order-2 (and hence have a +deferred_list), initialise it and then we can check at free that the page +is not part of a deferred list. We recently found this useful to rule out +a source of corruption. + +[peterx@redhat.com: always initialise folio->_deferred_list] + Link: https://lkml.kernel.org/r/20240417211836.2742593-2-peterx@redhat.com +Link: https://lkml.kernel.org/r/20240321142448.1645400-1-willy@infradead.org +Link: https://lkml.kernel.org/r/20240321142448.1645400-2-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) +Signed-off-by: Peter Xu +Reviewed-by: David Hildenbrand +Acked-by: Vlastimil Babka +Cc: Miaohe Lin +Cc: Muchun Song +Cc: Oscar Salvador +Signed-off-by: Andrew Morton +[ Include three small changes from the upstream commit, for backport safety: + replace list_del() by list_del_init() in split_huge_page_to_list(), + like c010d47f107f ("mm: thp: split huge page to any lower order pages"); + replace list_del() by list_del_init() in folio_undo_large_rmappable(), like + 9bcef5973e31 ("mm: memcg: fix split queue list crash when large folio migration"); + keep __free_pages() instead of folio_put() in __update_and_free_hugetlb_folio(). ] +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 6 ++---- + mm/hugetlb.c | 1 + + mm/internal.h | 2 ++ + mm/memcontrol.c | 3 +++ + mm/page_alloc.c | 9 +++++---- + 5 files changed, 13 insertions(+), 8 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -571,8 +571,6 @@ void folio_prep_large_rmappable(struct f + { + if (!folio || !folio_test_large(folio)) + return; +- if (folio_order(folio) > 1) +- INIT_LIST_HEAD(&folio->_deferred_list); + folio_set_large_rmappable(folio); + } + +@@ -2725,7 +2723,7 @@ int split_huge_page_to_list(struct page + if (folio_order(folio) > 1 && + !list_empty(&folio->_deferred_list)) { + ds_queue->split_queue_len--; +- list_del(&folio->_deferred_list); ++ list_del_init(&folio->_deferred_list); + } + spin_unlock(&ds_queue->split_queue_lock); + if (mapping) { +@@ -2789,7 +2787,7 @@ void folio_undo_large_rmappable(struct f + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (!list_empty(&folio->_deferred_list)) { + ds_queue->split_queue_len--; +- list_del(&folio->_deferred_list); ++ list_del_init(&folio->_deferred_list); + } + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); + } +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1795,6 +1795,7 @@ static void __update_and_free_hugetlb_fo + destroy_compound_gigantic_folio(folio, huge_page_order(h)); + free_gigantic_folio(folio, huge_page_order(h)); + } else { ++ INIT_LIST_HEAD(&folio->_deferred_list); + __free_pages(&folio->page, huge_page_order(h)); + } + } +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -431,6 +431,8 @@ static inline void prep_compound_head(st + atomic_set(&folio->_entire_mapcount, -1); + atomic_set(&folio->_nr_pages_mapped, 0); + atomic_set(&folio->_pincount, 0); ++ if (order > 1) ++ INIT_LIST_HEAD(&folio->_deferred_list); + } + + static inline void prep_compound_tail(struct page *head, int tail_idx) +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -7153,6 +7153,9 @@ static void uncharge_folio(struct folio + struct obj_cgroup *objcg; + + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); ++ VM_BUG_ON_FOLIO(folio_order(folio) > 1 && ++ !folio_test_hugetlb(folio) && ++ !list_empty(&folio->_deferred_list), folio); + + /* + * Nobody should be changing or seriously looking at +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1002,10 +1002,11 @@ static int free_tail_page_prepare(struct + } + break; + case 2: +- /* +- * the second tail page: ->mapping is +- * deferred_list.next -- ignore value. +- */ ++ /* the second tail page: deferred_list overlaps ->mapping */ ++ if (unlikely(!list_empty(&folio->_deferred_list))) { ++ bad_page(page, "on deferred list"); ++ goto out; ++ } + break; + default: + if (page->mapping != TAIL_MAPPING) { diff --git a/queue-6.6/mm-readahead-do-not-allow-order-1-folio.patch b/queue-6.6/mm-readahead-do-not-allow-order-1-folio.patch new file mode 100644 index 00000000000..2ac7c8e7e30 --- /dev/null +++ b/queue-6.6/mm-readahead-do-not-allow-order-1-folio.patch @@ -0,0 +1,59 @@ +From 3744de75e512ebfea9d764bd4f85fdc4201cca73 Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Fri, 1 Dec 2023 16:10:45 +0000 +Subject: mm/readahead: do not allow order-1 folio + +From: Ryan Roberts + +commit ec056cef76a525706601b32048f174f9bea72c7c upstream. + +The THP machinery does not support order-1 folios because it requires meta +data spanning the first 3 `struct page`s. So order-2 is the smallest +large folio that we can safely create. + +There was a theoretical bug whereby if ra->size was 2 or 3 pages (due to +the device-specific bdi->ra_pages being set that way), we could end up +with order = 1. Fix this by unconditionally checking if the preferred +order is 1 and if so, set it to 0. Previously this was done in a few +specific places, but with this refactoring it is done just once, +unconditionally, at the end of the calculation. + +This is a theoretical bug found during review of the code; I have no +evidence to suggest this manifests in the real world (I expect all +device-specific ra_pages values are much bigger than 3). + +Link: https://lkml.kernel.org/r/20231201161045.3962614-1-ryan.roberts@arm.com +Signed-off-by: Ryan Roberts +Reviewed-by: Matthew Wilcox (Oracle) +Signed-off-by: Andrew Morton +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + mm/readahead.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -514,16 +514,14 @@ void page_cache_ra_order(struct readahea + unsigned int order = new_order; + + /* Align with smaller pages if needed */ +- if (index & ((1UL << order) - 1)) { ++ if (index & ((1UL << order) - 1)) + order = __ffs(index); +- if (order == 1) +- order = 0; +- } + /* Don't allocate pages past EOF */ +- while (index + (1UL << order) - 1 > limit) { +- if (--order == 1) +- order = 0; +- } ++ while (index + (1UL << order) - 1 > limit) ++ order--; ++ /* THP machinery does not support order-1 */ ++ if (order == 1) ++ order = 0; + err = ra_alloc_folio(ractl, index, mark, order, gfp); + if (err) + break; diff --git a/queue-6.6/mm-refactor-folio_undo_large_rmappable.patch b/queue-6.6/mm-refactor-folio_undo_large_rmappable.patch new file mode 100644 index 00000000000..bd856c06e75 --- /dev/null +++ b/queue-6.6/mm-refactor-folio_undo_large_rmappable.patch @@ -0,0 +1,106 @@ +From e70941e043a9ff84859ce0b52dd0a3f065ff590c Mon Sep 17 00:00:00 2001 +From: Kefeng Wang +Date: Tue, 21 May 2024 21:03:15 +0800 +Subject: mm: refactor folio_undo_large_rmappable() + +From: Kefeng Wang + +commit 593a10dabe08dcf93259fce2badd8dc2528859a8 upstream. + +Folios of order <= 1 are not in deferred list, the check of order is added +into folio_undo_large_rmappable() from commit 8897277acfef ("mm: support +order-1 folios in the page cache"), but there is a repeated check for +small folio (order 0) during each call of the +folio_undo_large_rmappable(), so only keep folio_order() check inside the +function. + +In addition, move all the checks into header file to save a function call +for non-large-rmappable or empty deferred_list folio. + +Link: https://lkml.kernel.org/r/20240521130315.46072-1-wangkefeng.wang@huawei.com +Signed-off-by: Kefeng Wang +Reviewed-by: David Hildenbrand +Reviewed-by: Vishal Moola (Oracle) +Cc: Johannes Weiner +Cc: Lance Yang +Cc: Matthew Wilcox (Oracle) +Cc: Michal Hocko +Cc: Muchun Song +Cc: Roman Gushchin +Cc: Shakeel Butt +Signed-off-by: Andrew Morton +[ Upstream commit itself does not apply cleanly, because there + are fewer calls to folio_undo_large_rmappable() in this tree. ] +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 13 +------------ + mm/internal.h | 17 ++++++++++++++++- + mm/page_alloc.c | 4 +--- + 3 files changed, 18 insertions(+), 16 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2767,22 +2767,11 @@ out: + return ret; + } + +-void folio_undo_large_rmappable(struct folio *folio) ++void __folio_undo_large_rmappable(struct folio *folio) + { + struct deferred_split *ds_queue; + unsigned long flags; + +- if (folio_order(folio) <= 1) +- return; +- +- /* +- * At this point, there is no one trying to add the folio to +- * deferred_list. If folio is not in deferred_list, it's safe +- * to check without acquiring the split_queue_lock. +- */ +- if (data_race(list_empty(&folio->_deferred_list))) +- return; +- + ds_queue = get_deferred_split_queue(folio); + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (!list_empty(&folio->_deferred_list)) { +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -413,7 +413,22 @@ static inline void folio_set_order(struc + #endif + } + +-void folio_undo_large_rmappable(struct folio *folio); ++void __folio_undo_large_rmappable(struct folio *folio); ++static inline void folio_undo_large_rmappable(struct folio *folio) ++{ ++ if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio)) ++ return; ++ ++ /* ++ * At this point, there is no one trying to add the folio to ++ * deferred_list. If folio is not in deferred_list, it's safe ++ * to check without acquiring the split_queue_lock. ++ */ ++ if (data_race(list_empty(&folio->_deferred_list))) ++ return; ++ ++ __folio_undo_large_rmappable(folio); ++} + + static inline struct folio *page_rmappable_folio(struct page *page) + { +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -600,9 +600,7 @@ void destroy_large_folio(struct folio *f + return; + } + +- if (folio_test_large_rmappable(folio)) +- folio_undo_large_rmappable(folio); +- ++ folio_undo_large_rmappable(folio); + mem_cgroup_uncharge(folio); + free_the_page(&folio->page, folio_order(folio)); + } diff --git a/queue-6.6/mm-support-order-1-folios-in-the-page-cache.patch b/queue-6.6/mm-support-order-1-folios-in-the-page-cache.patch new file mode 100644 index 00000000000..72b5de23cf3 --- /dev/null +++ b/queue-6.6/mm-support-order-1-folios-in-the-page-cache.patch @@ -0,0 +1,123 @@ +From b15d2d08375ba6bd3c87cc397d931f6165216bc0 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Mon, 26 Feb 2024 15:55:28 -0500 +Subject: mm: support order-1 folios in the page cache + +From: "Matthew Wilcox (Oracle)" + +commit 8897277acfef7f70fdecc054073bea2542fc7a1b upstream. + +Folios of order 1 have no space to store the deferred list. This is not a +problem for the page cache as file-backed folios are never placed on the +deferred list. All we need to do is prevent the core MM from touching the +deferred list for order 1 folios and remove the code which prevented us +from allocating order 1 folios. + +Link: https://lore.kernel.org/linux-mm/90344ea7-4eec-47ee-5996-0c22f42d6a6a@google.com/ +Link: https://lkml.kernel.org/r/20240226205534.1603748-3-zi.yan@sent.com +Signed-off-by: Matthew Wilcox (Oracle) +Signed-off-by: Zi Yan +Cc: David Hildenbrand +Cc: Hugh Dickins +Cc: Kirill A. Shutemov +Cc: Luis Chamberlain +Cc: Michal Koutny +Cc: Roman Gushchin +Cc: Ryan Roberts +Cc: Yang Shi +Cc: Yu Zhao +Cc: Zach O'Keefe +Signed-off-by: Andrew Morton +Signed-off-by: Hugh Dickins +Signed-off-by: Greg Kroah-Hartman +--- + mm/filemap.c | 2 -- + mm/huge_memory.c | 19 +++++++++++++++---- + mm/internal.h | 3 +-- + mm/readahead.c | 3 --- + 4 files changed, 16 insertions(+), 11 deletions(-) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1957,8 +1957,6 @@ no_page: + gfp_t alloc_gfp = gfp; + + err = -ENOMEM; +- if (order == 1) +- order = 0; + if (order > 0) + alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN; + folio = filemap_alloc_folio(alloc_gfp, order); +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -569,8 +569,10 @@ struct deferred_split *get_deferred_spli + + void folio_prep_large_rmappable(struct folio *folio) + { +- VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); +- INIT_LIST_HEAD(&folio->_deferred_list); ++ if (!folio || !folio_test_large(folio)) ++ return; ++ if (folio_order(folio) > 1) ++ INIT_LIST_HEAD(&folio->_deferred_list); + folio_set_large_rmappable(folio); + } + +@@ -2720,7 +2722,8 @@ int split_huge_page_to_list(struct page + /* Prevent deferred_split_scan() touching ->_refcount */ + spin_lock(&ds_queue->split_queue_lock); + if (folio_ref_freeze(folio, 1 + extra_pins)) { +- if (!list_empty(&folio->_deferred_list)) { ++ if (folio_order(folio) > 1 && ++ !list_empty(&folio->_deferred_list)) { + ds_queue->split_queue_len--; + list_del(&folio->_deferred_list); + } +@@ -2771,6 +2774,9 @@ void folio_undo_large_rmappable(struct f + struct deferred_split *ds_queue; + unsigned long flags; + ++ if (folio_order(folio) <= 1) ++ return; ++ + /* + * At this point, there is no one trying to add the folio to + * deferred_list. If folio is not in deferred_list, it's safe +@@ -2796,7 +2802,12 @@ void deferred_split_folio(struct folio * + #endif + unsigned long flags; + +- VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); ++ /* ++ * Order 1 folios have no space for a deferred list, but we also ++ * won't waste much memory by not adding them to the deferred list. ++ */ ++ if (folio_order(folio) <= 1) ++ return; + + /* + * The try_to_unmap() in page reclaim path might reach here too, +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -419,8 +419,7 @@ static inline struct folio *page_rmappab + { + struct folio *folio = (struct folio *)page; + +- if (folio && folio_order(folio) > 1) +- folio_prep_large_rmappable(folio); ++ folio_prep_large_rmappable(folio); + return folio; + } + +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -519,9 +519,6 @@ void page_cache_ra_order(struct readahea + /* Don't allocate pages past EOF */ + while (index + (1UL << order) - 1 > limit) + order--; +- /* THP machinery does not support order-1 */ +- if (order == 1) +- order = 0; + err = ra_alloc_folio(ractl, index, mark, order, gfp); + if (err) + break; diff --git a/queue-6.6/mm-thp-fix-deferred-split-unqueue-naming-and-locking.patch b/queue-6.6/mm-thp-fix-deferred-split-unqueue-naming-and-locking.patch index b8c2539dfbe..ee195729763 100644 --- a/queue-6.6/mm-thp-fix-deferred-split-unqueue-naming-and-locking.patch +++ b/queue-6.6/mm-thp-fix-deferred-split-unqueue-naming-and-locking.patch @@ -1,4 +1,4 @@ -From f8f931bba0f92052cf842b7e30917b1afcc77d5a Mon Sep 17 00:00:00 2001 +From e6d53c04ceb5677d25e9c052f6d3f5959284b2fa Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 27 Oct 2024 13:02:13 -0700 Subject: mm/thp: fix deferred split unqueue naming and locking @@ -95,72 +95,23 @@ Signed-off-by: Andrew Morton [ Upstream commit itself does not apply cleanly, because there are fewer calls to folio_undo_large_rmappable() in this tree (in particular, folio migration does not migrate memcg charge), - and mm/memcontrol-v1.c has not been split out of mm/memcontrol.c. - This single commit is merged from upstream commits: - 23e4883248f0 ("mm: add page_rmappable_folio() wrapper") - ec056cef76a5 ("mm/readahead: do not allow order-1 folio") - 8897277acfef ("mm: support order-1 folios in the page cache") - b7b098cf00a2 ("mm: always initialise folio->_deferred_list") - 593a10dabe08 ("mm: refactor folio_undo_large_rmappable()") - f8f931bba0f9 ("mm/thp: fix deferred split unqueue naming and locking") - With list_del_init() replacing list_del() like in: - c010d47f107f ("mm: thp: split huge page to any lower order pages") - 9bcef5973e31 ("mm: memcg: fix split queue list crash when large folio migration") ] + and mm/memcontrol-v1.c has not been split out of mm/memcontrol.c. ] Signed-off-by: Hugh Dickins Signed-off-by: Greg Kroah-Hartman --- - mm/filemap.c | 2 - - mm/huge_memory.c | 59 ++++++++++++++++++++++++++++++++++--------------------- - mm/hugetlb.c | 1 - mm/internal.h | 27 ++++++++++++++++++++++++- - mm/memcontrol.c | 29 +++++++++++++++++++++++++++ - mm/mempolicy.c | 17 ++------------- - mm/page_alloc.c | 21 +++++++------------ - mm/readahead.c | 11 ++-------- - 8 files changed, 107 insertions(+), 60 deletions(-) + mm/huge_memory.c | 35 ++++++++++++++++++++++++++--------- + mm/internal.h | 10 +++++----- + mm/memcontrol.c | 32 +++++++++++++++++++++++++++++--- + mm/page_alloc.c | 2 +- + 4 files changed, 61 insertions(+), 18 deletions(-) ---- a/mm/filemap.c -+++ b/mm/filemap.c -@@ -1957,8 +1957,6 @@ no_page: - gfp_t alloc_gfp = gfp; - - err = -ENOMEM; -- if (order == 1) -- order = 0; - if (order > 0) - alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN; - folio = filemap_alloc_folio(alloc_gfp, order); --- a/mm/huge_memory.c +++ b/mm/huge_memory.c -@@ -569,8 +569,8 @@ struct deferred_split *get_deferred_spli - - void folio_prep_large_rmappable(struct folio *folio) - { -- VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); -- INIT_LIST_HEAD(&folio->_deferred_list); -+ if (!folio || !folio_test_large(folio)) -+ return; - folio_set_large_rmappable(folio); - } - -@@ -2720,9 +2720,10 @@ int split_huge_page_to_list(struct page - /* Prevent deferred_split_scan() touching ->_refcount */ - spin_lock(&ds_queue->split_queue_lock); - if (folio_ref_freeze(folio, 1 + extra_pins)) { -- if (!list_empty(&folio->_deferred_list)) { -+ if (folio_order(folio) > 1 && -+ !list_empty(&folio->_deferred_list)) { - ds_queue->split_queue_len--; -- list_del(&folio->_deferred_list); -+ list_del_init(&folio->_deferred_list); - } - spin_unlock(&ds_queue->split_queue_lock); - if (mapping) { -@@ -2766,26 +2767,38 @@ out: +@@ -2767,18 +2767,38 @@ out: return ret; } --void folio_undo_large_rmappable(struct folio *folio) +-void __folio_undo_large_rmappable(struct folio *folio) +/* + * __folio_unqueue_deferred_split() is not to be called directly: + * the folio_unqueue_deferred_split() inline wrapper in mm/internal.h @@ -179,14 +130,7 @@ Signed-off-by: Greg Kroah-Hartman struct deferred_split *ds_queue; unsigned long flags; + bool unqueued = false; - -- /* -- * At this point, there is no one trying to add the folio to -- * deferred_list. If folio is not in deferred_list, it's safe -- * to check without acquiring the split_queue_lock. -- */ -- if (data_race(list_empty(&folio->_deferred_list))) -- return; ++ + WARN_ON_ONCE(folio_ref_count(folio)); + WARN_ON_ONCE(!mem_cgroup_disabled() && !folio_memcg(folio)); @@ -194,8 +138,7 @@ Signed-off-by: Greg Kroah-Hartman spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (!list_empty(&folio->_deferred_list)) { ds_queue->split_queue_len--; -- list_del(&folio->_deferred_list); -+ list_del_init(&folio->_deferred_list); + list_del_init(&folio->_deferred_list); + unqueued = true; } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); @@ -204,17 +147,8 @@ Signed-off-by: Greg Kroah-Hartman } void deferred_split_folio(struct folio *folio) -@@ -2796,17 +2809,19 @@ void deferred_split_folio(struct folio * - #endif - unsigned long flags; - -- VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); -+ /* -+ * Order 1 folios have no space for a deferred list, but we also -+ * won't waste much memory by not adding them to the deferred list. -+ */ -+ if (folio_order(folio) <= 1) -+ return; +@@ -2797,14 +2817,11 @@ void deferred_split_folio(struct folio * + return; /* - * The try_to_unmap() in page reclaim path might reach here too, @@ -233,59 +167,35 @@ Signed-off-by: Greg Kroah-Hartman */ if (folio_test_swapcache(folio)) return; ---- a/mm/hugetlb.c -+++ b/mm/hugetlb.c -@@ -1795,6 +1795,7 @@ static void __update_and_free_hugetlb_fo - destroy_compound_gigantic_folio(folio, huge_page_order(h)); - free_gigantic_folio(folio, huge_page_order(h)); - } else { -+ INIT_LIST_HEAD(&folio->_deferred_list); - __free_pages(&folio->page, huge_page_order(h)); - } - } --- a/mm/internal.h +++ b/mm/internal.h -@@ -413,7 +413,30 @@ static inline void folio_set_order(struc +@@ -413,11 +413,11 @@ static inline void folio_set_order(struc #endif } --void folio_undo_large_rmappable(struct folio *folio); +-void __folio_undo_large_rmappable(struct folio *folio); +-static inline void folio_undo_large_rmappable(struct folio *folio) +bool __folio_unqueue_deferred_split(struct folio *folio); +static inline bool folio_unqueue_deferred_split(struct folio *folio) -+{ -+ if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio)) + { + if (folio_order(folio) <= 1 || !folio_test_large_rmappable(folio)) +- return; + return false; -+ -+ /* -+ * At this point, there is no one trying to add the folio to -+ * deferred_list. If folio is not in deferred_list, it's safe -+ * to check without acquiring the split_queue_lock. -+ */ -+ if (data_race(list_empty(&folio->_deferred_list))) + + /* + * At this point, there is no one trying to add the folio to +@@ -425,9 +425,9 @@ static inline void folio_undo_large_rmap + * to check without acquiring the split_queue_lock. + */ + if (data_race(list_empty(&folio->_deferred_list))) +- return; + return false; -+ -+ return __folio_unqueue_deferred_split(folio); -+} -+ -+static inline struct folio *page_rmappable_folio(struct page *page) -+{ -+ struct folio *folio = (struct folio *)page; -+ -+ folio_prep_large_rmappable(folio); -+ return folio; -+} - static inline void prep_compound_head(struct page *page, unsigned int order) - { -@@ -423,6 +446,8 @@ static inline void prep_compound_head(st - atomic_set(&folio->_entire_mapcount, -1); - atomic_set(&folio->_nr_pages_mapped, 0); - atomic_set(&folio->_pincount, 0); -+ if (order > 1) -+ INIT_LIST_HEAD(&folio->_deferred_list); +- __folio_undo_large_rmappable(folio); ++ return __folio_unqueue_deferred_split(folio); } - static inline void prep_compound_tail(struct page *head, int tail_idx) + static inline struct folio *page_rmappable_folio(struct page *page) --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5873,6 +5873,8 @@ static int mem_cgroup_move_account(struc @@ -337,7 +247,17 @@ Signed-off-by: Greg Kroah-Hartman if (isolate_lru_page(page)) { if (!mem_cgroup_move_account(page, true, mc.from, mc.to)) { -@@ -7199,6 +7226,7 @@ static void uncharge_folio(struct folio +@@ -7153,9 +7180,6 @@ static void uncharge_folio(struct folio + struct obj_cgroup *objcg; + + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); +- VM_BUG_ON_FOLIO(folio_order(folio) > 1 && +- !folio_test_hugetlb(folio) && +- !list_empty(&folio->_deferred_list), folio); + + /* + * Nobody should be changing or seriously looking at +@@ -7202,6 +7226,7 @@ static void uncharge_folio(struct folio ug->nr_memory += nr_pages; ug->pgpgout++; @@ -345,7 +265,7 @@ Signed-off-by: Greg Kroah-Hartman folio->memcg_data = 0; } -@@ -7492,6 +7520,7 @@ void mem_cgroup_swapout(struct folio *fo +@@ -7495,6 +7520,7 @@ void mem_cgroup_swapout(struct folio *fo VM_BUG_ON_FOLIO(oldid, folio); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); @@ -353,109 +273,14 @@ Signed-off-by: Greg Kroah-Hartman folio->memcg_data = 0; if (!mem_cgroup_is_root(memcg)) ---- a/mm/mempolicy.c -+++ b/mm/mempolicy.c -@@ -2200,10 +2200,7 @@ struct folio *vma_alloc_folio(gfp_t gfp, - mpol_cond_put(pol); - gfp |= __GFP_COMP; - page = alloc_page_interleave(gfp, order, nid); -- folio = (struct folio *)page; -- if (folio && order > 1) -- folio_prep_large_rmappable(folio); -- goto out; -+ return page_rmappable_folio(page); - } - - if (pol->mode == MPOL_PREFERRED_MANY) { -@@ -2213,10 +2210,7 @@ struct folio *vma_alloc_folio(gfp_t gfp, - gfp |= __GFP_COMP; - page = alloc_pages_preferred_many(gfp, order, node, pol); - mpol_cond_put(pol); -- folio = (struct folio *)page; -- if (folio && order > 1) -- folio_prep_large_rmappable(folio); -- goto out; -+ return page_rmappable_folio(page); - } - - if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) { -@@ -2310,12 +2304,7 @@ EXPORT_SYMBOL(alloc_pages); - - struct folio *folio_alloc(gfp_t gfp, unsigned order) - { -- struct page *page = alloc_pages(gfp | __GFP_COMP, order); -- struct folio *folio = (struct folio *)page; -- -- if (folio && order > 1) -- folio_prep_large_rmappable(folio); -- return folio; -+ return page_rmappable_folio(alloc_pages(gfp | __GFP_COMP, order)); - } - EXPORT_SYMBOL(folio_alloc); - --- a/mm/page_alloc.c +++ b/mm/page_alloc.c -@@ -600,9 +600,7 @@ void destroy_large_folio(struct folio *f +@@ -600,7 +600,7 @@ void destroy_large_folio(struct folio *f return; } -- if (folio_test_large_rmappable(folio)) -- folio_undo_large_rmappable(folio); -- +- folio_undo_large_rmappable(folio); + folio_unqueue_deferred_split(folio); mem_cgroup_uncharge(folio); free_the_page(&folio->page, folio_order(folio)); } -@@ -1002,10 +1000,11 @@ static int free_tail_page_prepare(struct - } - break; - case 2: -- /* -- * the second tail page: ->mapping is -- * deferred_list.next -- ignore value. -- */ -+ /* the second tail page: deferred_list overlaps ->mapping */ -+ if (unlikely(!list_empty(&folio->_deferred_list))) { -+ bad_page(page, "on deferred list"); -+ goto out; -+ } - break; - default: - if (page->mapping != TAIL_MAPPING) { -@@ -4464,12 +4463,8 @@ struct folio *__folio_alloc(gfp_t gfp, u - nodemask_t *nodemask) - { - struct page *page = __alloc_pages(gfp | __GFP_COMP, order, -- preferred_nid, nodemask); -- struct folio *folio = (struct folio *)page; -- -- if (folio && order > 1) -- folio_prep_large_rmappable(folio); -- return folio; -+ preferred_nid, nodemask); -+ return page_rmappable_folio(page); - } - EXPORT_SYMBOL(__folio_alloc); - ---- a/mm/readahead.c -+++ b/mm/readahead.c -@@ -514,16 +514,11 @@ void page_cache_ra_order(struct readahea - unsigned int order = new_order; - - /* Align with smaller pages if needed */ -- if (index & ((1UL << order) - 1)) { -+ if (index & ((1UL << order) - 1)) - order = __ffs(index); -- if (order == 1) -- order = 0; -- } - /* Don't allocate pages past EOF */ -- while (index + (1UL << order) - 1 > limit) { -- if (--order == 1) -- order = 0; -- } -+ while (index + (1UL << order) - 1 > limit) -+ order--; - err = ra_alloc_folio(ractl, index, mark, order, gfp); - if (err) - break; diff --git a/queue-6.6/series b/queue-6.6/series index 6d131bfc7d1..1738be0d93e 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -39,4 +39,9 @@ net-usb-qmi_wwan-add-fibocom-fg132-0x0112-compositio.patch bpf-check-validity-of-link-type-in-bpf_link_show_fdi.patch io_uring-fix-possible-deadlock-in-io_register_iowq_max_workers.patch mm-krealloc-fix-mte-false-alarm-in-__do_krealloc.patch +mm-add-page_rmappable_folio-wrapper.patch +mm-readahead-do-not-allow-order-1-folio.patch +mm-support-order-1-folios-in-the-page-cache.patch +mm-always-initialise-folio-_deferred_list.patch +mm-refactor-folio_undo_large_rmappable.patch mm-thp-fix-deferred-split-unqueue-naming-and-locking.patch -- 2.47.2