From c83109e95c9d78e41b39e65b6490e511f4b8fba2 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 12 Jan 2026 23:09:50 +0800 Subject: [PATCH] mm: page_isolation: introduce page_is_unmovable() Patch series "mm: accelerate gigantic folio allocation". Optimize pfn_range_valid_contig() and replace_free_hugepage_folios() in alloc_contig_frozen_pages() to speed up gigantic folio allocation. The allocation time for 120*1G folios drops from 3.605s to 0.431s. This patch (of 5): Factor out the check if a page is unmovable into a new helper, and will be reused in the following patch. No functional change intended, the minor changes are as follows, 1) Avoid unnecessary calls by checking CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 2) Directly call PageCompound since PageTransCompound may be dropped 3) Using folio_test_hugetlb() Link: https://lkml.kernel.org/r/20260112150954.1802953-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20260112150954.1802953-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Zi Yan Reviewed-by: Oscar Salvador Cc: Brendan Jackman Cc: David Hildenbrand Cc: Jane Chu Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Sidhartha Kumar Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page-isolation.h | 2 + mm/page_isolation.c | 187 +++++++++++++++++---------------- 2 files changed, 101 insertions(+), 88 deletions(-) diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3e2f960e166ca..6f8638c9904ff 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -67,4 +67,6 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn, enum pb_isolate_mode mode); +bool page_is_unmovable(struct zone *zone, struct page *page, + enum pb_isolate_mode mode, unsigned long *step); #endif diff --git a/mm/page_isolation.c b/mm/page_isolation.c index b5924eff4f8b2..c48ff5c002449 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -15,6 +15,100 @@ #define CREATE_TRACE_POINTS #include +bool page_is_unmovable(struct zone *zone, struct page *page, + enum pb_isolate_mode mode, unsigned long *step) +{ + /* + * Both, bootmem allocations and memory holes are marked + * PG_reserved and are unmovable. We can even have unmovable + * allocations inside ZONE_MOVABLE, for example when + * specifying "movablecore". + */ + if (PageReserved(page)) + return true; + + /* + * If the zone is movable and we have ruled out all reserved + * pages then it should be reasonably safe to assume the rest + * is movable. + */ + if (zone_idx(zone) == ZONE_MOVABLE) + return false; + + /* + * Hugepages are not in LRU lists, but they're movable. + * THPs are on the LRU, but need to be counted as #small pages. + * We need not scan over tail pages because we don't + * handle each tail page individually in migration. + */ + if (PageHuge(page) || PageCompound(page)) { + struct folio *folio = page_folio(page); + + if (folio_test_hugetlb(folio)) { + struct hstate *h; + + if (!IS_ENABLED(CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION)) + return true; + + /* + * The huge page may be freed so can not + * use folio_hstate() directly. + */ + h = size_to_hstate(folio_size(folio)); + if (h && !hugepage_migration_supported(h)) + return true; + + } else if (!folio_test_lru(folio)) { + return true; + } + + *step = folio_nr_pages(folio) - folio_page_idx(folio, page); + return false; + } + + /* + * We can't use page_count without pin a page + * because another CPU can free compound page. + * This check already skips compound tails of THP + * because their page->_refcount is zero at all time. + */ + if (!page_ref_count(page)) { + if (PageBuddy(page)) + *step = (1 << buddy_order(page)); + return false; + } + + /* + * The HWPoisoned page may be not in buddy system, and + * page_count() is not 0. + */ + if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageHWPoison(page)) + return false; + + /* + * We treat all PageOffline() pages as movable when offlining + * to give drivers a chance to decrement their reference count + * in MEM_GOING_OFFLINE in order to indicate that these pages + * can be offlined as there are no direct references anymore. + * For actually unmovable PageOffline() where the driver does + * not support this, we will fail later when trying to actually + * move these pages that still have a reference count > 0. + * (false negatives in this function only) + */ + if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageOffline(page)) + return false; + + if (PageLRU(page) || page_has_movable_ops(page)) + return false; + + /* + * If there are RECLAIMABLE pages, we need to check + * it. But now, memory offline itself doesn't call + * shrink_node_slabs() and it still to be fixed. + */ + return true; +} + /* * This function checks whether the range [start_pfn, end_pfn) includes * unmovable pages or not. The range must fall into a single pageblock and @@ -35,7 +129,6 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e { struct page *page = pfn_to_page(start_pfn); struct zone *zone = page_zone(page); - unsigned long pfn; VM_BUG_ON(pageblock_start_pfn(start_pfn) != pageblock_start_pfn(end_pfn - 1)); @@ -52,96 +145,14 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e return page; } - for (pfn = start_pfn; pfn < end_pfn; pfn++) { - page = pfn_to_page(pfn); + while (start_pfn < end_pfn) { + unsigned long step = 1; - /* - * Both, bootmem allocations and memory holes are marked - * PG_reserved and are unmovable. We can even have unmovable - * allocations inside ZONE_MOVABLE, for example when - * specifying "movablecore". - */ - if (PageReserved(page)) + page = pfn_to_page(start_pfn); + if (page_is_unmovable(zone, page, mode, &step)) return page; - /* - * If the zone is movable and we have ruled out all reserved - * pages then it should be reasonably safe to assume the rest - * is movable. - */ - if (zone_idx(zone) == ZONE_MOVABLE) - continue; - - /* - * Hugepages are not in LRU lists, but they're movable. - * THPs are on the LRU, but need to be counted as #small pages. - * We need not scan over tail pages because we don't - * handle each tail page individually in migration. - */ - if (PageHuge(page) || PageTransCompound(page)) { - struct folio *folio = page_folio(page); - unsigned int skip_pages; - - if (PageHuge(page)) { - struct hstate *h; - - /* - * The huge page may be freed so can not - * use folio_hstate() directly. - */ - h = size_to_hstate(folio_size(folio)); - if (h && !hugepage_migration_supported(h)) - return page; - } else if (!folio_test_lru(folio)) { - return page; - } - - skip_pages = folio_nr_pages(folio) - folio_page_idx(folio, page); - pfn += skip_pages - 1; - continue; - } - - /* - * We can't use page_count without pin a page - * because another CPU can free compound page. - * This check already skips compound tails of THP - * because their page->_refcount is zero at all time. - */ - if (!page_ref_count(page)) { - if (PageBuddy(page)) - pfn += (1 << buddy_order(page)) - 1; - continue; - } - - /* - * The HWPoisoned page may be not in buddy system, and - * page_count() is not 0. - */ - if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageHWPoison(page)) - continue; - - /* - * We treat all PageOffline() pages as movable when offlining - * to give drivers a chance to decrement their reference count - * in MEM_GOING_OFFLINE in order to indicate that these pages - * can be offlined as there are no direct references anymore. - * For actually unmovable PageOffline() where the driver does - * not support this, we will fail later when trying to actually - * move these pages that still have a reference count > 0. - * (false negatives in this function only) - */ - if ((mode == PB_ISOLATE_MODE_MEM_OFFLINE) && PageOffline(page)) - continue; - - if (PageLRU(page) || page_has_movable_ops(page)) - continue; - - /* - * If there are RECLAIMABLE pages, we need to check - * it. But now, memory offline itself doesn't call - * shrink_node_slabs() and it still to be fixed. - */ - return page; + start_pfn += step; } return NULL; } -- 2.47.3