From 2a1aed42a274c3bc9ea24a12b674ed15be6bd3d1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 2 Jul 2024 11:56:53 +0200 Subject: [PATCH] 6.6-stable patches added patches: mm-page_alloc-separate-thp-pcp-into-movable-and-non-movable-categories.patch --- ...o-movable-and-non-movable-categories.patch | 109 ++++++++++++++++++ queue-6.6/series | 1 + 2 files changed, 110 insertions(+) create mode 100644 queue-6.6/mm-page_alloc-separate-thp-pcp-into-movable-and-non-movable-categories.patch diff --git a/queue-6.6/mm-page_alloc-separate-thp-pcp-into-movable-and-non-movable-categories.patch b/queue-6.6/mm-page_alloc-separate-thp-pcp-into-movable-and-non-movable-categories.patch new file mode 100644 index 00000000000..ef7bda1d52c --- /dev/null +++ b/queue-6.6/mm-page_alloc-separate-thp-pcp-into-movable-and-non-movable-categories.patch @@ -0,0 +1,109 @@ +From bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 Mon Sep 17 00:00:00 2001 +From: yangge +Date: Thu, 20 Jun 2024 08:59:50 +0800 +Subject: mm/page_alloc: Separate THP PCP into movable and non-movable categories + +From: yangge + +commit bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 upstream. + +Since commit 5d0a661d808f ("mm/page_alloc: use only one PCP list for +THP-sized allocations") no longer differentiates the migration type of +pages in the THP-sized PCP list, it's possible that non-movable allocation +requests may get a CMA page from the list, which in some cases is not +acceptable. + +If a large amount of CMA memory is configured in the system (for example, the +CMA memory accounts for 50% of the system memory), starting a virtual +machine with device passthrough will get stuck. While the +virtual machine is starting, it will call pin_user_pages_remote(..., FOLL_LONGTERM, +...) to pin memory. Normally, if a page is present and in a CMA area, +pin_user_pages_remote() will migrate the page from the CMA area to a non-CMA +area because of the FOLL_LONGTERM flag. 
But if non-movable allocation +requests return CMA memory, migrate_longterm_unpinnable_pages() will +migrate a CMA page to another CMA page, which will fail to pass the check +in check_and_migrate_movable_pages() and cause the migration to loop endlessly. + +Call trace: +pin_user_pages_remote +--__gup_longterm_locked // endless loops in this function +----_get_user_pages_locked +----check_and_migrate_movable_pages +------migrate_longterm_unpinnable_pages +--------alloc_migration_target + +This problem will also have a negative impact on CMA itself. For example, +when CMA is borrowed by THP, and we need to reclaim it through cma_alloc() +or dma_alloc_coherent(), we must move those pages out to ensure CMA's +users can retrieve that contiguous memory. Currently, CMA's memory is +occupied by non-movable pages, meaning we can't relocate them. As a +result, cma_alloc() is more likely to fail. + +To fix the problem above, we add one PCP list for THP, which will not +introduce a new cacheline for struct per_cpu_pages. THP will have 2 PCP +lists: one PCP list is used by MOVABLE allocation, and the other PCP list +is used by UNMOVABLE allocation. MOVABLE allocation contains GFP_MOVABLE, +and UNMOVABLE allocation contains GFP_UNMOVABLE and GFP_RECLAIMABLE. + +Link: https://lkml.kernel.org/r/1718845190-4456-1-git-send-email-yangge1116@126.com +Fixes: 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations") +Signed-off-by: yangge +Cc: Baolin Wang +Cc: Barry Song <21cnbao@gmail.com> +Cc: Mel Gorman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mmzone.h | 9 ++++----- + mm/page_alloc.c | 9 +++++++-- + 2 files changed, 11 insertions(+), 7 deletions(-) + +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -664,13 +664,12 @@ enum zone_watermarks { + }; + + /* +- * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list +- * for THP which will usually be GFP_MOVABLE. 
Even if it is another type, +- * it should not contribute to serious fragmentation causing THP allocation +- * failures. ++ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists ++ * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list ++ * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. + */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +-#define NR_PCP_THP 1 ++#define NR_PCP_THP 2 + #else + #define NR_PCP_THP 0 + #endif +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -519,10 +519,15 @@ out: + + static inline unsigned int order_to_pindex(int migratetype, int order) + { ++ bool __maybe_unused movable; ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (order > PAGE_ALLOC_COSTLY_ORDER) { + VM_BUG_ON(order != pageblock_order); +- return NR_LOWORDER_PCP_LISTS; ++ ++ movable = migratetype == MIGRATE_MOVABLE; ++ ++ return NR_LOWORDER_PCP_LISTS + movable; + } + #else + VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); +@@ -536,7 +541,7 @@ static inline int pindex_to_order(unsign + int order = pindex / MIGRATE_PCPTYPES; + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE +- if (pindex == NR_LOWORDER_PCP_LISTS) ++ if (pindex >= NR_LOWORDER_PCP_LISTS) + order = pageblock_order; + #else + VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); diff --git a/queue-6.6/series b/queue-6.6/series index d2cc9b2fc67..2ddd7642190 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -150,3 +150,4 @@ syscalls-fix-sys_fanotify_mark-prototype.patch erofs-fix-null-dereference-of-dif-bdev_handle-in-fscache-mode.patch pwm-stm32-refuse-too-small-period-requests.patch revert-cpufreq-amd-pstate-fix-the-inconsistency-in-max-frequency-units.patch +mm-page_alloc-separate-thp-pcp-into-movable-and-non-movable-categories.patch -- 2.47.3