From: Sasha Levin Date: Wed, 6 Nov 2024 01:54:06 +0000 (-0500) Subject: Fixes for 5.15 X-Git-Tag: v4.19.323~56 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c1bd90769bd234d86d7bff74e280a1df8dad2990;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch b/queue-5.15/mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch new file mode 100644 index 00000000000..c7cc9cf8a1d --- /dev/null +++ b/queue-5.15/mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch @@ -0,0 +1,80 @@ +From 8a6a5f3d06c606cfc62400eac84532a110cd2844 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Mar 2022 14:43:57 -0700 +Subject: mm/page_alloc: call check_new_pages() while zone spinlock is not held + +From: Eric Dumazet + +[ Upstream commit 3313204c8ad553cf93f1ee8cc89456c73a7df938 ] + +For high order pages not using pcp, rmqueue() is currently calling the +costly check_new_pages() while zone spinlock is held, and hard irqs +masked. + +This is not needed, we can release the spinlock sooner to reduce zone +spinlock contention. + +Note that after this patch, we call __mod_zone_freepage_state() before +deciding to leak the page because it is in bad state. 
+ +Link: https://lkml.kernel.org/r/20220304170215.1868106-1-eric.dumazet@gmail.com +Signed-off-by: Eric Dumazet +Reviewed-by: Shakeel Butt +Acked-by: David Rientjes +Acked-by: Mel Gorman +Reviewed-by: Vlastimil Babka +Cc: Michal Hocko +Cc: Wei Xu +Cc: Greg Thelen +Cc: Hugh Dickins +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 264efa022fa96..474150584ba48 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3698,10 +3698,10 @@ struct page *rmqueue(struct zone *preferred_zone, + * allocate greater than order-1 page units with __GFP_NOFAIL. + */ + WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); +- spin_lock_irqsave(&zone->lock, flags); + + do { + page = NULL; ++ spin_lock_irqsave(&zone->lock, flags); + /* + * order-0 request can reach here when the pcplist is skipped + * due to non-CMA allocation context. 
HIGHATOMIC area is +@@ -3713,15 +3713,15 @@ struct page *rmqueue(struct zone *preferred_zone, + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); + } +- if (!page) ++ if (!page) { + page = __rmqueue(zone, order, migratetype, alloc_flags); +- } while (page && check_new_pages(page, order)); +- if (!page) +- goto failed; +- +- __mod_zone_freepage_state(zone, -(1 << order), +- get_pcppage_migratetype(page)); +- spin_unlock_irqrestore(&zone->lock, flags); ++ if (!page) ++ goto failed; ++ } ++ __mod_zone_freepage_state(zone, -(1 << order), ++ get_pcppage_migratetype(page)); ++ spin_unlock_irqrestore(&zone->lock, flags); ++ } while (check_new_pages(page, order)); + + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + zone_statistics(preferred_zone, zone, 1); +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch b/queue-5.15/mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch new file mode 100644 index 00000000000..fd434448d33 --- /dev/null +++ b/queue-5.15/mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch @@ -0,0 +1,151 @@ +From bda5b38029b455b098258a2defce2740d878718c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 11:12:16 +0000 +Subject: mm/page_alloc: explicitly define how __GFP_HIGH non-blocking + allocations accesses reserves + +From: Mel Gorman + +[ Upstream commit 1ebbb21811b76c3b932959787f37985af36f62fa ] + +GFP_ATOMIC allocations get flagged ALLOC_HARDER which is a vague +description. In preparation for the removal of GFP_ATOMIC redefine +__GFP_ATOMIC to simply mean non-blocking and renaming ALLOC_HARDER to +ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to +reserves but non-blocking is granted more access. For example, GFP_NOWAIT +is non-blocking but has no special access to reserves. A __GFP_NOFAIL +blocking allocation is granted access similar to __GFP_HIGH if the only +alternative is an OOM kill. 
+ +Link: https://lkml.kernel.org/r/20230113111217.14134-6-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: Michal Hocko +Cc: Matthew Wilcox +Cc: NeilBrown +Cc: Thierry Reding +Cc: Vlastimil Babka +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/internal.h | 7 +++++-- + mm/page_alloc.c | 44 ++++++++++++++++++++++++-------------------- + 2 files changed, 29 insertions(+), 22 deletions(-) + +diff --git a/mm/internal.h b/mm/internal.h +index 717e75313693c..cd444aa7a10af 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -592,7 +592,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + #define ALLOC_OOM ALLOC_NO_WATERMARKS + #endif + +-#define ALLOC_HARDER 0x10 /* try to alloc harder */ ++#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access ++ * to 25% of the min watermark or ++ * 62.5% if __GFP_HIGH is set. ++ */ + #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% + * of the min watermark. + */ +@@ -607,7 +610,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ + + /* Flags that allow allocations below the min watermark. */ +-#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) ++#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) + + enum ttu_flags; + struct tlbflush_unmap_batch; +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 7778c2b11d8cb..404cee30dcc26 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3878,18 +3878,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + * __GFP_HIGH allows access to 50% of the min reserve as well + * as OOM. 
+ */ +- if (alloc_flags & ALLOC_MIN_RESERVE) ++ if (alloc_flags & ALLOC_MIN_RESERVE) { + min -= min / 2; + +- /* +- * Non-blocking allocations can access some of the reserve +- * with more access if also __GFP_HIGH. The reasoning is that +- * a non-blocking caller may incur a more severe penalty +- * if it cannot get memory quickly, particularly if it's +- * also __GFP_HIGH. +- */ +- if (alloc_flags & ALLOC_HARDER) +- min -= min / 4; ++ /* ++ * Non-blocking allocations (e.g. GFP_ATOMIC) can ++ * access more reserves than just __GFP_HIGH. Other ++ * non-blocking allocations requests such as GFP_NOWAIT ++ * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get ++ * access to the min reserve. ++ */ ++ if (alloc_flags & ALLOC_NON_BLOCK) ++ min -= min / 4; ++ } + + /* + * OOM victims can try even harder than the normal reserve +@@ -4729,28 +4730,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) + * The caller may dip into page reserves a bit more if the caller + * cannot run direct reclaim, or if the caller has realtime scheduling + * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will +- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH). ++ * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH). + */ + alloc_flags |= (__force int) + (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)); + +- if (gfp_mask & __GFP_ATOMIC) { ++ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) { + /* + * Not worth trying to allocate harder for __GFP_NOMEMALLOC even + * if it can't schedule. + */ + if (!(gfp_mask & __GFP_NOMEMALLOC)) { +- alloc_flags |= ALLOC_HARDER; ++ alloc_flags |= ALLOC_NON_BLOCK; + + if (order > 0) + alloc_flags |= ALLOC_HIGHATOMIC; + } + + /* +- * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the +- * comment for __cpuset_node_allowed(). ++ * Ignore cpuset mems for non-blocking __GFP_HIGH (probably ++ * GFP_ATOMIC) rather than fail, see the comment for ++ * __cpuset_node_allowed(). 
+ */ +- alloc_flags &= ~ALLOC_CPUSET; ++ if (alloc_flags & ALLOC_MIN_RESERVE) ++ alloc_flags &= ~ALLOC_CPUSET; + } else if (unlikely(rt_task(current)) && in_task()) + alloc_flags |= ALLOC_MIN_RESERVE; + +@@ -5188,12 +5191,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, + WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER); + + /* +- * Help non-failing allocations by giving them access to memory +- * reserves but do not use ALLOC_NO_WATERMARKS because this ++ * Help non-failing allocations by giving some access to memory ++ * reserves normally used for high priority non-blocking ++ * allocations but do not use ALLOC_NO_WATERMARKS because this + * could deplete whole memory reserves which would just make +- * the situation worse ++ * the situation worse. + */ +- page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); ++ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac); + if (page) + goto got_pg; + +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch b/queue-5.15/mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch new file mode 100644 index 00000000000..2463171928b --- /dev/null +++ b/queue-5.15/mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch @@ -0,0 +1,113 @@ +From 30e94d83752cef0e1a750f02655ac56ec3dec6f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 11:12:15 +0000 +Subject: mm/page_alloc: explicitly define what alloc flags deplete min + reserves + +From: Mel Gorman + +[ Upstream commit ab3508854353793cd35e348fde89a5c09b2fd8b5 ] + +As there are more ALLOC_ flags that affect reserves, define what flags +affect reserves and clarify the effect of each flag. 
+ +Link: https://lkml.kernel.org/r/20230113111217.14134-5-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Cc: Matthew Wilcox +Cc: NeilBrown +Cc: Thierry Reding +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/internal.h | 3 +++ + mm/page_alloc.c | 34 ++++++++++++++++++++++------------ + 2 files changed, 25 insertions(+), 12 deletions(-) + +diff --git a/mm/internal.h b/mm/internal.h +index 136f435e0f1ab..717e75313693c 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -606,6 +606,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ + #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ + ++/* Flags that allow allocations below the min watermark. */ ++#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM) ++ + enum ttu_flags; + struct tlbflush_unmap_batch; + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 43122de999c4c..7778c2b11d8cb 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3838,15 +3838,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); + static inline long __zone_watermark_unusable_free(struct zone *z, + unsigned int order, unsigned int alloc_flags) + { +- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); + long unusable_free = (1 << order) - 1; + + /* +- * If the caller does not have rights to ALLOC_HARDER then subtract +- * the high-atomic reserves. This will over-estimate the size of the +- * atomic reserve but it avoids a search. ++ * If the caller does not have rights to reserves below the min ++ * watermark then subtract the high-atomic reserves. This will ++ * over-estimate the size of the atomic reserve but it avoids a search. 
+ */ +- if (likely(!alloc_harder)) ++ if (likely(!(alloc_flags & ALLOC_RESERVES))) + unusable_free += z->nr_reserved_highatomic; + + #ifdef CONFIG_CMA +@@ -3870,25 +3869,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + { + long min = mark; + int o; +- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM)); + + /* free_pages may go negative - that's OK */ + free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); + +- if (alloc_flags & ALLOC_MIN_RESERVE) +- min -= min / 2; ++ if (unlikely(alloc_flags & ALLOC_RESERVES)) { ++ /* ++ * __GFP_HIGH allows access to 50% of the min reserve as well ++ * as OOM. ++ */ ++ if (alloc_flags & ALLOC_MIN_RESERVE) ++ min -= min / 2; + +- if (unlikely(alloc_harder)) { + /* +- * OOM victims can try even harder than normal ALLOC_HARDER ++ * Non-blocking allocations can access some of the reserve ++ * with more access if also __GFP_HIGH. The reasoning is that ++ * a non-blocking caller may incur a more severe penalty ++ * if it cannot get memory quickly, particularly if it's ++ * also __GFP_HIGH. ++ */ ++ if (alloc_flags & ALLOC_HARDER) ++ min -= min / 4; ++ ++ /* ++ * OOM victims can try even harder than the normal reserve + * users on the grounds that it's definitely going to be in + * the exit path shortly and free memory. Any allocation it + * makes during the free path will be small and short-lived. 
+ */ + if (alloc_flags & ALLOC_OOM) + min -= min / 2; +- else +- min -= min / 4; + } + + /* +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-explicitly-record-high-order-atomic-al.patch b/queue-5.15/mm-page_alloc-explicitly-record-high-order-atomic-al.patch new file mode 100644 index 00000000000..44c45cad858 --- /dev/null +++ b/queue-5.15/mm-page_alloc-explicitly-record-high-order-atomic-al.patch @@ -0,0 +1,124 @@ +From 033cc55389ad1443530b5b69b70d07760aee696e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 11:12:14 +0000 +Subject: mm/page_alloc: explicitly record high-order atomic allocations in + alloc_flags + +From: Mel Gorman + +[ Upstream commit eb2e2b425c6984ca8034448a3f2c680622bd3d4d ] + +A high-order ALLOC_HARDER allocation is assumed to be atomic. While that +is accurate, it changes later in the series. In preparation, explicitly +record high-order atomic allocations in gfp_to_alloc_flags(). + +Link: https://lkml.kernel.org/r/20230113111217.14134-4-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Cc: Matthew Wilcox +Cc: NeilBrown +Cc: Thierry Reding +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/internal.h | 1 + + mm/page_alloc.c | 29 +++++++++++++++++++++++------ + 2 files changed, 24 insertions(+), 6 deletions(-) + +diff --git a/mm/internal.h b/mm/internal.h +index e6c96327b5855..136f435e0f1ab 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -603,6 +603,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + #else + #define ALLOC_NOFRAGMENT 0x0 + #endif ++#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */ + #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */ + + enum ttu_flags; +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 72835cf4034bc..43122de999c4c 100644 +--- 
a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3614,10 +3614,20 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, + * reserved for high-order atomic allocation, so order-0 + * request should skip it. + */ +- if (order > 0 && alloc_flags & ALLOC_HARDER) ++ if (alloc_flags & ALLOC_HIGHATOMIC) + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + if (!page) { + page = __rmqueue(zone, order, migratetype, alloc_flags); ++ ++ /* ++ * If the allocation fails, allow OOM handling access ++ * to HIGHATOMIC reserves as failing now is worse than ++ * failing a high-order atomic allocation in the ++ * future. ++ */ ++ if (!page && (alloc_flags & ALLOC_OOM)) ++ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); ++ + if (!page) { + spin_unlock_irqrestore(&zone->lock, flags); + return NULL; +@@ -3912,8 +3922,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + return true; + } + #endif +- if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC)) ++ if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) && ++ !free_area_empty(area, MIGRATE_HIGHATOMIC)) { + return true; ++ } + } + return false; + } +@@ -4172,7 +4184,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, + * If this is a high-order atomic allocation then check + * if the pageblock should be reserved for the future + */ +- if (unlikely(order && (alloc_flags & ALLOC_HARDER))) ++ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) + reserve_highatomic_pageblock(page, zone, order); + + return page; +@@ -4691,7 +4703,7 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask, + } + + static inline unsigned int +-gfp_to_alloc_flags(gfp_t gfp_mask) ++gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order) + { + unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; + +@@ -4717,8 +4729,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask) + * Not worth trying to allocate harder for __GFP_NOMEMALLOC even + * if it can't 
schedule. + */ +- if (!(gfp_mask & __GFP_NOMEMALLOC)) ++ if (!(gfp_mask & __GFP_NOMEMALLOC)) { + alloc_flags |= ALLOC_HARDER; ++ ++ if (order > 0) ++ alloc_flags |= ALLOC_HIGHATOMIC; ++ } ++ + /* + * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the + * comment for __cpuset_node_allowed(). +@@ -4946,7 +4963,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, + * kswapd needs to be woken up, and to avoid the cost of setting up + * alloc_flags precisely. So we do that now. + */ +- alloc_flags = gfp_to_alloc_flags(gfp_mask); ++ alloc_flags = gfp_to_alloc_flags(gfp_mask, order); + + /* + * We need to recalculate the starting point for the zonelist iterator +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch b/queue-5.15/mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch new file mode 100644 index 00000000000..0c3b5474a53 --- /dev/null +++ b/queue-5.15/mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch @@ -0,0 +1,141 @@ +From b8c131116b907997152c4662e209487719e4d4ac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 May 2022 14:08:54 -0700 +Subject: mm/page_alloc: fix tracepoint mm_page_alloc_zone_locked() + +From: Wonhyuk Yang + +[ Upstream commit 10e0f7530205799e7e971aba699a7cb3a47456de ] + +Currently, trace point mm_page_alloc_zone_locked() doesn't show correct +information. + +First, when alloc_flag has ALLOC_HARDER/ALLOC_CMA, page can be allocated +from MIGRATE_HIGHATOMIC/MIGRATE_CMA. Nevertheless, the tracepoint uses the +requested migration type not MIGRATE_HIGHATOMIC and MIGRATE_CMA. + +Second, after commit 44042b4498728 ("mm/page_alloc: allow high-order pages +to be stored on the per-cpu lists") percpu-list can store high order +pages. But the trace point determines whether it is a refill of percpu-list by +comparing requested order and 0. + +To handle these problems, make mm_page_alloc_zone_locked() only be called +by __rmqueue_smallest with correct migration type. 
With a new argument +called percpu_refill, it can show roughly whether it is a refill of +percpu-list. + +Link: https://lkml.kernel.org/r/20220512025307.57924-1-vvghjk1234@gmail.com +Signed-off-by: Wonhyuk Yang +Acked-by: Mel Gorman +Cc: Baik Song An +Cc: Hong Yeon Kim +Cc: Taeung Song +Cc: +Cc: Steven Rostedt +Cc: Ingo Molnar +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + include/trace/events/kmem.h | 14 +++++++++----- + mm/page_alloc.c | 13 +++++-------- + 2 files changed, 14 insertions(+), 13 deletions(-) + +diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h +index ddc8c944f417a..f89fb3afcd46a 100644 +--- a/include/trace/events/kmem.h ++++ b/include/trace/events/kmem.h +@@ -229,20 +229,23 @@ TRACE_EVENT(mm_page_alloc, + + DECLARE_EVENT_CLASS(mm_page, + +- TP_PROTO(struct page *page, unsigned int order, int migratetype), ++ TP_PROTO(struct page *page, unsigned int order, int migratetype, ++ int percpu_refill), + +- TP_ARGS(page, order, migratetype), ++ TP_ARGS(page, order, migratetype, percpu_refill), + + TP_STRUCT__entry( + __field( unsigned long, pfn ) + __field( unsigned int, order ) + __field( int, migratetype ) ++ __field( int, percpu_refill ) + ), + + TP_fast_assign( + __entry->pfn = page ? page_to_pfn(page) : -1UL; + __entry->order = order; + __entry->migratetype = migratetype; ++ __entry->percpu_refill = percpu_refill; + ), + + TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d", +@@ -250,14 +253,15 @@ DECLARE_EVENT_CLASS(mm_page, + __entry->pfn != -1UL ? 
__entry->pfn : 0, + __entry->order, + __entry->migratetype, +- __entry->order == 0) ++ __entry->percpu_refill) + ); + + DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, + +- TP_PROTO(struct page *page, unsigned int order, int migratetype), ++ TP_PROTO(struct page *page, unsigned int order, int migratetype, ++ int percpu_refill), + +- TP_ARGS(page, order, migratetype) ++ TP_ARGS(page, order, migratetype, percpu_refill) + ); + + TRACE_EVENT(mm_page_pcpu_drain, +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 474150584ba48..264cb1914ab5b 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2461,6 +2461,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, + del_page_from_free_list(page, zone, current_order); + expand(zone, page, order, current_order, migratetype); + set_pcppage_migratetype(page, migratetype); ++ trace_mm_page_alloc_zone_locked(page, order, migratetype, ++ pcp_allowed_order(order) && ++ migratetype < MIGRATE_PCPTYPES); + return page; + } + +@@ -2988,7 +2991,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, + zone_page_state(zone, NR_FREE_PAGES) / 2) { + page = __rmqueue_cma_fallback(zone, order); + if (page) +- goto out; ++ return page; + } + } + retry: +@@ -3001,9 +3004,6 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, + alloc_flags)) + goto retry; + } +-out: +- if (page) +- trace_mm_page_alloc_zone_locked(page, order, migratetype); + return page; + } + +@@ -3708,11 +3708,8 @@ struct page *rmqueue(struct zone *preferred_zone, + * reserved for high-order atomic allocation, so order-0 + * request should skip it. 
+ */ +- if (order > 0 && alloc_flags & ALLOC_HARDER) { ++ if (order > 0 && alloc_flags & ALLOC_HARDER) + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); +- if (page) +- trace_mm_page_alloc_zone_locked(page, order, migratetype); +- } + if (!page) { + page = __rmqueue(zone, order, migratetype, alloc_flags); + if (!page) +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch b/queue-5.15/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch new file mode 100644 index 00000000000..fdf33a67fac --- /dev/null +++ b/queue-5.15/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch @@ -0,0 +1,88 @@ +From 685658478127e0b3fa3ea017a891f5f30f55011b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Oct 2024 13:07:37 +0100 +Subject: mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic + reserves + +From: Matt Fleming + +[ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ] + +Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to +fail even though free pages are available in the highatomic reserves. +GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock() +since it's only run from reclaim. + +Given that such allocations will pass the watermarks in +__zone_watermark_unusable_free(), it makes sense to fallback to highatomic +reserves the same way that ALLOC_OOM can. 
+ +This fixes order-0 page allocation failures observed on Cloudflare's fleet +when handling network packets: + + kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC), + nodemask=(null),cpuset=/,mems_allowed=0-7 + CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G O 6.6.43-CUSTOM #1 + Hardware name: MACHINE + Call Trace: + + dump_stack_lvl+0x3c/0x50 + warn_alloc+0x13a/0x1c0 + __alloc_pages_slowpath.constprop.0+0xc9d/0xd10 + __alloc_pages+0x327/0x340 + __napi_alloc_skb+0x16d/0x1f0 + bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en] + bnxt_rx_pkt+0x201/0x15e0 [bnxt_en] + __bnxt_poll_work+0x156/0x2b0 [bnxt_en] + bnxt_poll+0xd9/0x1c0 [bnxt_en] + __napi_poll+0x2b/0x1b0 + bpf_trampoline_6442524138+0x7d/0x1000 + __napi_poll+0x5/0x1b0 + net_rx_action+0x342/0x740 + handle_softirqs+0xcf/0x2b0 + irq_exit_rcu+0x6c/0x90 + sysvec_apic_timer_interrupt+0x72/0x90 + + +[mfleming@cloudflare.com: update comment] + Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com +Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com +Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/ +Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs") +Signed-off-by: Matt Fleming +Suggested-by: Vlastimil Babka +Reviewed-by: Vlastimil Babka +Cc: Mel Gorman +Cc: Michal Hocko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 404cee30dcc26..6a64a75184888 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3620,12 +3620,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, + page = __rmqueue(zone, order, migratetype, alloc_flags); + + /* +- * If the allocation fails, allow OOM handling access +- * to HIGHATOMIC reserves as failing now is worse than +- * failing a high-order 
atomic allocation in the +- * future. ++ * If the allocation fails, allow OOM handling and ++ * order-0 (atomic) allocs access to HIGHATOMIC ++ * reserves as failing now is worse than failing a ++ * high-order atomic allocation in the future. + */ +- if (!page && (alloc_flags & ALLOC_OOM)) ++ if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK))) + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + + if (!page) { +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch b/queue-5.15/mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch new file mode 100644 index 00000000000..d0f82dd3a79 --- /dev/null +++ b/queue-5.15/mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch @@ -0,0 +1,113 @@ +From 1331502c1782067490c126cbac49572ca69cd467 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 11:12:12 +0000 +Subject: mm/page_alloc: rename ALLOC_HIGH to ALLOC_MIN_RESERVE + +From: Mel Gorman + +[ Upstream commit 524c48072e5673f4511f1ad81493e2485863fd65 ] + +Patch series "Discard __GFP_ATOMIC", v3. + +Neil's patch has been residing in mm-unstable as commit 2fafb4fe8f7a ("mm: +discard __GFP_ATOMIC") for a long time and recently brought up again. +Most recently, I was worried that __GFP_HIGH allocations could use +high-order atomic reserves which is unintentional but there was no +response so lets revisit -- this series reworks how min reserves are used, +protects highorder reserves and then finishes with Neil's patch with very +minor modifications so it fits on top. + +There was a review discussion on renaming __GFP_DIRECT_RECLAIM to +__GFP_ALLOW_BLOCKING but I didn't think it was that big an issue and is +orthogonal to the removal of __GFP_ATOMIC. + +There were some concerns about how the gfp flags affect the min reserves +but it never reached a solid conclusion so I made my own attempt. + +The series tries to iron out some of the details on how reserves are used. 
+ALLOC_HIGH becomes ALLOC_MIN_RESERVE and ALLOC_HARDER becomes +ALLOC_NON_BLOCK and documents how the reserves are affected. For example, +ALLOC_NON_BLOCK (no direct reclaim) on its own allows 25% of the min +reserve. ALLOC_MIN_RESERVE (__GFP_HIGH) allows 50% and both combined +allows deeper access again. ALLOC_OOM allows access to 75%. + +High-order atomic allocations are explicitly handled with the caveat that +no __GFP_ATOMIC flag means that any high-order allocation that specifies +GFP_HIGH and cannot enter direct reclaim will be treated as if it was +GFP_ATOMIC. + +This patch (of 6): + +__GFP_HIGH aliases to ALLOC_HIGH but the name does not really hint what it +means. As ALLOC_HIGH is internal to the allocator, rename it to +ALLOC_MIN_RESERVE to document that the min reserves can be depleted. + +Link: https://lkml.kernel.org/r/20230113111217.14134-1-mgorman@techsingularity.net +Link: https://lkml.kernel.org/r/20230113111217.14134-2-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Cc: Matthew Wilcox +Cc: NeilBrown +Cc: Thierry Reding +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/internal.h | 4 +++- + mm/page_alloc.c | 8 ++++---- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/mm/internal.h b/mm/internal.h +index cf3cb933eba3f..e6c96327b5855 100644 +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -593,7 +593,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + #endif + + #define ALLOC_HARDER 0x10 /* try to alloc harder */ +-#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ ++#define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50% ++ * of the min watermark. 
++ */ + #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ + #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ + #ifdef CONFIG_ZONE_DMA32 +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index ae628574dc9fc..4e9e9cb98f336 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3865,7 +3865,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, + /* free_pages may go negative - that's OK */ + free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags); + +- if (alloc_flags & ALLOC_HIGH) ++ if (alloc_flags & ALLOC_MIN_RESERVE) + min -= min / 2; + + if (unlikely(alloc_harder)) { +@@ -4696,18 +4696,18 @@ gfp_to_alloc_flags(gfp_t gfp_mask) + unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; + + /* +- * __GFP_HIGH is assumed to be the same as ALLOC_HIGH ++ * __GFP_HIGH is assumed to be the same as ALLOC_MIN_RESERVE + * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD + * to save two branches. + */ +- BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); ++ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_MIN_RESERVE); + BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD); + + /* + * The caller may dip into page reserves a bit more if the caller + * cannot run direct reclaim, or if the caller has realtime scheduling + * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will +- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH). ++ * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH). 
+ */ + alloc_flags |= (__force int) + (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)); +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch b/queue-5.15/mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch new file mode 100644 index 00000000000..f08ffefae92 --- /dev/null +++ b/queue-5.15/mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch @@ -0,0 +1,151 @@ +From fc6b92e69f211e4a3637ec0d4b8a328496c067d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Jun 2022 13:54:19 +0100 +Subject: mm/page_alloc: split out buddy removal code from rmqueue into + separate helper + +From: Mel Gorman + +[ Upstream commit 589d9973c1d2c3344a94a57441071340b0c71097 ] + +This is a preparation patch to allow the buddy removal code to be reused in +a later patch. + +No functional change. + +Link: https://lkml.kernel.org/r/20220624125423.6126-4-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Tested-by: Minchan Kim +Acked-by: Minchan Kim +Reviewed-by: Nicolas Saenz Julienne +Acked-by: Vlastimil Babka +Tested-by: Yu Zhao +Cc: Hugh Dickins +Cc: Marcelo Tosatti +Cc: Marek Szyprowski +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 81 ++++++++++++++++++++++++++++--------------------- + 1 file changed, 47 insertions(+), 34 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 264cb1914ab5b..ae628574dc9fc 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3597,6 +3597,43 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z, + #endif + } + ++static __always_inline ++struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, ++ unsigned int order, unsigned int alloc_flags, ++ int migratetype) ++{ ++ struct page *page; ++ unsigned long flags; ++ ++ do { ++ page = NULL; ++ spin_lock_irqsave(&zone->lock, 
flags); ++ /* ++ * order-0 request can reach here when the pcplist is skipped ++ * due to non-CMA allocation context. HIGHATOMIC area is ++ * reserved for high-order atomic allocation, so order-0 ++ * request should skip it. ++ */ ++ if (order > 0 && alloc_flags & ALLOC_HARDER) ++ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); ++ if (!page) { ++ page = __rmqueue(zone, order, migratetype, alloc_flags); ++ if (!page) { ++ spin_unlock_irqrestore(&zone->lock, flags); ++ return NULL; ++ } ++ } ++ __mod_zone_freepage_state(zone, -(1 << order), ++ get_pcppage_migratetype(page)); ++ spin_unlock_irqrestore(&zone->lock, flags); ++ } while (check_new_pages(page, order)); ++ ++ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); ++ zone_statistics(preferred_zone, zone, 1); ++ ++ return page; ++} ++ + /* Remove page from the per-cpu list, caller must protect the list */ + static inline + struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order, +@@ -3677,9 +3714,14 @@ struct page *rmqueue(struct zone *preferred_zone, + gfp_t gfp_flags, unsigned int alloc_flags, + int migratetype) + { +- unsigned long flags; + struct page *page; + ++ /* ++ * We most definitely don't want callers attempting to ++ * allocate greater than order-1 page units with __GFP_NOFAIL. ++ */ ++ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); ++ + if (likely(pcp_allowed_order(order))) { + /* + * MIGRATE_MOVABLE pcplist could have the pages on CMA area and +@@ -3693,35 +3735,10 @@ struct page *rmqueue(struct zone *preferred_zone, + } + } + +- /* +- * We most definitely don't want callers attempting to +- * allocate greater than order-1 page units with __GFP_NOFAIL. +- */ +- WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); +- +- do { +- page = NULL; +- spin_lock_irqsave(&zone->lock, flags); +- /* +- * order-0 request can reach here when the pcplist is skipped +- * due to non-CMA allocation context. 
HIGHATOMIC area is +- * reserved for high-order atomic allocation, so order-0 +- * request should skip it. +- */ +- if (order > 0 && alloc_flags & ALLOC_HARDER) +- page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); +- if (!page) { +- page = __rmqueue(zone, order, migratetype, alloc_flags); +- if (!page) +- goto failed; +- } +- __mod_zone_freepage_state(zone, -(1 << order), +- get_pcppage_migratetype(page)); +- spin_unlock_irqrestore(&zone->lock, flags); +- } while (check_new_pages(page, order)); +- +- __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); +- zone_statistics(preferred_zone, zone, 1); ++ page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags, ++ migratetype); ++ if (unlikely(!page)) ++ return NULL; + + out: + /* Separate test+clear to avoid unnecessary atomics */ +@@ -3732,10 +3749,6 @@ struct page *rmqueue(struct zone *preferred_zone, + + VM_BUG_ON_PAGE(page && bad_range(zone, page), page); + return page; +- +-failed: +- spin_unlock_irqrestore(&zone->lock, flags); +- return NULL; + } + + #ifdef CONFIG_FAIL_PAGE_ALLOC +-- +2.43.0 + diff --git a/queue-5.15/mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch b/queue-5.15/mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch new file mode 100644 index 00000000000..abc62b88a43 --- /dev/null +++ b/queue-5.15/mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch @@ -0,0 +1,55 @@ +From a0507ad4ef8d62903a10c963660969828cc572d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 11:12:13 +0000 +Subject: mm/page_alloc: treat RT tasks similar to __GFP_HIGH + +From: Mel Gorman + +[ Upstream commit c988dcbecf3fd5430921eaa3fe9054754f76d185 ] + +RT tasks are allowed to dip below the min reserve but ALLOC_HARDER is +typically combined with ALLOC_MIN_RESERVE so RT tasks are a little +unusual. 
While there is some justification for allowing RT tasks access +to memory reserves, there is a strong chance that a RT task that is also +under memory pressure is at risk of missing deadlines anyway. Relax how +much reserves an RT task can access by treating it the same as __GFP_HIGH +allocations. + +Note that in a future kernel release that the RT special casing will be +removed. Hard realtime tasks should be locking down resources in advance +and ensuring enough memory is available. Even a soft-realtime task like +audio or video live decoding which cannot jitter should be allocating both +memory and any disk space required up-front before the recording starts +instead of relying on reserves. At best, reserve access will only delay +the problem by a very short interval. + +Link: https://lkml.kernel.org/r/20230113111217.14134-3-mgorman@techsingularity.net +Signed-off-by: Mel Gorman +Acked-by: Vlastimil Babka +Acked-by: Michal Hocko +Cc: Matthew Wilcox +Cc: NeilBrown +Cc: Thierry Reding +Signed-off-by: Andrew Morton +Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves") +Signed-off-by: Sasha Levin +--- + mm/page_alloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 4e9e9cb98f336..72835cf4034bc 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4725,7 +4725,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) + */ + alloc_flags &= ~ALLOC_CPUSET; + } else if (unlikely(rt_task(current)) && in_task()) +- alloc_flags |= ALLOC_HARDER; ++ alloc_flags |= ALLOC_MIN_RESERVE; + + alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags); + +-- +2.43.0 + diff --git a/queue-5.15/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch b/queue-5.15/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch new file mode 100644 index 00000000000..6165c5b0fd1 --- /dev/null +++ b/queue-5.15/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch @@ -0,0 +1,60 @@ +From 
fe6424c229e1c0e381b3a30dc093e7ebcc43e8ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Oct 2024 19:43:47 +0800 +Subject: ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow + +From: Edward Adam Davis + +[ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ] + +Syzbot reported a kernel BUG in ocfs2_truncate_inline. There are two +reasons for this: first, the parameter value passed is greater than +ocfs2_max_inline_data_with_xattr, second, the start and end parameters of +ocfs2_truncate_inline are "unsigned int". + +So, we need to add a sanity check for byte_start and byte_len right before +ocfs2_truncate_inline() in ocfs2_remove_inode_range(), if they are greater +than ocfs2_max_inline_data_with_xattr return -EINVAL. + +Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com +Fixes: 1afc32b95233 ("ocfs2: Write support for inline data") +Signed-off-by: Edward Adam Davis +Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7 +Reviewed-by: Joseph Qi +Cc: Joel Becker +Cc: Joseph Qi +Cc: Mark Fasheh +Cc: Junxiao Bi +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/ocfs2/file.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c +index 403c71a485c7c..fc1e929ae0381 100644 +--- a/fs/ocfs2/file.c ++++ b/fs/ocfs2/file.c +@@ -1787,6 +1787,14 @@ int ocfs2_remove_inode_range(struct inode *inode, + return 0; + + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { ++ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di); ++ ++ if (byte_start > id_count || byte_start + byte_len > id_count) { ++ ret = -EINVAL; ++ mlog_errno(ret); ++ goto out; ++ } ++ + ret = ocfs2_truncate_inline(inode, di_bh, byte_start, + byte_start + byte_len, 0); + if (ret) { +-- +2.43.0 + diff --git 
a/queue-5.15/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch b/queue-5.15/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch new file mode 100644 index 00000000000..c135605ea76 --- /dev/null +++ b/queue-5.15/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch @@ -0,0 +1,48 @@ +From 793a7bb0be6deacd2f96f42fb3d3698eca37b010 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 29 Sep 2024 16:02:33 +0200 +Subject: riscv: efi: Set NX compat flag in PE/COFF header + +From: Heinrich Schuchardt + +[ Upstream commit d41373a4b910961df5a5e3527d7bde6ad45ca438 ] + +The IMAGE_DLLCHARACTERISTICS_NX_COMPAT informs the firmware that the +EFI binary does not rely on pages that are both executable and +writable. + +The flag is used by some distro versions of GRUB to decide if the EFI +binary may be executed. + +As the Linux kernel neither has RWX sections nor needs RWX pages for +relocation we should set the flag. + +Cc: Ard Biesheuvel +Cc: +Signed-off-by: Heinrich Schuchardt +Reviewed-by: Emil Renner Berthing +Fixes: cb7d2dd5612a ("RISC-V: Add PE/COFF header for EFI stub") +Acked-by: Ard Biesheuvel +Link: https://lore.kernel.org/r/20240929140233.211800-1-heinrich.schuchardt@canonical.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/efi-header.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S +index 8e733aa48ba6c..c306f3a6a800e 100644 +--- a/arch/riscv/kernel/efi-header.S ++++ b/arch/riscv/kernel/efi-header.S +@@ -59,7 +59,7 @@ extra_header_fields: + .long efi_header_end - _start // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem +- .short 0 // DllCharacteristics ++ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve +-- +2.43.0 + diff --git a/queue-5.15/riscv-remove-duplicated-get_rm.patch 
b/queue-5.15/riscv-remove-duplicated-get_rm.patch new file mode 100644 index 00000000000..f546236c2f8 --- /dev/null +++ b/queue-5.15/riscv-remove-duplicated-get_rm.patch @@ -0,0 +1,38 @@ +From 62bbec0f2fec66bfe961fb0cd642088cd4796c1c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Oct 2024 17:41:39 +0800 +Subject: riscv: Remove duplicated GET_RM + +From: Chunyan Zhang + +[ Upstream commit 164f66de6bb6ef454893f193c898dc8f1da6d18b ] + +The macro GET_RM defined twice in this file, one can be removed. + +Reviewed-by: Alexandre Ghiti +Signed-off-by: Chunyan Zhang +Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20241008094141.549248-3-zhangchunyan@iscas.ac.cn +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/traps_misaligned.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c +index b246c3dc69930..d548d6992d988 100644 +--- a/arch/riscv/kernel/traps_misaligned.c ++++ b/arch/riscv/kernel/traps_misaligned.c +@@ -131,8 +131,6 @@ + #define REG_PTR(insn, pos, regs) \ + (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) + +-#define GET_RM(insn) (((insn) >> 12) & 7) +- + #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) + #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) + #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +-- +2.43.0 + diff --git a/queue-5.15/riscv-remove-unused-generating_asm_offsets.patch b/queue-5.15/riscv-remove-unused-generating_asm_offsets.patch new file mode 100644 index 00000000000..d737d28d0c9 --- /dev/null +++ b/queue-5.15/riscv-remove-unused-generating_asm_offsets.patch @@ -0,0 +1,44 @@ +From 447c6d669f5bcd209d458ade7941840d3b37714c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Oct 2024 17:41:38 +0800 +Subject: riscv: Remove unused GENERATING_ASM_OFFSETS + +From: Chunyan Zhang + +[ Upstream commit 
46d4e5ac6f2f801f97bcd0ec82365969197dc9b1 ] + +The macro is not used in the current version of kernel, it looks like +can be removed to avoid a build warning: + +../arch/riscv/kernel/asm-offsets.c: At top level: +../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros] + 7 | #define GENERATING_ASM_OFFSETS + +Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()") +Cc: stable@vger.kernel.org +Reviewed-by: Alexandre Ghiti +Tested-by: Alexandre Ghiti +Signed-off-by: Chunyan Zhang +Link: https://lore.kernel.org/r/20241008094141.549248-2-zhangchunyan@iscas.ac.cn +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/asm-offsets.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c +index 90f8ce64fa6f1..0b6064fec9e07 100644 +--- a/arch/riscv/kernel/asm-offsets.c ++++ b/arch/riscv/kernel/asm-offsets.c +@@ -4,8 +4,6 @@ + * Copyright (C) 2017 SiFive + */ + +-#define GENERATING_ASM_OFFSETS +- + #include + #include + #include +-- +2.43.0 + diff --git a/queue-5.15/riscv-use-u-to-format-the-output-of-cpu.patch b/queue-5.15/riscv-use-u-to-format-the-output-of-cpu.patch new file mode 100644 index 00000000000..00fbf87f644 --- /dev/null +++ b/queue-5.15/riscv-use-u-to-format-the-output-of-cpu.patch @@ -0,0 +1,43 @@ +From 5998836eb1a921346ceb0364c5a3734dde769cec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 11:20:10 +0800 +Subject: riscv: Use '%u' to format the output of 'cpu' + +From: WangYuli + +[ Upstream commit e0872ab72630dada3ae055bfa410bf463ff1d1e0 ] + +'cpu' is an unsigned integer, so its conversion specifier should +be %u, not %d. + +Suggested-by: Wentao Guan +Suggested-by: Maciej W. 
Rozycki +Link: https://lore.kernel.org/all/alpine.DEB.2.21.2409122309090.40372@angie.orcam.me.uk/ +Signed-off-by: WangYuli +Reviewed-by: Charlie Jenkins +Tested-by: Charlie Jenkins +Fixes: f1e58583b9c7 ("RISC-V: Support cpu hotplug") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/4C127DEECDA287C8+20241017032010.96772-1-wangyuli@uniontech.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/cpu-hotplug.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c +index 66ddfba1cfbef..28a3fa6e67d79 100644 +--- a/arch/riscv/kernel/cpu-hotplug.c ++++ b/arch/riscv/kernel/cpu-hotplug.c +@@ -71,7 +71,7 @@ void __cpu_die(unsigned int cpu) + if (cpu_ops[cpu]->cpu_is_stopped) + ret = cpu_ops[cpu]->cpu_is_stopped(cpu); + if (ret) +- pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); ++ pr_warn("CPU%u may not have stopped: %d\n", cpu, ret); + } + + /* +-- +2.43.0 + diff --git a/queue-5.15/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch b/queue-5.15/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch new file mode 100644 index 00000000000..0cc8314afb9 --- /dev/null +++ b/queue-5.15/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch @@ -0,0 +1,40 @@ +From 5c32140f24b70bc6811d7f5322075dd791a169c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Oct 2024 10:36:24 +0200 +Subject: riscv: vdso: Prevent the compiler from inserting calls to memset() + +From: Alexandre Ghiti + +[ Upstream commit bf40167d54d55d4b54d0103713d86a8638fb9290 ] + +The compiler is smart enough to insert a call to memset() in +riscv_vdso_get_cpus(), which generates a dynamic relocation. + +So prevent this by using -fno-builtin option. 
+ +Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API") +Cc: stable@vger.kernel.org +Signed-off-by: Alexandre Ghiti +Reviewed-by: Guo Ren +Link: https://lore.kernel.org/r/20241016083625.136311-2-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/vdso/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile +index 06e6b27f3bcc9..c1b68f962bada 100644 +--- a/arch/riscv/kernel/vdso/Makefile ++++ b/arch/riscv/kernel/vdso/Makefile +@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o + + ccflags-y := -fno-stack-protector + ccflags-y += -DDISABLE_BRANCH_PROFILING ++ccflags-y += -fno-builtin + + ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) +-- +2.43.0 + diff --git a/queue-5.15/series b/queue-5.15/series index 4cd1c1a3cdc..40ff12f8a96 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -48,3 +48,18 @@ staging-iio-frequency-ad9832-fix-division-by-zero-in-ad9832_calc_freqreg.patch iio-adc-ad7124-fix-division-by-zero-in-ad7124_set_channel_odr.patch iio-light-veml6030-fix-microlux-value-calculation.patch nilfs2-fix-potential-deadlock-with-newly-created-symlinks.patch +riscv-vdso-prevent-the-compiler-from-inserting-calls.patch +riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch +riscv-use-u-to-format-the-output-of-cpu.patch +riscv-remove-unused-generating_asm_offsets.patch +riscv-remove-duplicated-get_rm.patch +mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch +mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch +mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch +mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch +mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch +mm-page_alloc-explicitly-record-high-order-atomic-al.patch +mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch +mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch 
+mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch +ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch