--- /dev/null
+From 8a6a5f3d06c606cfc62400eac84532a110cd2844 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Mar 2022 14:43:57 -0700
+Subject: mm/page_alloc: call check_new_pages() while zone spinlock is not held
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3313204c8ad553cf93f1ee8cc89456c73a7df938 ]
+
+For high-order pages not using pcp, rmqueue() currently calls the costly
+check_new_pages() while the zone spinlock is held and hard IRQs are
+masked.
+
+This is not needed; we can release the spinlock sooner to reduce zone
+spinlock contention.
+
+Note that after this patch, we call __mod_zone_freepage_state() before
+deciding to leak the page because it is in a bad state.
+
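+A condensed sketch of the resulting control flow (pieced together from
+the hunks below; the failed: label still drops the lock at this point in
+the series):
+
+	do {
+		page = NULL;
+		spin_lock_irqsave(&zone->lock, flags);
+		/* try MIGRATE_HIGHATOMIC for high-order, then __rmqueue() */
+		if (!page)
+			goto failed;
+		__mod_zone_freepage_state(zone, -(1 << order),
+					  get_pcppage_migratetype(page));
+		spin_unlock_irqrestore(&zone->lock, flags);
+	} while (check_new_pages(page, order));	/* now checked lock-free */
+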
+Link: https://lkml.kernel.org/r/20220304170215.1868106-1-eric.dumazet@gmail.com
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Wei Xu <weixugc@google.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 264efa022fa96..474150584ba48 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3698,10 +3698,10 @@ struct page *rmqueue(struct zone *preferred_zone,
+ * allocate greater than order-1 page units with __GFP_NOFAIL.
+ */
+ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+- spin_lock_irqsave(&zone->lock, flags);
+
+ do {
+ page = NULL;
++ spin_lock_irqsave(&zone->lock, flags);
+ /*
+ * order-0 request can reach here when the pcplist is skipped
+ * due to non-CMA allocation context. HIGHATOMIC area is
+@@ -3713,15 +3713,15 @@ struct page *rmqueue(struct zone *preferred_zone,
+ if (page)
+ trace_mm_page_alloc_zone_locked(page, order, migratetype);
+ }
+- if (!page)
++ if (!page) {
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
+- } while (page && check_new_pages(page, order));
+- if (!page)
+- goto failed;
+-
+- __mod_zone_freepage_state(zone, -(1 << order),
+- get_pcppage_migratetype(page));
+- spin_unlock_irqrestore(&zone->lock, flags);
++ if (!page)
++ goto failed;
++ }
++ __mod_zone_freepage_state(zone, -(1 << order),
++ get_pcppage_migratetype(page));
++ spin_unlock_irqrestore(&zone->lock, flags);
++ } while (check_new_pages(page, order));
+
+ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+ zone_statistics(preferred_zone, zone, 1);
+--
+2.43.0
+
--- /dev/null
+From bda5b38029b455b098258a2defce2740d878718c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:16 +0000
+Subject: mm/page_alloc: explicitly define how __GFP_HIGH non-blocking
+ allocations access reserves
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 1ebbb21811b76c3b932959787f37985af36f62fa ]
+
+GFP_ATOMIC allocations get flagged ALLOC_HARDER, which is a vague
+description. In preparation for the removal of GFP_ATOMIC, redefine
+__GFP_ATOMIC to simply mean non-blocking and rename ALLOC_HARDER to
+ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to
+reserves, but non-blocking callers are granted more access. For example,
+GFP_NOWAIT is non-blocking but has no special access to reserves. A
+__GFP_NOFAIL blocking allocation is granted access similar to __GFP_HIGH
+if the only alternative is an OOM kill.
+
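+As a worked example derived from the hunk below: a GFP_ATOMIC request
+carries both __GFP_HIGH (ALLOC_MIN_RESERVE) and the non-blocking
+property (ALLOC_NON_BLOCK), so the watermark it must clear is discounted
+twice:
+
+	min -= min / 2;		/* ALLOC_MIN_RESERVE: down to 50%   */
+	min -= min / 4;		/* ALLOC_NON_BLOCK:   down to 37.5% */
+
+That is, roughly 62.5% of the min reserve becomes usable, while a plain
+GFP_NOWAIT request (non-blocking but not __GFP_HIGH) takes neither
+discount and gets no special access.
+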
+Link: https://lkml.kernel.org/r/20230113111217.14134-6-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 7 +++++--
+ mm/page_alloc.c | 44 ++++++++++++++++++++++++--------------------
+ 2 files changed, 29 insertions(+), 22 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index 717e75313693c..cd444aa7a10af 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -592,7 +592,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_OOM ALLOC_NO_WATERMARKS
+ #endif
+
+-#define ALLOC_HARDER 0x10 /* try to alloc harder */
++#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access
++ * to 25% of the min watermark or
++ * 62.5% if __GFP_HIGH is set.
++ */
+ #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
+ * of the min watermark.
+ */
+@@ -607,7 +610,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+
+ /* Flags that allow allocations below the min watermark. */
+-#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
++#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
+
+ enum ttu_flags;
+ struct tlbflush_unmap_batch;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 7778c2b11d8cb..404cee30dcc26 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3878,18 +3878,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ * __GFP_HIGH allows access to 50% of the min reserve as well
+ * as OOM.
+ */
+- if (alloc_flags & ALLOC_MIN_RESERVE)
++ if (alloc_flags & ALLOC_MIN_RESERVE) {
+ min -= min / 2;
+
+- /*
+- * Non-blocking allocations can access some of the reserve
+- * with more access if also __GFP_HIGH. The reasoning is that
+- * a non-blocking caller may incur a more severe penalty
+- * if it cannot get memory quickly, particularly if it's
+- * also __GFP_HIGH.
+- */
+- if (alloc_flags & ALLOC_HARDER)
+- min -= min / 4;
++ /*
++ * Non-blocking allocations (e.g. GFP_ATOMIC) can
++ * access more reserves than just __GFP_HIGH. Other
++ * non-blocking allocations requests such as GFP_NOWAIT
++ * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
++ * access to the min reserve.
++ */
++ if (alloc_flags & ALLOC_NON_BLOCK)
++ min -= min / 4;
++ }
+
+ /*
+ * OOM victims can try even harder than the normal reserve
+@@ -4729,28 +4730,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
+- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
++ * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
+ */
+ alloc_flags |= (__force int)
+ (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
+
+- if (gfp_mask & __GFP_ATOMIC) {
++ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
+ /*
+ * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+ * if it can't schedule.
+ */
+ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+- alloc_flags |= ALLOC_HARDER;
++ alloc_flags |= ALLOC_NON_BLOCK;
+
+ if (order > 0)
+ alloc_flags |= ALLOC_HIGHATOMIC;
+ }
+
+ /*
+- * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+- * comment for __cpuset_node_allowed().
++ * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
++ * GFP_ATOMIC) rather than fail, see the comment for
++ * __cpuset_node_allowed().
+ */
+- alloc_flags &= ~ALLOC_CPUSET;
++ if (alloc_flags & ALLOC_MIN_RESERVE)
++ alloc_flags &= ~ALLOC_CPUSET;
+ } else if (unlikely(rt_task(current)) && in_task())
+ alloc_flags |= ALLOC_MIN_RESERVE;
+
+@@ -5188,12 +5191,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+
+ /*
+- * Help non-failing allocations by giving them access to memory
+- * reserves but do not use ALLOC_NO_WATERMARKS because this
++ * Help non-failing allocations by giving some access to memory
++ * reserves normally used for high priority non-blocking
++ * allocations but do not use ALLOC_NO_WATERMARKS because this
+ * could deplete whole memory reserves which would just make
+- * the situation worse
++ * the situation worse.
+ */
+- page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
++ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
+ if (page)
+ goto got_pg;
+
+--
+2.43.0
+
--- /dev/null
+From 30e94d83752cef0e1a750f02655ac56ec3dec6f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:15 +0000
+Subject: mm/page_alloc: explicitly define what alloc flags deplete min
+ reserves
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit ab3508854353793cd35e348fde89a5c09b2fd8b5 ]
+
+As there are more ALLOC_ flags that affect reserves, define what flags
+affect reserves and clarify the effect of each flag.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-5-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 3 +++
+ mm/page_alloc.c | 34 ++++++++++++++++++++++------------
+ 2 files changed, 25 insertions(+), 12 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index 136f435e0f1ab..717e75313693c 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -606,6 +606,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+
++/* Flags that allow allocations below the min watermark. */
++#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
++
+ enum ttu_flags;
+ struct tlbflush_unmap_batch;
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 43122de999c4c..7778c2b11d8cb 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3838,15 +3838,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
+ static inline long __zone_watermark_unusable_free(struct zone *z,
+ unsigned int order, unsigned int alloc_flags)
+ {
+- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+ long unusable_free = (1 << order) - 1;
+
+ /*
+- * If the caller does not have rights to ALLOC_HARDER then subtract
+- * the high-atomic reserves. This will over-estimate the size of the
+- * atomic reserve but it avoids a search.
++ * If the caller does not have rights to reserves below the min
++ * watermark then subtract the high-atomic reserves. This will
++ * over-estimate the size of the atomic reserve but it avoids a search.
+ */
+- if (likely(!alloc_harder))
++ if (likely(!(alloc_flags & ALLOC_RESERVES)))
+ unusable_free += z->nr_reserved_highatomic;
+
+ #ifdef CONFIG_CMA
+@@ -3870,25 +3869,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ {
+ long min = mark;
+ int o;
+- const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+
+ /* free_pages may go negative - that's OK */
+ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
+
+- if (alloc_flags & ALLOC_MIN_RESERVE)
+- min -= min / 2;
++ if (unlikely(alloc_flags & ALLOC_RESERVES)) {
++ /*
++ * __GFP_HIGH allows access to 50% of the min reserve as well
++ * as OOM.
++ */
++ if (alloc_flags & ALLOC_MIN_RESERVE)
++ min -= min / 2;
+
+- if (unlikely(alloc_harder)) {
+ /*
+- * OOM victims can try even harder than normal ALLOC_HARDER
++ * Non-blocking allocations can access some of the reserve
++ * with more access if also __GFP_HIGH. The reasoning is that
++ * a non-blocking caller may incur a more severe penalty
++ * if it cannot get memory quickly, particularly if it's
++ * also __GFP_HIGH.
++ */
++ if (alloc_flags & ALLOC_HARDER)
++ min -= min / 4;
++
++ /*
++ * OOM victims can try even harder than the normal reserve
+ * users on the grounds that it's definitely going to be in
+ * the exit path shortly and free memory. Any allocation it
+ * makes during the free path will be small and short-lived.
+ */
+ if (alloc_flags & ALLOC_OOM)
+ min -= min / 2;
+- else
+- min -= min / 4;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 033cc55389ad1443530b5b69b70d07760aee696e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:14 +0000
+Subject: mm/page_alloc: explicitly record high-order atomic allocations in
+ alloc_flags
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit eb2e2b425c6984ca8034448a3f2c680622bd3d4d ]
+
+A high-order ALLOC_HARDER allocation is assumed to be atomic. While that
+is accurate, it changes later in the series. In preparation, explicitly
+record high-order atomic allocations in gfp_to_alloc_flags().
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-4-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 1 +
+ mm/page_alloc.c | 29 +++++++++++++++++++++++------
+ 2 files changed, 24 insertions(+), 6 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index e6c96327b5855..136f435e0f1ab 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -603,6 +603,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #else
+ #define ALLOC_NOFRAGMENT 0x0
+ #endif
++#define ALLOC_HIGHATOMIC 0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+ #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+
+ enum ttu_flags;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 72835cf4034bc..43122de999c4c 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3614,10 +3614,20 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+ * reserved for high-order atomic allocation, so order-0
+ * request should skip it.
+ */
+- if (order > 0 && alloc_flags & ALLOC_HARDER)
++ if (alloc_flags & ALLOC_HIGHATOMIC)
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+ if (!page) {
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
++
++ /*
++ * If the allocation fails, allow OOM handling access
++ * to HIGHATOMIC reserves as failing now is worse than
++ * failing a high-order atomic allocation in the
++ * future.
++ */
++ if (!page && (alloc_flags & ALLOC_OOM))
++ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
++
+ if (!page) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return NULL;
+@@ -3912,8 +3922,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ return true;
+ }
+ #endif
+- if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
++ if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) &&
++ !free_area_empty(area, MIGRATE_HIGHATOMIC)) {
+ return true;
++ }
+ }
+ return false;
+ }
+@@ -4172,7 +4184,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
+ * If this is a high-order atomic allocation then check
+ * if the pageblock should be reserved for the future
+ */
+- if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
++ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+ reserve_highatomic_pageblock(page, zone, order);
+
+ return page;
+@@ -4691,7 +4703,7 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
+ }
+
+ static inline unsigned int
+-gfp_to_alloc_flags(gfp_t gfp_mask)
++gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
+ {
+ unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+
+@@ -4717,8 +4729,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+ * if it can't schedule.
+ */
+- if (!(gfp_mask & __GFP_NOMEMALLOC))
++ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+ alloc_flags |= ALLOC_HARDER;
++
++ if (order > 0)
++ alloc_flags |= ALLOC_HIGHATOMIC;
++ }
++
+ /*
+ * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+ * comment for __cpuset_node_allowed().
+@@ -4946,7 +4963,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+ * kswapd needs to be woken up, and to avoid the cost of setting up
+ * alloc_flags precisely. So we do that now.
+ */
+- alloc_flags = gfp_to_alloc_flags(gfp_mask);
++ alloc_flags = gfp_to_alloc_flags(gfp_mask, order);
+
+ /*
+ * We need to recalculate the starting point for the zonelist iterator
+--
+2.43.0
+
--- /dev/null
+From b8c131116b907997152c4662e209487719e4d4ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 May 2022 14:08:54 -0700
+Subject: mm/page_alloc: fix tracepoint mm_page_alloc_zone_locked()
+
+From: Wonhyuk Yang <vvghjk1234@gmail.com>
+
+[ Upstream commit 10e0f7530205799e7e971aba699a7cb3a47456de ]
+
+Currently, the tracepoint mm_page_alloc_zone_locked() doesn't show
+correct information.
+
+First, when alloc_flags has ALLOC_HARDER/ALLOC_CMA set, the page can be
+allocated from MIGRATE_HIGHATOMIC/MIGRATE_CMA. Nevertheless, the
+tracepoint reports the requested migration type, not MIGRATE_HIGHATOMIC
+or MIGRATE_CMA.
+
+Second, after commit 44042b4498728 ("mm/page_alloc: allow high-order
+pages to be stored on the per-cpu lists"), the percpu list can store
+high-order pages, but the tracepoint decides whether an allocation is a
+refill of the percpu list by comparing the requested order with 0.
+
+To handle these problems, make mm_page_alloc_zone_locked() only be
+called by __rmqueue_smallest() with the correct migration type. With a
+new argument called percpu_refill, it can show roughly whether the
+allocation is a refill of the percpu list.
+
+Link: https://lkml.kernel.org/r/20220512025307.57924-1-vvghjk1234@gmail.com
+Signed-off-by: Wonhyuk Yang <vvghjk1234@gmail.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Baik Song An <bsahn@etri.re.kr>
+Cc: Hong Yeon Kim <kimhy@etri.re.kr>
+Cc: Taeung Song <taeung@reallinux.co.kr>
+Cc: <linuxgeek@linuxgeek.io>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/kmem.h | 14 +++++++++-----
+ mm/page_alloc.c | 13 +++++--------
+ 2 files changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
+index ddc8c944f417a..f89fb3afcd46a 100644
+--- a/include/trace/events/kmem.h
++++ b/include/trace/events/kmem.h
+@@ -229,20 +229,23 @@ TRACE_EVENT(mm_page_alloc,
+
+ DECLARE_EVENT_CLASS(mm_page,
+
+- TP_PROTO(struct page *page, unsigned int order, int migratetype),
++ TP_PROTO(struct page *page, unsigned int order, int migratetype,
++ int percpu_refill),
+
+- TP_ARGS(page, order, migratetype),
++ TP_ARGS(page, order, migratetype, percpu_refill),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, pfn )
+ __field( unsigned int, order )
+ __field( int, migratetype )
++ __field( int, percpu_refill )
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = page ? page_to_pfn(page) : -1UL;
+ __entry->order = order;
+ __entry->migratetype = migratetype;
++ __entry->percpu_refill = percpu_refill;
+ ),
+
+ TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d",
+@@ -250,14 +253,15 @@ DECLARE_EVENT_CLASS(mm_page,
+ __entry->pfn != -1UL ? __entry->pfn : 0,
+ __entry->order,
+ __entry->migratetype,
+- __entry->order == 0)
++ __entry->percpu_refill)
+ );
+
+ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
+
+- TP_PROTO(struct page *page, unsigned int order, int migratetype),
++ TP_PROTO(struct page *page, unsigned int order, int migratetype,
++ int percpu_refill),
+
+- TP_ARGS(page, order, migratetype)
++ TP_ARGS(page, order, migratetype, percpu_refill)
+ );
+
+ TRACE_EVENT(mm_page_pcpu_drain,
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 474150584ba48..264cb1914ab5b 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2461,6 +2461,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
+ del_page_from_free_list(page, zone, current_order);
+ expand(zone, page, order, current_order, migratetype);
+ set_pcppage_migratetype(page, migratetype);
++ trace_mm_page_alloc_zone_locked(page, order, migratetype,
++ pcp_allowed_order(order) &&
++ migratetype < MIGRATE_PCPTYPES);
+ return page;
+ }
+
+@@ -2988,7 +2991,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
+ zone_page_state(zone, NR_FREE_PAGES) / 2) {
+ page = __rmqueue_cma_fallback(zone, order);
+ if (page)
+- goto out;
++ return page;
+ }
+ }
+ retry:
+@@ -3001,9 +3004,6 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
+ alloc_flags))
+ goto retry;
+ }
+-out:
+- if (page)
+- trace_mm_page_alloc_zone_locked(page, order, migratetype);
+ return page;
+ }
+
+@@ -3708,11 +3708,8 @@ struct page *rmqueue(struct zone *preferred_zone,
+ * reserved for high-order atomic allocation, so order-0
+ * request should skip it.
+ */
+- if (order > 0 && alloc_flags & ALLOC_HARDER) {
++ if (order > 0 && alloc_flags & ALLOC_HARDER)
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+- if (page)
+- trace_mm_page_alloc_zone_locked(page, order, migratetype);
+- }
+ if (!page) {
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
+ if (!page)
+--
+2.43.0
+
--- /dev/null
+From 685658478127e0b3fa3ea017a891f5f30f55011b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Oct 2024 13:07:37 +0100
+Subject: mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic
+ reserves
+
+From: Matt Fleming <mfleming@cloudflare.com>
+
+[ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ]
+
+Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to
+fail even though free pages are available in the highatomic reserves.
+GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock()
+since it's only run from reclaim.
+
+Given that such allocations will pass the watermarks in
+__zone_watermark_unusable_free(), it makes sense to fall back to
+highatomic reserves the same way that ALLOC_OOM can.
+
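+The watermark side of that claim comes from
+__zone_watermark_unusable_free() (condensed from an earlier patch in
+this series):
+
+	/* highatomic pages are only deducted for callers without reserves */
+	if (likely(!(alloc_flags & ALLOC_RESERVES)))
+		unusable_free += z->nr_reserved_highatomic;
+
+A GFP_ATOMIC request carries ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE, both
+part of ALLOC_RESERVES, so highatomic pages are counted as free for its
+watermark check even though, before this patch, rmqueue_buddy() would
+not fall back to MIGRATE_HIGHATOMIC for an order-0 request.
+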
+This fixes order-0 page allocation failures observed on Cloudflare's fleet
+when handling network packets:
+
+ kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC),
+ nodemask=(null),cpuset=/,mems_allowed=0-7
+ CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G O 6.6.43-CUSTOM #1
+ Hardware name: MACHINE
+ Call Trace:
+ <IRQ>
+ dump_stack_lvl+0x3c/0x50
+ warn_alloc+0x13a/0x1c0
+ __alloc_pages_slowpath.constprop.0+0xc9d/0xd10
+ __alloc_pages+0x327/0x340
+ __napi_alloc_skb+0x16d/0x1f0
+ bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en]
+ bnxt_rx_pkt+0x201/0x15e0 [bnxt_en]
+ __bnxt_poll_work+0x156/0x2b0 [bnxt_en]
+ bnxt_poll+0xd9/0x1c0 [bnxt_en]
+ __napi_poll+0x2b/0x1b0
+ bpf_trampoline_6442524138+0x7d/0x1000
+ __napi_poll+0x5/0x1b0
+ net_rx_action+0x342/0x740
+ handle_softirqs+0xcf/0x2b0
+ irq_exit_rcu+0x6c/0x90
+ sysvec_apic_timer_interrupt+0x72/0x90
+ </IRQ>
+
+[mfleming@cloudflare.com: update comment]
+ Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com
+Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com
+Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/
+Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs")
+Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 404cee30dcc26..6a64a75184888 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3620,12 +3620,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
+
+ /*
+- * If the allocation fails, allow OOM handling access
+- * to HIGHATOMIC reserves as failing now is worse than
+- * failing a high-order atomic allocation in the
+- * future.
++ * If the allocation fails, allow OOM handling and
++ * order-0 (atomic) allocs access to HIGHATOMIC
++ * reserves as failing now is worse than failing a
++ * high-order atomic allocation in the future.
+ */
+- if (!page && (alloc_flags & ALLOC_OOM))
++ if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
+ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+
+ if (!page) {
+--
+2.43.0
+
--- /dev/null
+From 1331502c1782067490c126cbac49572ca69cd467 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:12 +0000
+Subject: mm/page_alloc: rename ALLOC_HIGH to ALLOC_MIN_RESERVE
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 524c48072e5673f4511f1ad81493e2485863fd65 ]
+
+Patch series "Discard __GFP_ATOMIC", v3.
+
+Neil's patch has been residing in mm-unstable as commit 2fafb4fe8f7a ("mm:
+discard __GFP_ATOMIC") for a long time and was recently brought up again.
+Most recently, I was worried that __GFP_HIGH allocations could use
+high-order atomic reserves, which is unintentional, but there was no
+response, so let's revisit -- this series reworks how min reserves are
+used, protects high-order reserves and then finishes with Neil's patch
+with very minor modifications so it fits on top.
+
+There was a review discussion on renaming __GFP_DIRECT_RECLAIM to
+__GFP_ALLOW_BLOCKING but I didn't think it was that big an issue and it
+is orthogonal to the removal of __GFP_ATOMIC.
+
+There were some concerns about how the gfp flags affect the min reserves
+but the discussion never reached a solid conclusion, so I made my own
+attempt.
+
+The series tries to iron out some of the details on how reserves are used.
+ALLOC_HIGH becomes ALLOC_MIN_RESERVE and ALLOC_HARDER becomes
+ALLOC_NON_BLOCK and documents how the reserves are affected. For example,
+ALLOC_NON_BLOCK (no direct reclaim) on its own allows 25% of the min
+reserve. ALLOC_MIN_RESERVE (__GFP_HIGH) allows 50% and both combined
+allows deeper access again. ALLOC_OOM allows access to 75%.
+
+High-order atomic allocations are explicitly handled with the caveat that
+no __GFP_ATOMIC flag means that any high-order allocation that specifies
+__GFP_HIGH and cannot enter direct reclaim will be treated as if it were
+GFP_ATOMIC.
+
+This patch (of 6):
+
+__GFP_HIGH aliases to ALLOC_HIGH but the name does not really hint what it
+means. As ALLOC_HIGH is internal to the allocator, rename it to
+ALLOC_MIN_RESERVE to document that the min reserves can be depleted.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-1-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20230113111217.14134-2-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h | 4 +++-
+ mm/page_alloc.c | 8 ++++----
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index cf3cb933eba3f..e6c96327b5855 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -593,7 +593,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #endif
+
+ #define ALLOC_HARDER 0x10 /* try to alloc harder */
+-#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
++#define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
++ * of the min watermark.
++ */
+ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */
+ #ifdef CONFIG_ZONE_DMA32
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index ae628574dc9fc..4e9e9cb98f336 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3865,7 +3865,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ /* free_pages may go negative - that's OK */
+ free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
+
+- if (alloc_flags & ALLOC_HIGH)
++ if (alloc_flags & ALLOC_MIN_RESERVE)
+ min -= min / 2;
+
+ if (unlikely(alloc_harder)) {
+@@ -4696,18 +4696,18 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+
+ /*
+- * __GFP_HIGH is assumed to be the same as ALLOC_HIGH
++ * __GFP_HIGH is assumed to be the same as ALLOC_MIN_RESERVE
+ * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
+ * to save two branches.
+ */
+- BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
++ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_MIN_RESERVE);
+ BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD);
+
+ /*
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
+- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
++ * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
+ */
+ alloc_flags |= (__force int)
+ (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
+--
+2.43.0
+
--- /dev/null
+From fc6b92e69f211e4a3637ec0d4b8a328496c067d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Jun 2022 13:54:19 +0100
+Subject: mm/page_alloc: split out buddy removal code from rmqueue into
+ separate helper
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 589d9973c1d2c3344a94a57441071340b0c71097 ]
+
+This is a preparation patch to allow the buddy removal code to be reused
+in a later patch.
+
+No functional change.
+
+Link: https://lkml.kernel.org/r/20220624125423.6126-4-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Tested-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Tested-by: Yu Zhao <yuzhao@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 81 ++++++++++++++++++++++++++++---------------------
+ 1 file changed, 47 insertions(+), 34 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 264cb1914ab5b..ae628574dc9fc 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3597,6 +3597,43 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
+ #endif
+ }
+
++static __always_inline
++struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
++ unsigned int order, unsigned int alloc_flags,
++ int migratetype)
++{
++ struct page *page;
++ unsigned long flags;
++
++ do {
++ page = NULL;
++ spin_lock_irqsave(&zone->lock, flags);
++ /*
++ * order-0 request can reach here when the pcplist is skipped
++ * due to non-CMA allocation context. HIGHATOMIC area is
++ * reserved for high-order atomic allocation, so order-0
++ * request should skip it.
++ */
++ if (order > 0 && alloc_flags & ALLOC_HARDER)
++ page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
++ if (!page) {
++ page = __rmqueue(zone, order, migratetype, alloc_flags);
++ if (!page) {
++ spin_unlock_irqrestore(&zone->lock, flags);
++ return NULL;
++ }
++ }
++ __mod_zone_freepage_state(zone, -(1 << order),
++ get_pcppage_migratetype(page));
++ spin_unlock_irqrestore(&zone->lock, flags);
++ } while (check_new_pages(page, order));
++
++ __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
++ zone_statistics(preferred_zone, zone, 1);
++
++ return page;
++}
++
+ /* Remove page from the per-cpu list, caller must protect the list */
+ static inline
+ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
+@@ -3677,9 +3714,14 @@ struct page *rmqueue(struct zone *preferred_zone,
+ gfp_t gfp_flags, unsigned int alloc_flags,
+ int migratetype)
+ {
+- unsigned long flags;
+ struct page *page;
+
++ /*
++ * We most definitely don't want callers attempting to
++ * allocate greater than order-1 page units with __GFP_NOFAIL.
++ */
++ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
++
+ if (likely(pcp_allowed_order(order))) {
+ /*
+ * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
+@@ -3693,35 +3735,10 @@ struct page *rmqueue(struct zone *preferred_zone,
+ }
+ }
+
+- /*
+- * We most definitely don't want callers attempting to
+- * allocate greater than order-1 page units with __GFP_NOFAIL.
+- */
+- WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+-
+- do {
+- page = NULL;
+- spin_lock_irqsave(&zone->lock, flags);
+- /*
+- * order-0 request can reach here when the pcplist is skipped
+- * due to non-CMA allocation context. HIGHATOMIC area is
+- * reserved for high-order atomic allocation, so order-0
+- * request should skip it.
+- */
+- if (order > 0 && alloc_flags & ALLOC_HARDER)
+- page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+- if (!page) {
+- page = __rmqueue(zone, order, migratetype, alloc_flags);
+- if (!page)
+- goto failed;
+- }
+- __mod_zone_freepage_state(zone, -(1 << order),
+- get_pcppage_migratetype(page));
+- spin_unlock_irqrestore(&zone->lock, flags);
+- } while (check_new_pages(page, order));
+-
+- __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+- zone_statistics(preferred_zone, zone, 1);
++ page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
++ migratetype);
++ if (unlikely(!page))
++ return NULL;
+
+ out:
+ /* Separate test+clear to avoid unnecessary atomics */
+@@ -3732,10 +3749,6 @@ struct page *rmqueue(struct zone *preferred_zone,
+
+ VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
+ return page;
+-
+-failed:
+- spin_unlock_irqrestore(&zone->lock, flags);
+- return NULL;
+ }
+
+ #ifdef CONFIG_FAIL_PAGE_ALLOC
+--
+2.43.0
+
--- /dev/null
+From a0507ad4ef8d62903a10c963660969828cc572d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:13 +0000
+Subject: mm/page_alloc: treat RT tasks similar to __GFP_HIGH
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit c988dcbecf3fd5430921eaa3fe9054754f76d185 ]
+
+RT tasks are allowed to dip below the min reserve, but ALLOC_HARDER is
+typically combined with ALLOC_MIN_RESERVE, so RT tasks are a little
+unusual. While there is some justification for allowing RT tasks access
+to memory reserves, there is a strong chance that an RT task that is also
+under memory pressure is at risk of missing deadlines anyway. Relax how
+much of the reserves an RT task can access by treating it the same as
+__GFP_HIGH allocations.
+
+Note that in a future kernel release the RT special casing will be
+removed. Hard realtime tasks should be locking down resources in advance
+and ensuring enough memory is available. Even a soft-realtime task like
+audio or video live decoding, which cannot jitter, should be allocating
+both memory and any disk space required up-front before the recording
+starts instead of relying on reserves. At best, reserve access will only
+delay the problem by a very short interval.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-3-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 4e9e9cb98f336..72835cf4034bc 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -4725,7 +4725,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+ */
+ alloc_flags &= ~ALLOC_CPUSET;
+ } else if (unlikely(rt_task(current)) && in_task())
+- alloc_flags |= ALLOC_HARDER;
++ alloc_flags |= ALLOC_MIN_RESERVE;
+
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
+
+--
+2.43.0
+
--- /dev/null
+From fe6424c229e1c0e381b3a30dc093e7ebcc43e8ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 19:43:47 +0800
+Subject: ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow
+
+From: Edward Adam Davis <eadavis@qq.com>
+
+[ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ]
+
+Syzbot reported a kernel BUG in ocfs2_truncate_inline. There are two
+reasons for this: first, the parameter value passed is greater than
+ocfs2_max_inline_data_with_xattr; second, the start and end parameters
+of ocfs2_truncate_inline are "unsigned int".
+
+So, we need to add a sanity check for byte_start and byte_len right
+before ocfs2_truncate_inline() in ocfs2_remove_inode_range(); if they are
+greater than ocfs2_max_inline_data_with_xattr, return -EINVAL.
+
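+As an illustration of why passing a u64 into "unsigned int" parameters
+can defeat the callee's own bounds checking (stand-alone sketch, not
+kernel code; the names are made up):
+
+	#include <stdio.h>
+
+	/* stand-in for ocfs2_truncate_inline()'s unsigned int parameters */
+	static void truncate_inline(unsigned int start, unsigned int end)
+	{
+		printf("callee sees [%u, %u)\n", start, end);
+	}
+
+	int main(void)
+	{
+		unsigned long long byte_start = 0x100000000ULL;	/* 4 GiB */
+		unsigned long long byte_len = 16;
+
+		/* the caller means [4G, 4G + 16) but the callee sees [0, 16) */
+		truncate_inline(byte_start, byte_start + byte_len);
+		return 0;
+	}
+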
+Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com
+Fixes: 1afc32b95233 ("ocfs2: Write support for inline data")
+Signed-off-by: Edward Adam Davis <eadavis@qq.com>
+Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ocfs2/file.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index 403c71a485c7c..fc1e929ae0381 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1787,6 +1787,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
+ return 0;
+
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
++ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
++
++ if (byte_start > id_count || byte_start + byte_len > id_count) {
++ ret = -EINVAL;
++ mlog_errno(ret);
++ goto out;
++ }
++
+ ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
+ byte_start + byte_len, 0);
+ if (ret) {
+--
+2.43.0
+
--- /dev/null
+From 793a7bb0be6deacd2f96f42fb3d3698eca37b010 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Sep 2024 16:02:33 +0200
+Subject: riscv: efi: Set NX compat flag in PE/COFF header
+
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+
+[ Upstream commit d41373a4b910961df5a5e3527d7bde6ad45ca438 ]
+
+The IMAGE_DLLCHARACTERISTICS_NX_COMPAT flag informs the firmware that
+the EFI binary does not rely on pages that are both executable and
+writable.
+
+The flag is used by some distro versions of GRUB to decide if the EFI
+binary may be executed.
+
+As the Linux kernel neither has RWX sections nor needs RWX pages for
+relocation, we should set the flag.
+
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
+Fixes: cb7d2dd5612a ("RISC-V: Add PE/COFF header for EFI stub")
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240929140233.211800-1-heinrich.schuchardt@canonical.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/efi-header.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
+index 8e733aa48ba6c..c306f3a6a800e 100644
+--- a/arch/riscv/kernel/efi-header.S
++++ b/arch/riscv/kernel/efi-header.S
+@@ -59,7 +59,7 @@ extra_header_fields:
+ .long efi_header_end - _start // SizeOfHeaders
+ .long 0 // CheckSum
+ .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
+- .short 0 // DllCharacteristics
++ .short IMAGE_DLL_CHARACTERISTICS_NX_COMPAT // DllCharacteristics
+ .quad 0 // SizeOfStackReserve
+ .quad 0 // SizeOfStackCommit
+ .quad 0 // SizeOfHeapReserve
+--
+2.43.0
+
--- /dev/null
+From 62bbec0f2fec66bfe961fb0cd642088cd4796c1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:39 +0800
+Subject: riscv: Remove duplicated GET_RM
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 164f66de6bb6ef454893f193c898dc8f1da6d18b ]
+
+The macro GET_RM is defined twice in this file; one definition can be
+removed.
+
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241008094141.549248-3-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps_misaligned.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
+index b246c3dc69930..d548d6992d988 100644
+--- a/arch/riscv/kernel/traps_misaligned.c
++++ b/arch/riscv/kernel/traps_misaligned.c
+@@ -131,8 +131,6 @@
+ #define REG_PTR(insn, pos, regs) \
+ (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))
+
+-#define GET_RM(insn) (((insn) >> 12) & 7)
+-
+ #define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
+ #define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
+ #define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
+--
+2.43.0
+
--- /dev/null
+From 447c6d669f5bcd209d458ade7941840d3b37714c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:38 +0800
+Subject: riscv: Remove unused GENERATING_ASM_OFFSETS
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 46d4e5ac6f2f801f97bcd0ec82365969197dc9b1 ]
+
+The macro is not used in the current version of the kernel; it looks
+like it can be removed to avoid a build warning:
+
+../arch/riscv/kernel/asm-offsets.c: At top level:
+../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros]
+ 7 | #define GENERATING_ASM_OFFSETS
+
+Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()")
+Cc: stable@vger.kernel.org
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Link: https://lore.kernel.org/r/20241008094141.549248-2-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/asm-offsets.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index 90f8ce64fa6f1..0b6064fec9e07 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -4,8 +4,6 @@
+ * Copyright (C) 2017 SiFive
+ */
+
+-#define GENERATING_ASM_OFFSETS
+-
+ #include <linux/kbuild.h>
+ #include <linux/sched.h>
+ #include <asm/thread_info.h>
+--
+2.43.0
+
--- /dev/null
+From 5998836eb1a921346ceb0364c5a3734dde769cec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 11:20:10 +0800
+Subject: riscv: Use '%u' to format the output of 'cpu'
+
+From: WangYuli <wangyuli@uniontech.com>
+
+[ Upstream commit e0872ab72630dada3ae055bfa410bf463ff1d1e0 ]
+
+'cpu' is an unsigned integer, so its conversion specifier should
+be %u, not %d.
+
+Suggested-by: Wentao Guan <guanwentao@uniontech.com>
+Suggested-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/all/alpine.DEB.2.21.2409122309090.40372@angie.orcam.me.uk/
+Signed-off-by: WangYuli <wangyuli@uniontech.com>
+Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
+Tested-by: Charlie Jenkins <charlie@rivosinc.com>
+Fixes: f1e58583b9c7 ("RISC-V: Support cpu hotplug")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/4C127DEECDA287C8+20241017032010.96772-1-wangyuli@uniontech.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/cpu-hotplug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
+index 66ddfba1cfbef..28a3fa6e67d79 100644
+--- a/arch/riscv/kernel/cpu-hotplug.c
++++ b/arch/riscv/kernel/cpu-hotplug.c
+@@ -71,7 +71,7 @@ void __cpu_die(unsigned int cpu)
+ if (cpu_ops[cpu]->cpu_is_stopped)
+ ret = cpu_ops[cpu]->cpu_is_stopped(cpu);
+ if (ret)
+- pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
++ pr_warn("CPU%u may not have stopped: %d\n", cpu, ret);
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 5c32140f24b70bc6811d7f5322075dd791a169c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 10:36:24 +0200
+Subject: riscv: vdso: Prevent the compiler from inserting calls to memset()
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit bf40167d54d55d4b54d0103713d86a8638fb9290 ]
+
+The compiler is smart enough to insert a call to memset() in
+riscv_vdso_get_cpus(), which generates a dynamic relocation.
+
+So prevent this by using the -fno-builtin option.
+
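+As a rough illustration (the structure and function below are made up,
+not the real vDSO code), this is the kind of construct a compiler may
+lower to a libc memset() call, which a vDSO object cannot tolerate
+because the call would need a dynamic relocation:
+
+	struct example_cpu_data {
+		unsigned long cpu;
+		unsigned long node;
+		unsigned long pad[14];
+	};
+
+	int example_get_cpus(struct example_cpu_data *out)
+	{
+		/* zeroing a large local may become memset(&tmp, 0, sizeof(tmp)) */
+		struct example_cpu_data tmp = { 0 };
+
+		tmp.cpu = 0;
+		*out = tmp;
+		return 0;
+	}
+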
+Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API")
+Cc: stable@vger.kernel.org
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20241016083625.136311-2-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/vdso/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
+index 06e6b27f3bcc9..c1b68f962bada 100644
+--- a/arch/riscv/kernel/vdso/Makefile
++++ b/arch/riscv/kernel/vdso/Makefile
+@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+
+ ccflags-y := -fno-stack-protector
+ ccflags-y += -DDISABLE_BRANCH_PROFILING
++ccflags-y += -fno-builtin
+
+ ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
+--
+2.43.0
+
iio-adc-ad7124-fix-division-by-zero-in-ad7124_set_channel_odr.patch
iio-light-veml6030-fix-microlux-value-calculation.patch
nilfs2-fix-potential-deadlock-with-newly-created-symlinks.patch
+riscv-vdso-prevent-the-compiler-from-inserting-calls.patch
+riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch
+riscv-use-u-to-format-the-output-of-cpu.patch
+riscv-remove-unused-generating_asm_offsets.patch
+riscv-remove-duplicated-get_rm.patch
+mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch
+mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch
+mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch
+mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch
+mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch
+mm-page_alloc-explicitly-record-high-order-atomic-al.patch
+mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch
+mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch
+mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch
+ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch