git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
author    Sasha Levin <sashal@kernel.org>
          Wed, 6 Nov 2024 01:54:06 +0000 (20:54 -0500)
committer Sasha Levin <sashal@kernel.org>
          Wed, 6 Nov 2024 01:55:03 +0000 (20:55 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
16 files changed:
queue-5.15/mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-explicitly-record-high-order-atomic-al.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch [new file with mode: 0644]
queue-5.15/mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch [new file with mode: 0644]
queue-5.15/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch [new file with mode: 0644]
queue-5.15/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch [new file with mode: 0644]
queue-5.15/riscv-remove-duplicated-get_rm.patch [new file with mode: 0644]
queue-5.15/riscv-remove-unused-generating_asm_offsets.patch [new file with mode: 0644]
queue-5.15/riscv-use-u-to-format-the-output-of-cpu.patch [new file with mode: 0644]
queue-5.15/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch b/queue-5.15/mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch
new file mode 100644 (file)
index 0000000..c7cc9cf
--- /dev/null
@@ -0,0 +1,80 @@
+From 8a6a5f3d06c606cfc62400eac84532a110cd2844 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Mar 2022 14:43:57 -0700
+Subject: mm/page_alloc: call check_new_pages() while zone spinlock is not held
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3313204c8ad553cf93f1ee8cc89456c73a7df938 ]
+
+For high-order pages not using the pcp lists, rmqueue() currently calls
+the costly check_new_pages() while the zone spinlock is held and hard
+irqs are masked.
+
+This is not needed; we can release the spinlock sooner to reduce zone
+spinlock contention.
+
+Note that after this patch, we call __mod_zone_freepage_state() before
+deciding to leak the page because it is in a bad state.
+
+Link: https://lkml.kernel.org/r/20220304170215.1868106-1-eric.dumazet@gmail.com
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Shakeel Butt <shakeelb@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Wei Xu <weixugc@google.com>
+Cc: Greg Thelen <gthelen@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 264efa022fa96..474150584ba48 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3698,10 +3698,10 @@ struct page *rmqueue(struct zone *preferred_zone,
+        * allocate greater than order-1 page units with __GFP_NOFAIL.
+        */
+       WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+-      spin_lock_irqsave(&zone->lock, flags);
+       do {
+               page = NULL;
++              spin_lock_irqsave(&zone->lock, flags);
+               /*
+                * order-0 request can reach here when the pcplist is skipped
+                * due to non-CMA allocation context. HIGHATOMIC area is
+@@ -3713,15 +3713,15 @@ struct page *rmqueue(struct zone *preferred_zone,
+                       if (page)
+                               trace_mm_page_alloc_zone_locked(page, order, migratetype);
+               }
+-              if (!page)
++              if (!page) {
+                       page = __rmqueue(zone, order, migratetype, alloc_flags);
+-      } while (page && check_new_pages(page, order));
+-      if (!page)
+-              goto failed;
+-
+-      __mod_zone_freepage_state(zone, -(1 << order),
+-                                get_pcppage_migratetype(page));
+-      spin_unlock_irqrestore(&zone->lock, flags);
++                      if (!page)
++                              goto failed;
++              }
++              __mod_zone_freepage_state(zone, -(1 << order),
++                                        get_pcppage_migratetype(page));
++              spin_unlock_irqrestore(&zone->lock, flags);
++      } while (check_new_pages(page, order));
+       __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+       zone_statistics(preferred_zone, zone, 1);
+-- 
+2.43.0
+
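A minimal sketch of the lock-narrowing pattern the patch above applies;
take_locked_candidate() and page_is_bad() are hypothetical stand-ins for
the allocator internals, not kernel symbols:

    struct page *alloc_and_check(struct zone *zone, unsigned int order)
    {
            struct page *page;
            unsigned long flags;

            do {
                    spin_lock_irqsave(&zone->lock, flags);
                    page = take_locked_candidate(zone, order);
                    spin_unlock_irqrestore(&zone->lock, flags);
                    if (!page)
                            return NULL;
                    /*
                     * Validate outside the lock: a candidate in a bad
                     * state is leaked and the loop retries afresh.
                     */
            } while (page_is_bad(page, order));

            return page;
    }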
diff --git a/queue-5.15/mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch b/queue-5.15/mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch
new file mode 100644 (file)
index 0000000..fd43444
--- /dev/null
@@ -0,0 +1,151 @@
+From bda5b38029b455b098258a2defce2740d878718c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:16 +0000
+Subject: mm/page_alloc: explicitly define how __GFP_HIGH non-blocking
+ allocations accesses reserves
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 1ebbb21811b76c3b932959787f37985af36f62fa ]
+
+GFP_ATOMIC allocations get flagged ALLOC_HARDER, which is a vague
+description.  In preparation for the removal of GFP_ATOMIC, redefine
+__GFP_ATOMIC to simply mean non-blocking and rename ALLOC_HARDER to
+ALLOC_NON_BLOCK accordingly.  __GFP_HIGH is required for access to
+reserves but non-blocking is granted more access.  For example, GFP_NOWAIT
+is non-blocking but has no special access to reserves.  A __GFP_NOFAIL
+blocking allocation is granted access similar to __GFP_HIGH if the only
+alternative is an OOM kill.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-6-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h   |  7 +++++--
+ mm/page_alloc.c | 44 ++++++++++++++++++++++++--------------------
+ 2 files changed, 29 insertions(+), 22 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index 717e75313693c..cd444aa7a10af 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -592,7 +592,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_OOM             ALLOC_NO_WATERMARKS
+ #endif
+-#define ALLOC_HARDER           0x10 /* try to alloc harder */
++#define ALLOC_NON_BLOCK                0x10 /* Caller cannot block. Allow access
++                                     * to 25% of the min watermark or
++                                     * 62.5% if __GFP_HIGH is set.
++                                     */
+ #define ALLOC_MIN_RESERVE      0x20 /* __GFP_HIGH set. Allow access to 50%
+                                      * of the min watermark.
+                                      */
+@@ -607,7 +610,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_KSWAPD          0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+ /* Flags that allow allocations below the min watermark. */
+-#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
++#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
+ enum ttu_flags;
+ struct tlbflush_unmap_batch;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 7778c2b11d8cb..404cee30dcc26 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3878,18 +3878,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+                * __GFP_HIGH allows access to 50% of the min reserve as well
+                * as OOM.
+                */
+-              if (alloc_flags & ALLOC_MIN_RESERVE)
++              if (alloc_flags & ALLOC_MIN_RESERVE) {
+                       min -= min / 2;
+-              /*
+-               * Non-blocking allocations can access some of the reserve
+-               * with more access if also __GFP_HIGH. The reasoning is that
+-               * a non-blocking caller may incur a more severe penalty
+-               * if it cannot get memory quickly, particularly if it's
+-               * also __GFP_HIGH.
+-               */
+-              if (alloc_flags & ALLOC_HARDER)
+-                      min -= min / 4;
++                      /*
++                       * Non-blocking allocations (e.g. GFP_ATOMIC) can
++                       * access more reserves than just __GFP_HIGH. Other
++                       * non-blocking allocations requests such as GFP_NOWAIT
++                       * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
++                       * access to the min reserve.
++                       */
++                      if (alloc_flags & ALLOC_NON_BLOCK)
++                              min -= min / 4;
++              }
+               /*
+                * OOM victims can try even harder than the normal reserve
+@@ -4729,28 +4730,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
+        * The caller may dip into page reserves a bit more if the caller
+        * cannot run direct reclaim, or if the caller has realtime scheduling
+        * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
+-       * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
++       * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
+        */
+       alloc_flags |= (__force int)
+               (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
+-      if (gfp_mask & __GFP_ATOMIC) {
++      if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
+               /*
+                * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+                * if it can't schedule.
+                */
+               if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+-                      alloc_flags |= ALLOC_HARDER;
++                      alloc_flags |= ALLOC_NON_BLOCK;
+                       if (order > 0)
+                               alloc_flags |= ALLOC_HIGHATOMIC;
+               }
+               /*
+-               * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+-               * comment for __cpuset_node_allowed().
++               * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
++               * GFP_ATOMIC) rather than fail, see the comment for
++               * __cpuset_node_allowed().
+                */
+-              alloc_flags &= ~ALLOC_CPUSET;
++              if (alloc_flags & ALLOC_MIN_RESERVE)
++                      alloc_flags &= ~ALLOC_CPUSET;
+       } else if (unlikely(rt_task(current)) && in_task())
+               alloc_flags |= ALLOC_MIN_RESERVE;
+@@ -5188,12 +5191,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+               WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER);
+               /*
+-               * Help non-failing allocations by giving them access to memory
+-               * reserves but do not use ALLOC_NO_WATERMARKS because this
++               * Help non-failing allocations by giving some access to memory
++               * reserves normally used for high priority non-blocking
++               * allocations but do not use ALLOC_NO_WATERMARKS because this
+                * could deplete whole memory reserves which would just make
+-               * the situation worse
++               * the situation worse.
+                */
+-              page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
++              page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
+               if (page)
+                       goto got_pg;
+-- 
+2.43.0
+
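Condensing the two hunks above, the watermark scaling on 5.15 ends up as in
this sketch (a paraphrase of the backport, not a verbatim quote):

    static long scaled_min(long min, unsigned int alloc_flags)
    {
            if (alloc_flags & ALLOC_MIN_RESERVE) {     /* __GFP_HIGH */
                    min -= min / 2;                    /* 50% of reserve usable */
                    if (alloc_flags & ALLOC_NON_BLOCK) /* e.g. GFP_ATOMIC */
                            min -= min / 4;            /* 62.5% usable in total */
            }
            if (alloc_flags & ALLOC_OOM)               /* OOM victims dig deepest */
                    min -= min / 2;
            return min;
    }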
diff --git a/queue-5.15/mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch b/queue-5.15/mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch
new file mode 100644 (file)
index 0000000..2463171
--- /dev/null
@@ -0,0 +1,113 @@
+From 30e94d83752cef0e1a750f02655ac56ec3dec6f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:15 +0000
+Subject: mm/page_alloc: explicitly define what alloc flags deplete min
+ reserves
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit ab3508854353793cd35e348fde89a5c09b2fd8b5 ]
+
+As there are more ALLOC_ flags that affect reserves, define what flags
+affect reserves and clarify the effect of each flag.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-5-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h   |  3 +++
+ mm/page_alloc.c | 34 ++++++++++++++++++++++------------
+ 2 files changed, 25 insertions(+), 12 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index 136f435e0f1ab..717e75313693c 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -606,6 +606,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #define ALLOC_HIGHATOMIC      0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+ #define ALLOC_KSWAPD          0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
++/* Flags that allow allocations below the min watermark. */
++#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
++
+ enum ttu_flags;
+ struct tlbflush_unmap_batch;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 43122de999c4c..7778c2b11d8cb 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3838,15 +3838,14 @@ ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE);
+ static inline long __zone_watermark_unusable_free(struct zone *z,
+                               unsigned int order, unsigned int alloc_flags)
+ {
+-      const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+       long unusable_free = (1 << order) - 1;
+       /*
+-       * If the caller does not have rights to ALLOC_HARDER then subtract
+-       * the high-atomic reserves. This will over-estimate the size of the
+-       * atomic reserve but it avoids a search.
++       * If the caller does not have rights to reserves below the min
++       * watermark then subtract the high-atomic reserves. This will
++       * over-estimate the size of the atomic reserve but it avoids a search.
+        */
+-      if (likely(!alloc_harder))
++      if (likely(!(alloc_flags & ALLOC_RESERVES)))
+               unusable_free += z->nr_reserved_highatomic;
+ #ifdef CONFIG_CMA
+@@ -3870,25 +3869,36 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+ {
+       long min = mark;
+       int o;
+-      const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
+       /* free_pages may go negative - that's OK */
+       free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
+-      if (alloc_flags & ALLOC_MIN_RESERVE)
+-              min -= min / 2;
++      if (unlikely(alloc_flags & ALLOC_RESERVES)) {
++              /*
++               * __GFP_HIGH allows access to 50% of the min reserve as well
++               * as OOM.
++               */
++              if (alloc_flags & ALLOC_MIN_RESERVE)
++                      min -= min / 2;
+-      if (unlikely(alloc_harder)) {
+               /*
+-               * OOM victims can try even harder than normal ALLOC_HARDER
++               * Non-blocking allocations can access some of the reserve
++               * with more access if also __GFP_HIGH. The reasoning is that
++               * a non-blocking caller may incur a more severe penalty
++               * if it cannot get memory quickly, particularly if it's
++               * also __GFP_HIGH.
++               */
++              if (alloc_flags & ALLOC_HARDER)
++                      min -= min / 4;
++
++              /*
++               * OOM victims can try even harder than the normal reserve
+                * users on the grounds that it's definitely going to be in
+                * the exit path shortly and free memory. Any allocation it
+                * makes during the free path will be small and short-lived.
+                */
+               if (alloc_flags & ALLOC_OOM)
+                       min -= min / 2;
+-              else
+-                      min -= min / 4;
+       }
+       /*
+-- 
+2.43.0
+
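The practical effect of the new mask is that reserve rights can be screened
with a single branch; a sketch of the test the first hunk installs:

    /* Any bit in ALLOC_RESERVES grants some access below the min watermark. */
    if (likely(!(alloc_flags & ALLOC_RESERVES)))
            unusable_free += z->nr_reserved_highatomic; /* caller may not use them */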
diff --git a/queue-5.15/mm-page_alloc-explicitly-record-high-order-atomic-al.patch b/queue-5.15/mm-page_alloc-explicitly-record-high-order-atomic-al.patch
new file mode 100644 (file)
index 0000000..44c45ca
--- /dev/null
@@ -0,0 +1,124 @@
+From 033cc55389ad1443530b5b69b70d07760aee696e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:14 +0000
+Subject: mm/page_alloc: explicitly record high-order atomic allocations in
+ alloc_flags
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit eb2e2b425c6984ca8034448a3f2c680622bd3d4d ]
+
+A high-order ALLOC_HARDER allocation is assumed to be atomic.  While that
+is accurate, it changes later in the series.  In preparation, explicitly
+record high-order atomic allocations in gfp_to_alloc_flags().
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-4-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h   |  1 +
+ mm/page_alloc.c | 29 +++++++++++++++++++++++------
+ 2 files changed, 24 insertions(+), 6 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index e6c96327b5855..136f435e0f1ab 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -603,6 +603,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #else
+ #define ALLOC_NOFRAGMENT        0x0
+ #endif
++#define ALLOC_HIGHATOMIC      0x200 /* Allows access to MIGRATE_HIGHATOMIC */
+ #define ALLOC_KSWAPD          0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
+ enum ttu_flags;
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 72835cf4034bc..43122de999c4c 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3614,10 +3614,20 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+                * reserved for high-order atomic allocation, so order-0
+                * request should skip it.
+                */
+-              if (order > 0 && alloc_flags & ALLOC_HARDER)
++              if (alloc_flags & ALLOC_HIGHATOMIC)
+                       page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+               if (!page) {
+                       page = __rmqueue(zone, order, migratetype, alloc_flags);
++
++                      /*
++                       * If the allocation fails, allow OOM handling access
++                       * to HIGHATOMIC reserves as failing now is worse than
++                       * failing a high-order atomic allocation in the
++                       * future.
++                       */
++                      if (!page && (alloc_flags & ALLOC_OOM))
++                              page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
++
+                       if (!page) {
+                               spin_unlock_irqrestore(&zone->lock, flags);
+                               return NULL;
+@@ -3912,8 +3922,10 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+                       return true;
+               }
+ #endif
+-              if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC))
++              if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) &&
++                  !free_area_empty(area, MIGRATE_HIGHATOMIC)) {
+                       return true;
++              }
+       }
+       return false;
+ }
+@@ -4172,7 +4184,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
+                        * If this is a high-order atomic allocation then check
+                        * if the pageblock should be reserved for the future
+                        */
+-                      if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
++                      if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
+                               reserve_highatomic_pageblock(page, zone, order);
+                       return page;
+@@ -4691,7 +4703,7 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
+ }
+ static inline unsigned int
+-gfp_to_alloc_flags(gfp_t gfp_mask)
++gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
+ {
+       unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+@@ -4717,8 +4729,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+                * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
+                * if it can't schedule.
+                */
+-              if (!(gfp_mask & __GFP_NOMEMALLOC))
++              if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+                       alloc_flags |= ALLOC_HARDER;
++
++                      if (order > 0)
++                              alloc_flags |= ALLOC_HIGHATOMIC;
++              }
++
+               /*
+                * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
+                * comment for __cpuset_node_allowed().
+@@ -4946,7 +4963,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
+        * kswapd needs to be woken up, and to avoid the cost of setting up
+        * alloc_flags precisely. So we do that now.
+        */
+-      alloc_flags = gfp_to_alloc_flags(gfp_mask);
++      alloc_flags = gfp_to_alloc_flags(gfp_mask, order);
+       /*
+        * We need to recalculate the starting point for the zonelist iterator
+-- 
+2.43.0
+
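Condensed, the flag derivation this patch adds to gfp_to_alloc_flags() is
sketched below, using the pre-rename 5.15 names; atomic_alloc_flags() is a
hypothetical wrapper, not a kernel function:

    static unsigned int atomic_alloc_flags(gfp_t gfp_mask, unsigned int order)
    {
            unsigned int flags = 0;

            if ((gfp_mask & __GFP_ATOMIC) && !(gfp_mask & __GFP_NOMEMALLOC)) {
                    flags |= ALLOC_HARDER;
                    if (order > 0)                 /* high-order atomic request */
                            flags |= ALLOC_HIGHATOMIC;
            }
            return flags;
    }

Later checks can then test the explicit ALLOC_HIGHATOMIC bit instead of
re-deriving "order > 0 && (alloc_flags & ALLOC_HARDER)".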
diff --git a/queue-5.15/mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch b/queue-5.15/mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch
new file mode 100644 (file)
index 0000000..0c3b547
--- /dev/null
@@ -0,0 +1,141 @@
+From b8c131116b907997152c4662e209487719e4d4ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 May 2022 14:08:54 -0700
+Subject: mm/page_alloc: fix tracepoint mm_page_alloc_zone_locked()
+
+From: Wonhyuk Yang <vvghjk1234@gmail.com>
+
+[ Upstream commit 10e0f7530205799e7e971aba699a7cb3a47456de ]
+
+Currently, the tracepoint mm_page_alloc_zone_locked() doesn't show correct
+information.
+
+First, when alloc_flags has ALLOC_HARDER/ALLOC_CMA, the page can be
+allocated from MIGRATE_HIGHATOMIC/MIGRATE_CMA.  Nevertheless, the
+tracepoint uses the requested migration type, not MIGRATE_HIGHATOMIC or
+MIGRATE_CMA.
+
+Second, after commit 44042b4498728 ("mm/page_alloc: allow high-order pages
+to be stored on the per-cpu lists") the percpu-list can store high-order
+pages.  But the tracepoint determines whether it is a refill of the
+percpu-list by comparing the requested order with 0.
+
+To handle these problems, make mm_page_alloc_zone_locked() only be called
+by __rmqueue_smallest() with the correct migration type.  With a new
+argument called percpu_refill, it can show roughly whether it is a refill
+of the percpu-list.
+
+Link: https://lkml.kernel.org/r/20220512025307.57924-1-vvghjk1234@gmail.com
+Signed-off-by: Wonhyuk Yang <vvghjk1234@gmail.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Cc: Baik Song An <bsahn@etri.re.kr>
+Cc: Hong Yeon Kim <kimhy@etri.re.kr>
+Cc: Taeung Song <taeung@reallinux.co.kr>
+Cc: <linuxgeek@linuxgeek.io>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/kmem.h | 14 +++++++++-----
+ mm/page_alloc.c             | 13 +++++--------
+ 2 files changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
+index ddc8c944f417a..f89fb3afcd46a 100644
+--- a/include/trace/events/kmem.h
++++ b/include/trace/events/kmem.h
+@@ -229,20 +229,23 @@ TRACE_EVENT(mm_page_alloc,
+ DECLARE_EVENT_CLASS(mm_page,
+-      TP_PROTO(struct page *page, unsigned int order, int migratetype),
++      TP_PROTO(struct page *page, unsigned int order, int migratetype,
++               int percpu_refill),
+-      TP_ARGS(page, order, migratetype),
++      TP_ARGS(page, order, migratetype, percpu_refill),
+       TP_STRUCT__entry(
+               __field(        unsigned long,  pfn             )
+               __field(        unsigned int,   order           )
+               __field(        int,            migratetype     )
++              __field(        int,            percpu_refill   )
+       ),
+       TP_fast_assign(
+               __entry->pfn            = page ? page_to_pfn(page) : -1UL;
+               __entry->order          = order;
+               __entry->migratetype    = migratetype;
++              __entry->percpu_refill  = percpu_refill;
+       ),
+       TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d",
+@@ -250,14 +253,15 @@ DECLARE_EVENT_CLASS(mm_page,
+               __entry->pfn != -1UL ? __entry->pfn : 0,
+               __entry->order,
+               __entry->migratetype,
+-              __entry->order == 0)
++              __entry->percpu_refill)
+ );
+ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
+-      TP_PROTO(struct page *page, unsigned int order, int migratetype),
++      TP_PROTO(struct page *page, unsigned int order, int migratetype,
++               int percpu_refill),
+-      TP_ARGS(page, order, migratetype)
++      TP_ARGS(page, order, migratetype, percpu_refill)
+ );
+ TRACE_EVENT(mm_page_pcpu_drain,
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 474150584ba48..264cb1914ab5b 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2461,6 +2461,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
+               del_page_from_free_list(page, zone, current_order);
+               expand(zone, page, order, current_order, migratetype);
+               set_pcppage_migratetype(page, migratetype);
++              trace_mm_page_alloc_zone_locked(page, order, migratetype,
++                              pcp_allowed_order(order) &&
++                              migratetype < MIGRATE_PCPTYPES);
+               return page;
+       }
+@@ -2988,7 +2991,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
+                   zone_page_state(zone, NR_FREE_PAGES) / 2) {
+                       page = __rmqueue_cma_fallback(zone, order);
+                       if (page)
+-                              goto out;
++                              return page;
+               }
+       }
+ retry:
+@@ -3001,9 +3004,6 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
+                                                               alloc_flags))
+                       goto retry;
+       }
+-out:
+-      if (page)
+-              trace_mm_page_alloc_zone_locked(page, order, migratetype);
+       return page;
+ }
+@@ -3708,11 +3708,8 @@ struct page *rmqueue(struct zone *preferred_zone,
+                * reserved for high-order atomic allocation, so order-0
+                * request should skip it.
+                */
+-              if (order > 0 && alloc_flags & ALLOC_HARDER) {
++              if (order > 0 && alloc_flags & ALLOC_HARDER)
+                       page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+-                      if (page)
+-                              trace_mm_page_alloc_zone_locked(page, order, migratetype);
+-              }
+               if (!page) {
+                       page = __rmqueue(zone, order, migratetype, alloc_flags);
+                       if (!page)
+-- 
+2.43.0
+
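After this patch the refill flag reaching the tracepoint is computed once,
inside __rmqueue_smallest(), essentially as in the hunk above:

    /* A pcp refill: the order fits on the pcplists and the migratetype
     * is one the pcplists actually hold. */
    trace_mm_page_alloc_zone_locked(page, order, migratetype,
                    pcp_allowed_order(order) &&
                    migratetype < MIGRATE_PCPTYPES);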
diff --git a/queue-5.15/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch b/queue-5.15/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch
new file mode 100644 (file)
index 0000000..fdf33a6
--- /dev/null
@@ -0,0 +1,88 @@
+From 685658478127e0b3fa3ea017a891f5f30f55011b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Oct 2024 13:07:37 +0100
+Subject: mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic
+ reserves
+
+From: Matt Fleming <mfleming@cloudflare.com>
+
+[ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ]
+
+Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to
+fail even though free pages are available in the highatomic reserves.
+GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock()
+since it's only run from reclaim.
+
+Given that such allocations will pass the watermarks in
+__zone_watermark_unusable_free(), it makes sense to fallback to highatomic
+reserves the same way that ALLOC_OOM can.
+
+This fixes order-0 page allocation failures observed on Cloudflare's fleet
+when handling network packets:
+
+  kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC),
+  nodemask=(null),cpuset=/,mems_allowed=0-7
+  CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G           O 6.6.43-CUSTOM #1
+  Hardware name: MACHINE
+  Call Trace:
+   <IRQ>
+   dump_stack_lvl+0x3c/0x50
+   warn_alloc+0x13a/0x1c0
+   __alloc_pages_slowpath.constprop.0+0xc9d/0xd10
+   __alloc_pages+0x327/0x340
+   __napi_alloc_skb+0x16d/0x1f0
+   bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en]
+   bnxt_rx_pkt+0x201/0x15e0 [bnxt_en]
+   __bnxt_poll_work+0x156/0x2b0 [bnxt_en]
+   bnxt_poll+0xd9/0x1c0 [bnxt_en]
+   __napi_poll+0x2b/0x1b0
+   bpf_trampoline_6442524138+0x7d/0x1000
+   __napi_poll+0x5/0x1b0
+   net_rx_action+0x342/0x740
+   handle_softirqs+0xcf/0x2b0
+   irq_exit_rcu+0x6c/0x90
+   sysvec_apic_timer_interrupt+0x72/0x90
+   </IRQ>
+
+[mfleming@cloudflare.com: update comment]
+  Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com
+Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com
+Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/
+Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs")
+Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 404cee30dcc26..6a64a75184888 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3620,12 +3620,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+                       page = __rmqueue(zone, order, migratetype, alloc_flags);
+                       /*
+-                       * If the allocation fails, allow OOM handling access
+-                       * to HIGHATOMIC reserves as failing now is worse than
+-                       * failing a high-order atomic allocation in the
+-                       * future.
++                       * If the allocation fails, allow OOM handling and
++                       * order-0 (atomic) allocs access to HIGHATOMIC
++                       * reserves as failing now is worse than failing a
++                       * high-order atomic allocation in the future.
+                        */
+-                      if (!page && (alloc_flags & ALLOC_OOM))
++                      if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
+                               page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+                       if (!page) {
+-- 
+2.43.0
+
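With the fix applied, the fallback order inside rmqueue_buddy() on 5.15
reads roughly as follows (a sketch with locking and the retry loop
stripped out):

    page = NULL;
    if (alloc_flags & ALLOC_HIGHATOMIC)    /* high-order atomic request */
            page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
    if (!page)
            page = __rmqueue(zone, order, migratetype, alloc_flags);
    if (!page && (alloc_flags & (ALLOC_OOM | ALLOC_NON_BLOCK)))
            /* Last resort: let OOM victims and order-0 atomics raid the
             * highatomic reserve rather than fail outright. */
            page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);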
diff --git a/queue-5.15/mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch b/queue-5.15/mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch
new file mode 100644 (file)
index 0000000..d0f82dd
--- /dev/null
@@ -0,0 +1,113 @@
+From 1331502c1782067490c126cbac49572ca69cd467 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:12 +0000
+Subject: mm/page_alloc: rename ALLOC_HIGH to ALLOC_MIN_RESERVE
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 524c48072e5673f4511f1ad81493e2485863fd65 ]
+
+Patch series "Discard __GFP_ATOMIC", v3.
+
+Neil's patch has been residing in mm-unstable as commit 2fafb4fe8f7a ("mm:
+discard __GFP_ATOMIC") for a long time and was recently brought up again.
+Most recently, I was worried that __GFP_HIGH allocations could use
+high-order atomic reserves, which is unintentional, but there was no
+response, so let's revisit -- this series reworks how min reserves are
+used, protects high-order reserves, and then finishes with Neil's patch
+with very minor modifications so it fits on top.
+
+There was a review discussion on renaming __GFP_DIRECT_RECLAIM to
+__GFP_ALLOW_BLOCKING but I didn't think it was that big an issue and is
+orthogonal to the removal of __GFP_ATOMIC.
+
+There were some concerns about how the gfp flags affect the min reserves
+but it never reached a solid conclusion so I made my own attempt.
+
+The series tries to iron out some of the details of how reserves are used.
+ALLOC_HIGH becomes ALLOC_MIN_RESERVE, ALLOC_HARDER becomes ALLOC_NON_BLOCK,
+and the series documents how the reserves are affected.  For example,
+ALLOC_NON_BLOCK (no direct reclaim) on its own allows 25% of the min
+reserve.  ALLOC_MIN_RESERVE (__GFP_HIGH) allows 50%, and both combined
+allow deeper access again.  ALLOC_OOM allows access to 75%.
+
+High-order atomic allocations are explicitly handled with the caveat that
+no __GFP_ATOMIC flag means that any high-order allocation that specifies
+__GFP_HIGH and cannot enter direct reclaim will be treated as if it was
+GFP_ATOMIC.
+
+This patch (of 6):
+
+__GFP_HIGH aliases to ALLOC_HIGH but the name does not really hint what it
+means.  As ALLOC_HIGH is internal to the allocator, rename it to
+ALLOC_MIN_RESERVE to document that the min reserves can be depleted.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-1-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20230113111217.14134-2-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/internal.h   | 4 +++-
+ mm/page_alloc.c | 8 ++++----
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/mm/internal.h b/mm/internal.h
+index cf3cb933eba3f..e6c96327b5855 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -593,7 +593,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
+ #endif
+ #define ALLOC_HARDER           0x10 /* try to alloc harder */
+-#define ALLOC_HIGH             0x20 /* __GFP_HIGH set */
++#define ALLOC_MIN_RESERVE      0x20 /* __GFP_HIGH set. Allow access to 50%
++                                     * of the min watermark.
++                                     */
+ #define ALLOC_CPUSET           0x40 /* check for correct cpuset */
+ #define ALLOC_CMA              0x80 /* allow allocations from CMA areas */
+ #ifdef CONFIG_ZONE_DMA32
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index ae628574dc9fc..4e9e9cb98f336 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3865,7 +3865,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
+       /* free_pages may go negative - that's OK */
+       free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
+-      if (alloc_flags & ALLOC_HIGH)
++      if (alloc_flags & ALLOC_MIN_RESERVE)
+               min -= min / 2;
+       if (unlikely(alloc_harder)) {
+@@ -4696,18 +4696,18 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+       unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
+       /*
+-       * __GFP_HIGH is assumed to be the same as ALLOC_HIGH
++       * __GFP_HIGH is assumed to be the same as ALLOC_MIN_RESERVE
+        * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
+        * to save two branches.
+        */
+-      BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
++      BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_MIN_RESERVE);
+       BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD);
+       /*
+        * The caller may dip into page reserves a bit more if the caller
+        * cannot run direct reclaim, or if the caller has realtime scheduling
+        * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
+-       * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
++       * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
+        */
+       alloc_flags |= (__force int)
+               (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
+-- 
+2.43.0
+
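A worked example of the percentages quoted in the series description,
assuming an illustrative min watermark of 1024 pages (the figure is not
from the patch):

    flags                               effective watermark   reserve usable
    ALLOC_NON_BLOCK alone               1024 - 256 = 768      25%
    ALLOC_MIN_RESERVE (__GFP_HIGH)      1024 - 512 = 512      50%
    both combined                        512 - 128 = 384      62.5%
    ALLOC_OOM                           1024 - 768 = 256      75%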
diff --git a/queue-5.15/mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch b/queue-5.15/mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch
new file mode 100644 (file)
index 0000000..f08ffef
--- /dev/null
@@ -0,0 +1,151 @@
+From fc6b92e69f211e4a3637ec0d4b8a328496c067d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Jun 2022 13:54:19 +0100
+Subject: mm/page_alloc: split out buddy removal code from rmqueue into
+ separate helper
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit 589d9973c1d2c3344a94a57441071340b0c71097 ]
+
+This is a preparation patch to allow the buddy removal code to be reused in
+a later patch.
+
+No functional change.
+
+Link: https://lkml.kernel.org/r/20220624125423.6126-4-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Tested-by: Minchan Kim <minchan@kernel.org>
+Acked-by: Minchan Kim <minchan@kernel.org>
+Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Tested-by: Yu Zhao <yuzhao@google.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Marcelo Tosatti <mtosatti@redhat.com>
+Cc: Marek Szyprowski <m.szyprowski@samsung.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 81 ++++++++++++++++++++++++++++---------------------
+ 1 file changed, 47 insertions(+), 34 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 264cb1914ab5b..ae628574dc9fc 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3597,6 +3597,43 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
+ #endif
+ }
++static __always_inline
++struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
++                         unsigned int order, unsigned int alloc_flags,
++                         int migratetype)
++{
++      struct page *page;
++      unsigned long flags;
++
++      do {
++              page = NULL;
++              spin_lock_irqsave(&zone->lock, flags);
++              /*
++               * order-0 request can reach here when the pcplist is skipped
++               * due to non-CMA allocation context. HIGHATOMIC area is
++               * reserved for high-order atomic allocation, so order-0
++               * request should skip it.
++               */
++              if (order > 0 && alloc_flags & ALLOC_HARDER)
++                      page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
++              if (!page) {
++                      page = __rmqueue(zone, order, migratetype, alloc_flags);
++                      if (!page) {
++                              spin_unlock_irqrestore(&zone->lock, flags);
++                              return NULL;
++                      }
++              }
++              __mod_zone_freepage_state(zone, -(1 << order),
++                                        get_pcppage_migratetype(page));
++              spin_unlock_irqrestore(&zone->lock, flags);
++      } while (check_new_pages(page, order));
++
++      __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
++      zone_statistics(preferred_zone, zone, 1);
++
++      return page;
++}
++
+ /* Remove page from the per-cpu list, caller must protect the list */
+ static inline
+ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
+@@ -3677,9 +3714,14 @@ struct page *rmqueue(struct zone *preferred_zone,
+                       gfp_t gfp_flags, unsigned int alloc_flags,
+                       int migratetype)
+ {
+-      unsigned long flags;
+       struct page *page;
++      /*
++       * We most definitely don't want callers attempting to
++       * allocate greater than order-1 page units with __GFP_NOFAIL.
++       */
++      WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
++
+       if (likely(pcp_allowed_order(order))) {
+               /*
+                * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
+@@ -3693,35 +3735,10 @@ struct page *rmqueue(struct zone *preferred_zone,
+               }
+       }
+-      /*
+-       * We most definitely don't want callers attempting to
+-       * allocate greater than order-1 page units with __GFP_NOFAIL.
+-       */
+-      WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+-
+-      do {
+-              page = NULL;
+-              spin_lock_irqsave(&zone->lock, flags);
+-              /*
+-               * order-0 request can reach here when the pcplist is skipped
+-               * due to non-CMA allocation context. HIGHATOMIC area is
+-               * reserved for high-order atomic allocation, so order-0
+-               * request should skip it.
+-               */
+-              if (order > 0 && alloc_flags & ALLOC_HARDER)
+-                      page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+-              if (!page) {
+-                      page = __rmqueue(zone, order, migratetype, alloc_flags);
+-                      if (!page)
+-                              goto failed;
+-              }
+-              __mod_zone_freepage_state(zone, -(1 << order),
+-                                        get_pcppage_migratetype(page));
+-              spin_unlock_irqrestore(&zone->lock, flags);
+-      } while (check_new_pages(page, order));
+-
+-      __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+-      zone_statistics(preferred_zone, zone, 1);
++      page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
++                                                      migratetype);
++      if (unlikely(!page))
++              return NULL;
+ out:
+       /* Separate test+clear to avoid unnecessary atomics */
+@@ -3732,10 +3749,6 @@ struct page *rmqueue(struct zone *preferred_zone,
+       VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
+       return page;
+-
+-failed:
+-      spin_unlock_irqrestore(&zone->lock, flags);
+-      return NULL;
+ }
+ #ifdef CONFIG_FAIL_PAGE_ALLOC
+-- 
+2.43.0
+
diff --git a/queue-5.15/mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch b/queue-5.15/mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch
new file mode 100644 (file)
index 0000000..abc62b8
--- /dev/null
@@ -0,0 +1,55 @@
+From a0507ad4ef8d62903a10c963660969828cc572d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 11:12:13 +0000
+Subject: mm/page_alloc: treat RT tasks similar to __GFP_HIGH
+
+From: Mel Gorman <mgorman@techsingularity.net>
+
+[ Upstream commit c988dcbecf3fd5430921eaa3fe9054754f76d185 ]
+
+RT tasks are allowed to dip below the min reserve but ALLOC_HARDER is
+typically combined with ALLOC_MIN_RESERVE so RT tasks are a little
+unusual.  While there is some justification for allowing RT tasks access
+to memory reserves, there is a strong chance that an RT task that is also
+under memory pressure is at risk of missing deadlines anyway.  Relax how
+much reserves an RT task can access by treating it the same as __GFP_HIGH
+allocations.
+
+Note that in a future kernel release the RT special casing will be
+removed.  Hard realtime tasks should be locking down resources in advance
+and ensuring enough memory is available.  Even a soft-realtime task like
+audio or video live decoding which cannot jitter should be allocating both
+memory and any disk space required up-front before the recording starts
+instead of relying on reserves.  At best, reserve access will only delay
+the problem by a very short interval.
+
+Link: https://lkml.kernel.org/r/20230113111217.14134-3-mgorman@techsingularity.net
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: NeilBrown <neilb@suse.de>
+Cc: Thierry Reding <thierry.reding@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 281dd25c1a01 ("mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic reserves")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 4e9e9cb98f336..72835cf4034bc 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -4725,7 +4725,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
+                */
+               alloc_flags &= ~ALLOC_CPUSET;
+       } else if (unlikely(rt_task(current)) && in_task())
+-              alloc_flags |= ALLOC_HARDER;
++              alloc_flags |= ALLOC_MIN_RESERVE;
+       alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
+-- 
+2.43.0
+
diff --git a/queue-5.15/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch b/queue-5.15/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch
new file mode 100644 (file)
index 0000000..6165c5b
--- /dev/null
@@ -0,0 +1,60 @@
+From fe6424c229e1c0e381b3a30dc093e7ebcc43e8ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 19:43:47 +0800
+Subject: ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow
+
+From: Edward Adam Davis <eadavis@qq.com>
+
+[ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ]
+
+Syzbot reported a kernel BUG in ocfs2_truncate_inline().  There are two
+reasons for this: first, the parameter value passed is greater than
+ocfs2_max_inline_data_with_xattr; second, the start and end parameters of
+ocfs2_truncate_inline() are "unsigned int".
+
+So, we need to add a sanity check for byte_start and byte_len right before
+ocfs2_truncate_inline() in ocfs2_remove_inode_range(): if they are greater
+than ocfs2_max_inline_data_with_xattr, return -EINVAL.
+
+Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com
+Fixes: 1afc32b95233 ("ocfs2: Write support for inline data")
+Signed-off-by: Edward Adam Davis <eadavis@qq.com>
+Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ocfs2/file.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index 403c71a485c7c..fc1e929ae0381 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1787,6 +1787,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
+               return 0;
+       if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
++              int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
++
++              if (byte_start > id_count || byte_start + byte_len > id_count) {
++                      ret = -EINVAL;
++                      mlog_errno(ret);
++                      goto out;
++              }
++
+               ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
+                                           byte_start + byte_len, 0);
+               if (ret) {
+-- 
+2.43.0
+
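The overflow described above is ordinary C truncation at the call
boundary; a standalone illustration (not ocfs2 code):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in with the same shape as ocfs2_truncate_inline()'s range
     * arguments: both parameters are only 32 bits wide. */
    static void truncate_inline(unsigned int start, unsigned int end)
    {
            printf("truncating [%u, %u)\n", start, end);
    }

    int main(void)
    {
            uint64_t byte_start = 0;
            uint64_t byte_len = 0x100000001ULL; /* larger than UINT32_MAX */

            /* Without the new range check, "end" silently wraps to 1: */
            truncate_inline(byte_start, byte_start + byte_len);
            return 0;
    }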
diff --git a/queue-5.15/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch b/queue-5.15/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch
new file mode 100644 (file)
index 0000000..c135605
--- /dev/null
@@ -0,0 +1,48 @@
+From 793a7bb0be6deacd2f96f42fb3d3698eca37b010 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Sep 2024 16:02:33 +0200
+Subject: riscv: efi: Set NX compat flag in PE/COFF header
+
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+
+[ Upstream commit d41373a4b910961df5a5e3527d7bde6ad45ca438 ]
+
+The IMAGE_DLLCHARACTERISTICS_NX_COMPAT flag informs the firmware that the
+EFI binary does not rely on pages that are both executable and
+writable.
+
+The flag is used by some distro versions of GRUB to decide if the EFI
+binary may be executed.
+
+As the Linux kernel neither has RWX sections nor needs RWX pages for
+relocation, we should set the flag.
+
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
+Fixes: cb7d2dd5612a ("RISC-V: Add PE/COFF header for EFI stub")
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240929140233.211800-1-heinrich.schuchardt@canonical.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/efi-header.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
+index 8e733aa48ba6c..c306f3a6a800e 100644
+--- a/arch/riscv/kernel/efi-header.S
++++ b/arch/riscv/kernel/efi-header.S
+@@ -59,7 +59,7 @@ extra_header_fields:
+       .long   efi_header_end - _start                 // SizeOfHeaders
+       .long   0                                       // CheckSum
+       .short  IMAGE_SUBSYSTEM_EFI_APPLICATION         // Subsystem
+-      .short  0                                       // DllCharacteristics
++      .short  IMAGE_DLL_CHARACTERISTICS_NX_COMPAT     // DllCharacteristics
+       .quad   0                                       // SizeOfStackReserve
+       .quad   0                                       // SizeOfStackCommit
+       .quad   0                                       // SizeOfHeapReserve
+-- 
+2.43.0
+
diff --git a/queue-5.15/riscv-remove-duplicated-get_rm.patch b/queue-5.15/riscv-remove-duplicated-get_rm.patch
new file mode 100644 (file)
index 0000000..f546236
--- /dev/null
@@ -0,0 +1,38 @@
+From 62bbec0f2fec66bfe961fb0cd642088cd4796c1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:39 +0800
+Subject: riscv: Remove duplicated GET_RM
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 164f66de6bb6ef454893f193c898dc8f1da6d18b ]
+
+The macro GET_RM is defined twice in this file; one definition can be removed.
+
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241008094141.549248-3-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps_misaligned.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
+index b246c3dc69930..d548d6992d988 100644
+--- a/arch/riscv/kernel/traps_misaligned.c
++++ b/arch/riscv/kernel/traps_misaligned.c
+@@ -131,8 +131,6 @@
+ #define REG_PTR(insn, pos, regs)      \
+       (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))
+-#define GET_RM(insn)                  (((insn) >> 12) & 7)
+-
+ #define GET_RS1(insn, regs)           (*REG_PTR(insn, SH_RS1, regs))
+ #define GET_RS2(insn, regs)           (*REG_PTR(insn, SH_RS2, regs))
+ #define GET_RS1S(insn, regs)          (*REG_PTR(RVC_RS1S(insn), 0, regs))
+-- 
+2.43.0
+
diff --git a/queue-5.15/riscv-remove-unused-generating_asm_offsets.patch b/queue-5.15/riscv-remove-unused-generating_asm_offsets.patch
new file mode 100644 (file)
index 0000000..d737d28
--- /dev/null
@@ -0,0 +1,44 @@
+From 447c6d669f5bcd209d458ade7941840d3b37714c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:38 +0800
+Subject: riscv: Remove unused GENERATING_ASM_OFFSETS
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 46d4e5ac6f2f801f97bcd0ec82365969197dc9b1 ]
+
+The macro is not used in the current version of the kernel, so it can be
+removed to avoid a build warning:
+
+../arch/riscv/kernel/asm-offsets.c: At top level:
+../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros]
+    7 | #define GENERATING_ASM_OFFSETS
+
+Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()")
+Cc: stable@vger.kernel.org
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Link: https://lore.kernel.org/r/20241008094141.549248-2-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/asm-offsets.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index 90f8ce64fa6f1..0b6064fec9e07 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -4,8 +4,6 @@
+  * Copyright (C) 2017 SiFive
+  */
+-#define GENERATING_ASM_OFFSETS
+-
+ #include <linux/kbuild.h>
+ #include <linux/sched.h>
+ #include <asm/thread_info.h>
+-- 
+2.43.0
+
diff --git a/queue-5.15/riscv-use-u-to-format-the-output-of-cpu.patch b/queue-5.15/riscv-use-u-to-format-the-output-of-cpu.patch
new file mode 100644 (file)
index 0000000..00fbf87
--- /dev/null
@@ -0,0 +1,43 @@
+From 5998836eb1a921346ceb0364c5a3734dde769cec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 11:20:10 +0800
+Subject: riscv: Use '%u' to format the output of 'cpu'
+
+From: WangYuli <wangyuli@uniontech.com>
+
+[ Upstream commit e0872ab72630dada3ae055bfa410bf463ff1d1e0 ]
+
+'cpu' is an unsigned integer, so its conversion specifier should
+be %u, not %d.
+
+Suggested-by: Wentao Guan <guanwentao@uniontech.com>
+Suggested-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/all/alpine.DEB.2.21.2409122309090.40372@angie.orcam.me.uk/
+Signed-off-by: WangYuli <wangyuli@uniontech.com>
+Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
+Tested-by: Charlie Jenkins <charlie@rivosinc.com>
+Fixes: f1e58583b9c7 ("RISC-V: Support cpu hotplug")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/4C127DEECDA287C8+20241017032010.96772-1-wangyuli@uniontech.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/cpu-hotplug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
+index 66ddfba1cfbef..28a3fa6e67d79 100644
+--- a/arch/riscv/kernel/cpu-hotplug.c
++++ b/arch/riscv/kernel/cpu-hotplug.c
+@@ -71,7 +71,7 @@ void __cpu_die(unsigned int cpu)
+       if (cpu_ops[cpu]->cpu_is_stopped)
+               ret = cpu_ops[cpu]->cpu_is_stopped(cpu);
+       if (ret)
+-              pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
++              pr_warn("CPU%u may not have stopped: %d\n", cpu, ret);
+ }
+ /*
+-- 
+2.43.0
+
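The signedness mismatch is easy to reproduce in isolation (a contrived
value; the kernel path uses pr_warn() rather than printf()):

    #include <stdio.h>

    int main(void)
    {
            unsigned int cpu = 4294967295u; /* UINT_MAX as a CPU id */

            printf("CPU%d\n", cpu); /* wrong: prints CPU-1 on common ABIs */
            printf("CPU%u\n", cpu); /* right: prints CPU4294967295 */
            return 0;
    }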
diff --git a/queue-5.15/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch b/queue-5.15/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch
new file mode 100644 (file)
index 0000000..0cc8314
--- /dev/null
@@ -0,0 +1,40 @@
+From 5c32140f24b70bc6811d7f5322075dd791a169c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 10:36:24 +0200
+Subject: riscv: vdso: Prevent the compiler from inserting calls to memset()
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit bf40167d54d55d4b54d0103713d86a8638fb9290 ]
+
+The compiler is smart enough to insert a call to memset() in
+riscv_vdso_get_cpus(), which generates a dynamic relocation.
+
+So prevent this by using the -fno-builtin option.
+
+Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API")
+Cc: stable@vger.kernel.org
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20241016083625.136311-2-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/vdso/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
+index 06e6b27f3bcc9..c1b68f962bada 100644
+--- a/arch/riscv/kernel/vdso/Makefile
++++ b/arch/riscv/kernel/vdso/Makefile
+@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+ ccflags-y := -fno-stack-protector
+ ccflags-y += -DDISABLE_BRANCH_PROFILING
++ccflags-y += -fno-builtin
+ ifneq ($(c-gettimeofday-y),)
+   CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
+-- 
+2.43.0
+
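The transformation the new flag suppresses can be reproduced with any
large zeroing in C (illustrative code, not the vDSO source): at -O2 the
compiler may lower the assignment below to a memset() call.

    struct big { long v[64]; };

    void clear(struct big *b)
    {
            *b = (struct big){ 0 }; /* may compile to memset(b, 0, sizeof(*b)) */
    }

Inside a vDSO that synthesized call becomes a dynamic relocation against a
symbol the vDSO does not provide, which is exactly what -fno-builtin
prevents.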
index 4cd1c1a3cdcb1b261ce6fb32c444d889ac0f322d..40ff12f8a9671f4102034183c4e061ad23ad9b65 100644 (file)
@@ -48,3 +48,18 @@ staging-iio-frequency-ad9832-fix-division-by-zero-in-ad9832_calc_freqreg.patch
 iio-adc-ad7124-fix-division-by-zero-in-ad7124_set_channel_odr.patch
 iio-light-veml6030-fix-microlux-value-calculation.patch
 nilfs2-fix-potential-deadlock-with-newly-created-symlinks.patch
+riscv-vdso-prevent-the-compiler-from-inserting-calls.patch
+riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch
+riscv-use-u-to-format-the-output-of-cpu.patch
+riscv-remove-unused-generating_asm_offsets.patch
+riscv-remove-duplicated-get_rm.patch
+mm-page_alloc-call-check_new_pages-while-zone-spinlo.patch
+mm-page_alloc-fix-tracepoint-mm_page_alloc_zone_lock.patch
+mm-page_alloc-split-out-buddy-removal-code-from-rmqu.patch
+mm-page_alloc-rename-alloc_high-to-alloc_min_reserve.patch
+mm-page_alloc-treat-rt-tasks-similar-to-__gfp_high.patch
+mm-page_alloc-explicitly-record-high-order-atomic-al.patch
+mm-page_alloc-explicitly-define-what-alloc-flags-dep.patch
+mm-page_alloc-explicitly-define-how-__gfp_high-non-b.patch
+mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch
+ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch