From: Vlastimil Babka Date: Fri, 27 Feb 2026 17:07:59 +0000 (+0100) Subject: mm/page_alloc: remove IRQ saving/restoring from pcp locking X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0a2c52a9a2f55e80fee6a10a846cadab590dc918;p=thirdparty%2Flinux.git mm/page_alloc: remove IRQ saving/restoring from pcp locking Effectively revert commit 038a102535eb ("mm/page_alloc: prevent pcp corruption with SMP=n"). The original problem is now avoided by pcp_spin_trylock() always failing on CONFIG_SMP=n, so we do not need to disable IRQs anymore. It's not a complete revert, because keeping the pcp_spin_(un)lock() wrappers is useful. Rename them from _maybe_irqsave/restore to _nopin. The difference from pcp_spin_trylock()/pcp_spin_unlock() is that the _nopin variants don't perform pcpu_task_pin/unpin(). Link: https://lkml.kernel.org/r/20260227-b4-pcp-locking-cleanup-v1-2-f7e22e603447@kernel.org Signed-off-by: Vlastimil Babka (SUSE) Acked-by: Johannes Weiner Cc: Brendan Jackman Cc: David Hildenbrand (Arm) Cc: Matthew Wilcox (Oracle) Cc: Mel Gorman Cc: Michal Hocko Cc: Sebastian Andrzej Siewior Cc: Steven Rostedt Cc: Suren Baghdasaryan Cc: Zi Yan Signed-off-by: Andrew Morton --- diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7fa2d0f104607..be367516c59b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -154,24 +154,14 @@ static DEFINE_MUTEX(pcp_batch_high_lock); BUG_ON(1) #endif -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) -static inline void __flags_noop(unsigned long *flags) { } -#define pcp_spin_lock_maybe_irqsave(ptr, flags) \ -({ \ - __flags_noop(&(flags)); \ - spin_lock(&(ptr)->lock); \ -}) -#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ -({ \ - spin_unlock(&(ptr)->lock); \ - __flags_noop(&(flags)); \ -}) -#else -#define pcp_spin_lock_maybe_irqsave(ptr, flags) \ - spin_lock_irqsave(&(ptr)->lock, flags) -#define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ - spin_unlock_irqrestore(&(ptr)->lock, flags) -#endif +/* + * In some cases we do not need to pin the task to the CPU because we are + * already given a specific cpu's pcp pointer. + */ +#define pcp_spin_lock_nopin(ptr) \ + spin_lock(&(ptr)->lock) +#define pcp_spin_unlock_nopin(ptr) \ + spin_unlock(&(ptr)->lock) #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); @@ -2568,7 +2558,6 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) { int high_min, to_drain, to_drain_batched, batch; - unsigned long UP_flags; bool todo = false; high_min = READ_ONCE(pcp->high_min); @@ -2588,9 +2577,9 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) to_drain = pcp->count - pcp->high; while (to_drain > 0) { to_drain_batched = min(to_drain, batch); - pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + pcp_spin_lock_nopin(pcp); free_pcppages_bulk(zone, to_drain_batched, pcp, 0); - pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + pcp_spin_unlock_nopin(pcp); todo = true; to_drain -= to_drain_batched; @@ -2607,15 +2596,14 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) */ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { - unsigned long UP_flags; int to_drain, batch; batch = READ_ONCE(pcp->batch); to_drain = min(pcp->count, batch); if (to_drain > 0) { - pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + pcp_spin_lock_nopin(pcp); free_pcppages_bulk(zone, to_drain, pcp, 0); - pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + pcp_spin_unlock_nopin(pcp); } } #endif @@ -2626,11 +2614,10 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) static void drain_pages_zone(unsigned int cpu, struct zone *zone) { struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - unsigned long UP_flags; int count; do { - pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + pcp_spin_lock_nopin(pcp); count = pcp->count; if (count) { int to_drain = min(count, @@ -2639,7 +2626,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) free_pcppages_bulk(zone, to_drain, pcp, 0); count -= to_drain; } - pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + pcp_spin_unlock_nopin(pcp); } while (count); } @@ -6123,7 +6110,6 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu) { struct per_cpu_pages *pcp; struct cpu_cacheinfo *cci; - unsigned long UP_flags; pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); cci = get_cpu_cacheinfo(cpu); @@ -6134,12 +6120,12 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu) * This can reduce zone lock contention without hurting * cache-hot pages sharing. */ - pcp_spin_lock_maybe_irqsave(pcp, UP_flags); + pcp_spin_lock_nopin(pcp); if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) pcp->flags |= PCPF_FREE_HIGH_BATCH; else pcp->flags &= ~PCPF_FREE_HIGH_BATCH; - pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); + pcp_spin_unlock_nopin(pcp); } void setup_pcp_cacheinfo(unsigned int cpu)