mm/page_alloc: prevent pcp corruption with SMP=n
author    Vlastimil Babka <vbabka@suse.cz>
          Mon, 5 Jan 2026 15:08:56 +0000 (16:08 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
          Thu, 15 Jan 2026 06:16:25 +0000 (22:16 -0800)
The kernel test robot has reported:

 BUG: spinlock trylock failure on UP on CPU#0, kcompactd0/28
  lock: 0xffff888807e35ef0, .magic: dead4ead, .owner: kcompactd0/28, .owner_cpu: 0
 CPU: 0 UID: 0 PID: 28 Comm: kcompactd0 Not tainted 6.18.0-rc5-00127-ga06157804399 #1 PREEMPT  8cc09ef94dcec767faa911515ce9e609c45db470
 Call Trace:
  <IRQ>
  __dump_stack (lib/dump_stack.c:95)
  dump_stack_lvl (lib/dump_stack.c:123)
  dump_stack (lib/dump_stack.c:130)
  spin_dump (kernel/locking/spinlock_debug.c:71)
  do_raw_spin_trylock (kernel/locking/spinlock_debug.c:?)
  _raw_spin_trylock (include/linux/spinlock_api_smp.h:89 kernel/locking/spinlock.c:138)
  __free_frozen_pages (mm/page_alloc.c:2973)
  ___free_pages (mm/page_alloc.c:5295)
  __free_pages (mm/page_alloc.c:5334)
  tlb_remove_table_rcu (include/linux/mm.h:? include/linux/mm.h:3122 include/asm-generic/tlb.h:220 mm/mmu_gather.c:227 mm/mmu_gather.c:290)
  ? __cfi_tlb_remove_table_rcu (mm/mmu_gather.c:289)
  ? rcu_core (kernel/rcu/tree.c:?)
  rcu_core (include/linux/rcupdate.h:341 kernel/rcu/tree.c:2607 kernel/rcu/tree.c:2861)
  rcu_core_si (kernel/rcu/tree.c:2879)
  handle_softirqs (arch/x86/include/asm/jump_label.h:36 include/trace/events/irq.h:142 kernel/softirq.c:623)
  __irq_exit_rcu (arch/x86/include/asm/jump_label.h:36 kernel/softirq.c:725)
  irq_exit_rcu (kernel/softirq.c:741)
  sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1052)
  </IRQ>
  <TASK>
 RIP: 0010:_raw_spin_unlock_irqrestore (arch/x86/include/asm/preempt.h:95 include/linux/spinlock_api_smp.h:152 kernel/locking/spinlock.c:194)
  free_pcppages_bulk (mm/page_alloc.c:1494)
  drain_pages_zone (include/linux/spinlock.h:391 mm/page_alloc.c:2632)
  __drain_all_pages (mm/page_alloc.c:2731)
  drain_all_pages (mm/page_alloc.c:2747)
  kcompactd (mm/compaction.c:3115)
  kthread (kernel/kthread.c:465)
  ? __cfi_kcompactd (mm/compaction.c:3166)
  ? __cfi_kthread (kernel/kthread.c:412)
  ret_from_fork (arch/x86/kernel/process.c:164)
  ? __cfi_kthread (kernel/kthread.c:412)
  ret_from_fork_asm (arch/x86/entry/entry_64.S:255)
  </TASK>

Matthew has analyzed the report and identified that in drain_pages_zone()
we are in a section protected by spin_lock(&pcp->lock) when an interrupt
arrives and attempts spin_trylock() on the same lock.  The code is
deliberately designed to work this way without disabling IRQs, with the
trylock occasionally failing and a fallback path being taken.  However,
the SMP=n spinlock implementation makes spin_trylock() a no-op that always
succeeds.  Here the enabled lock debugging catches the problem, but
without it the nested section could corrupt the pcp structure.
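
For context, on SMP=n the spinlock fastpaths compile away and
spin_trylock() unconditionally reports success (simplified from
include/linux/spinlock_api_up.h; the interleaving below is an
illustration, not verbatim kernel code):

  /* SMP=n: locking is just preemption control, trylock cannot fail */
  #define __LOCK(lock) \
          do { preempt_disable(); ___LOCK(lock); } while (0)
  #define _raw_spin_trylock(lock)  ({ __LOCK(lock); 1; })

  /* the reported interleaving, with IRQs left enabled: */
  spin_lock(&pcp->lock);            /* UP: only disables preemption */
      /* <IRQ> softirq frees a page: */
      spin_trylock(&pcp->lock);     /* "succeeds" despite the held lock */
      /* ... modifies pcp while the interrupted section is mid-update */
      /* </IRQ> */
  spin_unlock(&pcp->lock);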

The problem was introduced by commit 574907741599 ("mm/page_alloc:
leave IRQs enabled for per-cpu page allocations").  The pcp locking scheme
recognizes the need to disable IRQs to prevent nesting of spin_trylock()
sections on SMP=n, but the need to also prevent that nesting inside
spin_lock() sections was not recognized.  Fix it by introducing local
wrappers that turn the spin_lock() into spin_lock_irqsave() with SMP=n and
using them in all places that do spin_lock(&pcp->lock).
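
On SMP=y and PREEMPT_RT the wrappers compile down to plain
spin_lock()/spin_unlock(), so only SMP=n pays the irqsave cost.
Condensed from the hunks below, a drain site on SMP=n then behaves as:

  unsigned long UP_flags;

  pcp_spin_lock_maybe_irqsave(pcp, UP_flags);   /* SMP=n: spin_lock_irqsave() */
  free_pcppages_bulk(zone, to_drain, pcp, 0);   /* IRQs off: nothing can nest
                                                   a pcp_spin_trylock() here */
  pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);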

[vbabka@suse.cz: add pcp_ prefix to the spin_lock_irqsave wrappers, per Steven]
Link: https://lkml.kernel.org/r/20260105-fix-pcp-up-v1-1-5579662d2071@suse.cz
Fixes: 574907741599 ("mm/page_alloc: leave IRQs enabled for per-cpu page allocations")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202512101320.e2f2dd6f-lkp@intel.com
Analyzed-by: Matthew Wilcox <willy@infradead.org>
Link: https://lore.kernel.org/all/aUW05pyc9nZkvY-1@casper.infradead.org/
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3dce96f3be494c0c5e0971a942334ac78f5037f9..f65c4edf199d52f7e69193f5f0e64cfb0e930f47 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -167,6 +167,33 @@ static inline void __pcp_trylock_noop(unsigned long *flags) { }
        pcp_trylock_finish(UP_flags);                                   \
 })
 
+/*
+ * With the UP spinlock implementation, when we spin_lock(&pcp->lock) (e.g. for
+ * a potentially remote cpu drain) and get interrupted by an operation that
+ * attempts pcp_spin_trylock(), we can't rely on the trylock failing, as the UP
+ * spinlock assumptions make the trylock a no-op that always succeeds. So we
+ * have to turn that spin_lock() into a spin_lock_irqsave(). This works because
+ * on UP there are no remote cpus, so we can only ever be locking the local one.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
+static inline void __flags_noop(unsigned long *flags) { }
+#define pcp_spin_lock_maybe_irqsave(ptr, flags)                \
+({                                                     \
+        __flags_noop(&(flags));                        \
+        spin_lock(&(ptr)->lock);                       \
+})
+#define pcp_spin_unlock_maybe_irqrestore(ptr, flags)   \
+({                                                     \
+        spin_unlock(&(ptr)->lock);                     \
+        __flags_noop(&(flags));                        \
+})
+#else
+#define pcp_spin_lock_maybe_irqsave(ptr, flags)                \
+               spin_lock_irqsave(&(ptr)->lock, flags)
+#define pcp_spin_unlock_maybe_irqrestore(ptr, flags)   \
+               spin_unlock_irqrestore(&(ptr)->lock, flags)
+#endif
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -2556,6 +2583,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
 {
        int high_min, to_drain, to_drain_batched, batch;
+       unsigned long UP_flags;
        bool todo = false;
 
        high_min = READ_ONCE(pcp->high_min);
@@ -2575,9 +2603,9 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
        to_drain = pcp->count - pcp->high;
        while (to_drain > 0) {
                to_drain_batched = min(to_drain, batch);
-               spin_lock(&pcp->lock);
+               pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
                free_pcppages_bulk(zone, to_drain_batched, pcp, 0);
-               spin_unlock(&pcp->lock);
+               pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
                todo = true;
 
                to_drain -= to_drain_batched;
@@ -2594,14 +2622,15 @@ bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
  */
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
+       unsigned long UP_flags;
        int to_drain, batch;
 
        batch = READ_ONCE(pcp->batch);
        to_drain = min(pcp->count, batch);
        if (to_drain > 0) {
-               spin_lock(&pcp->lock);
+               pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
                free_pcppages_bulk(zone, to_drain, pcp, 0);
-               spin_unlock(&pcp->lock);
+               pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
        }
 }
 #endif
@@ -2612,10 +2641,11 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
        struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+       unsigned long UP_flags;
        int count;
 
        do {
-               spin_lock(&pcp->lock);
+               pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
                count = pcp->count;
                if (count) {
                        int to_drain = min(count,
@@ -2624,7 +2654,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
                        free_pcppages_bulk(zone, to_drain, pcp, 0);
                        count -= to_drain;
                }
-               spin_unlock(&pcp->lock);
+               pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
        } while (count);
 }
 
@@ -6109,6 +6139,7 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
 {
        struct per_cpu_pages *pcp;
        struct cpu_cacheinfo *cci;
+       unsigned long UP_flags;
 
        pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
        cci = get_cpu_cacheinfo(cpu);
@@ -6119,12 +6150,12 @@ static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
         * This can reduce zone lock contention without hurting
         * cache-hot pages sharing.
         */
-       spin_lock(&pcp->lock);
+       pcp_spin_lock_maybe_irqsave(pcp, UP_flags);
        if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
                pcp->flags |= PCPF_FREE_HIGH_BATCH;
        else
                pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
-       spin_unlock(&pcp->lock);
+       pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags);
 }
 
 void setup_pcp_cacheinfo(unsigned int cpu)