mm/vmscan: mitigate spurious kswapd_failures reset from direct reclaim

author Jiayuan Chen <jiayuan.chen@shopee.com>

Tue, 20 Jan 2026 02:43:48 +0000 (10:43 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Sat, 31 Jan 2026 22:22:38 +0000 (14:22 -0800)
author Jiayuan Chen <jiayuan.chen@shopee.com>
Tue, 20 Jan 2026 02:43:48 +0000 (10:43 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Sat, 31 Jan 2026 22:22:38 +0000 (14:22 -0800)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index eb3815fc94ad4b451c3cf76d36f99b1e38e59a49..8881198e85c6a45a08c1698095e914a49c29404a 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1536,6 +1536,8 @@ static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
  void build_all_zonelists(pg_data_t *pgdat);
  void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
                    enum zone_type highest_zoneidx);
+void kswapd_try_clear_hopeless(struct pglist_data *pgdat,
+                              unsigned int order, int highest_zoneidx);
  bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
                          int highest_zoneidx, unsigned int alloc_flags,
                          long free_pages);
diff --git a/mm/vmscan.c b/mm/vmscan.c

index b33039000d6e5ae1a39d0c5289b4a854896bde18..5d9b1bce6f01e631d2e14774e714dbdebdf53adc 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5065,7 +5065,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
         blk_finish_plug(&plug);
  done:
         if (sc->nr_reclaimed > reclaimed)
-               atomic_set(&pgdat->kswapd_failures, 0);
+               kswapd_try_clear_hopeless(pgdat, sc->order, sc->reclaim_idx);
  }
  
  /******************************************************************************
@@ -6132,7 +6132,7 @@ again:
          * successful direct reclaim run will revive a dormant kswapd.
          */
         if (reclaimable)
-               atomic_set(&pgdat->kswapd_failures, 0);
+               kswapd_try_clear_hopeless(pgdat, sc->order, sc->reclaim_idx);
         else if (sc->cache_trim_mode)
                 sc->cache_trim_mode_failed = 1;
  }
@@ -7391,6 +7391,24 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
         wake_up_interruptible(&pgdat->kswapd_wait);
  }
  
+static void kswapd_clear_hopeless(pg_data_t *pgdat)
+{
+       atomic_set(&pgdat->kswapd_failures, 0); /* unconditional reset */
+}
+
+/*
+ * Reset kswapd_failures only when pgdat_balanced() reports the node
+ * balanced for @order and @highest_zoneidx. Otherwise successful direct
+ * reclaim (e.g., from cgroup memory.high throttling) can keep resetting
+ * kswapd_failures on an unbalanceable node, making kswapd run endlessly.
+ */
+void kswapd_try_clear_hopeless(struct pglist_data *pgdat,
+                              unsigned int order, int highest_zoneidx)
+{
+       if (pgdat_balanced(pgdat, order, highest_zoneidx))
+               kswapd_clear_hopeless(pgdat);
+}
+
  #ifdef CONFIG_HIBERNATION
  /*
   * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
author	Jiayuan Chen <jiayuan.chen@shopee.com>
	Tue, 20 Jan 2026 02:43:48 +0000 (10:43 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Sat, 31 Jan 2026 22:22:38 +0000 (14:22 -0800)
include/linux/mmzone.h		patch \| blob \| blame \| history
mm/vmscan.c		patch \| blob \| blame \| history