mm: page_alloc: defrag_mode kswapd/kcompactd watermarks

author    Johannes Weiner <hannes@cmpxchg.org>
          Thu, 13 Mar 2025 21:05:36 +0000 (17:05 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
          Tue, 18 Mar 2025 05:07:07 +0000 (22:07 -0700)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index dbb0ad69e17f2a7cdf9a36b9d0f92f149fbea611..37c29f3fbca839c735934ead3a8f4f13bf8b8ba7 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -138,6 +138,7 @@ enum numa_stat_item {
  enum zone_stat_item {
         /* First 128 byte cacheline (assuming 64 bit words) */
         NR_FREE_PAGES,
+       NR_FREE_PAGES_BLOCKS,
         NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
         NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
         NR_ZONE_ACTIVE_ANON,
diff --git a/mm/compaction.c b/mm/compaction.c

index cf32e8053edbad1f2444732cd54788d10a434be3..139f00c0308a3d43ae3ed445eeed899f95d0c353 100644 (file)
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2328,6 +2328,22 @@ static enum compact_result __compact_finished(struct compact_control *cc)
         if (!pageblock_aligned(cc->migrate_pfn))
                 return COMPACT_CONTINUE;
  
+       /*
+        * When defrag_mode is enabled, make kcompactd target
+        * watermarks in whole pageblocks. Because they can be stolen
+        * without polluting, no further fallback checks are needed.
+        */
+       if (defrag_mode && !cc->direct_compaction) {
+               if (__zone_watermark_ok(cc->zone, cc->order,
+                                       high_wmark_pages(cc->zone),
+                                       cc->highest_zoneidx, cc->alloc_flags,
+                                       zone_page_state(cc->zone,
+                                                       NR_FREE_PAGES_BLOCKS)))
+                       return COMPACT_SUCCESS;
+
+               return COMPACT_CONTINUE;
+       }
+
         /* Direct compactor: Is a suitable page free? */
         ret = COMPACT_NO_SUITABLE_PAGE;
         for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
@@ -2495,13 +2511,19 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
  static enum compact_result
  compaction_suit_allocation_order(struct zone *zone, unsigned int order,
                                  int highest_zoneidx, unsigned int alloc_flags,
-                                bool async)
+                                bool async, bool kcompactd)
  {
+       unsigned long free_pages;
         unsigned long watermark;
  
+       if (kcompactd && defrag_mode)
+               free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS);
+       else
+               free_pages = zone_page_state(zone, NR_FREE_PAGES);
+
         watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
-       if (zone_watermark_ok(zone, order, watermark, highest_zoneidx,
-                             alloc_flags))
+       if (__zone_watermark_ok(zone, order, watermark, highest_zoneidx,
+                               alloc_flags, free_pages))
                 return COMPACT_SUCCESS;
  
         /*
@@ -2557,7 +2579,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
                 ret = compaction_suit_allocation_order(cc->zone, cc->order,
                                                        cc->highest_zoneidx,
                                                        cc->alloc_flags,
-                                                      cc->mode == MIGRATE_ASYNC);
+                                                      cc->mode == MIGRATE_ASYNC,
+                                                      !cc->direct_compaction);
                 if (ret != COMPACT_CONTINUE)
                         return ret;
         }
@@ -3051,6 +3074,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
         struct zone *zone;
         enum zone_type highest_zoneidx = pgdat->kcompactd_highest_zoneidx;
         enum compact_result ret;
+       unsigned int alloc_flags = defrag_mode ?
+               ALLOC_WMARK_HIGH : ALLOC_WMARK_MIN;
  
         for (zoneid = 0; zoneid <= highest_zoneidx; zoneid++) {
                 zone = &pgdat->node_zones[zoneid];
@@ -3060,8 +3085,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
  
                 ret = compaction_suit_allocation_order(zone,
                                 pgdat->kcompactd_max_order,
-                               highest_zoneidx, ALLOC_WMARK_MIN,
-                               false);
+                               highest_zoneidx, alloc_flags,
+                               false, true);
                 if (ret == COMPACT_CONTINUE)
                         return true;
         }
@@ -3084,7 +3109,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
                 .mode = MIGRATE_SYNC_LIGHT,
                 .ignore_skip_hint = false,
                 .gfp_mask = GFP_KERNEL,
-               .alloc_flags = ALLOC_WMARK_MIN,
+               .alloc_flags = defrag_mode ? ALLOC_WMARK_HIGH : ALLOC_WMARK_MIN,
         };
         enum compact_result ret;
  
@@ -3104,7 +3129,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
  
                 ret = compaction_suit_allocation_order(zone,
                                 cc.order, zoneid, cc.alloc_flags,
-                               false);
+                               false, true);
                 if (ret != COMPACT_CONTINUE)
                         continue;
  
diff --git a/mm/internal.h b/mm/internal.h

index 2f52a65272c1969e2ad619caa17d28d5390eafb9..286520a424fe6e032fb72d989b68a5bec0a0aa55 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -536,6 +536,7 @@ extern char * const zone_names[MAX_NR_ZONES];
  DECLARE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);
  
  extern int min_free_kbytes;
+extern int defrag_mode;
  
  void setup_per_zone_wmarks(void);
  void calculate_min_free_kbytes(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 5a2ee82f723e3bcdaf633f28518a1a1932082094..4337467eaf5a730f86221e3d1a02e43cbdae18ae 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -273,7 +273,7 @@ int min_free_kbytes = 1024;
  int user_min_free_kbytes = -1;
  static int watermark_boost_factor __read_mostly = 15000;
  static int watermark_scale_factor = 10;
-static int defrag_mode;
+int defrag_mode;
  
  /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
  int movable_zone;
@@ -660,16 +660,20 @@ static inline void __add_to_free_list(struct page *page, struct zone *zone,
                                       bool tail)
  {
         struct free_area *area = &zone->free_area[order];
+       int nr_pages = 1 << order;
  
         VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
                      "page type is %lu, passed migratetype is %d (nr=%d)\n",
-                    get_pageblock_migratetype(page), migratetype, 1 << order);
+                    get_pageblock_migratetype(page), migratetype, nr_pages);
  
         if (tail)
                 list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
         else
                 list_add(&page->buddy_list, &area->free_list[migratetype]);
         area->nr_free++;
+
+       if (order >= pageblock_order && !is_migrate_isolate(migratetype))
+               __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, nr_pages);
  }
  
  /*
@@ -681,24 +685,34 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
                                      unsigned int order, int old_mt, int new_mt)
  {
         struct free_area *area = &zone->free_area[order];
+       int nr_pages = 1 << order;
  
         /* Free page moving can fail, so it happens before the type update */
         VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
                      "page type is %lu, passed migratetype is %d (nr=%d)\n",
-                    get_pageblock_migratetype(page), old_mt, 1 << order);
+                    get_pageblock_migratetype(page), old_mt, nr_pages);
  
         list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
  
-       account_freepages(zone, -(1 << order), old_mt);
-       account_freepages(zone, 1 << order, new_mt);
+       account_freepages(zone, -nr_pages, old_mt);
+       account_freepages(zone, nr_pages, new_mt);
+
+       if (order >= pageblock_order &&
+           is_migrate_isolate(old_mt) != is_migrate_isolate(new_mt)) {
+               if (!is_migrate_isolate(old_mt))
+                       nr_pages = -nr_pages;
+               __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, nr_pages);
+       }
  }
  
  static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
                                              unsigned int order, int migratetype)
  {
+       int nr_pages = 1 << order;
+
          VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
                      "page type is %lu, passed migratetype is %d (nr=%d)\n",
-                    get_pageblock_migratetype(page), migratetype, 1 << order);
+                    get_pageblock_migratetype(page), migratetype, nr_pages);
  
         /* clear reported state and update reported page count */
         if (page_reported(page))
@@ -708,6 +722,9 @@ static inline void __del_page_from_free_list(struct page *page, struct zone *zon
         __ClearPageBuddy(page);
         set_page_private(page, 0);
         zone->free_area[order].nr_free--;
+
+       if (order >= pageblock_order && !is_migrate_isolate(migratetype))
+               __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, -nr_pages);
  }
  
  static inline void del_page_from_free_list(struct page *page, struct zone *zone,
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 3370bdca6868c10c2960e5201dd1691cd2ab4e43..b5c7dfc2b18999c8a0b02c0811af3089d54e79e8 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6724,11 +6724,24 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
          * meet watermarks.
          */
         for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) {
+               unsigned long free_pages;
+
                 if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
                         mark = promo_wmark_pages(zone);
                 else
                         mark = high_wmark_pages(zone);
-               if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
+
+               /*
+                * In defrag_mode, watermarks must be met in whole
+                * blocks to avoid polluting allocator fallbacks.
+                */
+               if (defrag_mode)
+                       free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS);
+               else
+                       free_pages = zone_page_state(zone, NR_FREE_PAGES);
+
+               if (__zone_watermark_ok(zone, order, mark, highest_zoneidx,
+                                       0, free_pages))
                         return true;
         }
  
diff --git a/mm/vmstat.c b/mm/vmstat.c

index 16bfe1c694dd4e0c98b39337a367a2ea86e8f3bd..ed49a86348f7af95007318040afdf09155f1ecd4 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1190,6 +1190,7 @@ int fragmentation_index(struct zone *zone, unsigned int order)
  const char * const vmstat_text[] = {
         /* enum zone_stat_item counters */
         "nr_free_pages",
+       "nr_free_pages_blocks",
         "nr_zone_inactive_anon",
         "nr_zone_active_anon",
         "nr_zone_inactive_file",
author	Johannes Weiner <hannes@cmpxchg.org>
	Thu, 13 Mar 2025 21:05:36 +0000 (17:05 -0400)
committer	Andrew Morton <akpm@linux-foundation.org>
	Tue, 18 Mar 2025 05:07:07 +0000 (22:07 -0700)
include/linux/mmzone.h		patch | blob | blame | history
mm/compaction.c		patch | blob | blame | history
mm/internal.h		patch | blob | blame | history
mm/page_alloc.c		patch | blob | blame | history
mm/vmscan.c		patch | blob | blame | history
mm/vmstat.c		patch | blob | blame | history