mm, swap: consolidate cluster reclaim and usability check

author Kairui Song <kasong@tencent.com>

Fri, 19 Dec 2025 19:43:39 +0000 (03:43 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Sat, 31 Jan 2026 22:22:55 +0000 (14:22 -0800)
author Kairui Song <kasong@tencent.com>
Fri, 19 Dec 2025 19:43:39 +0000 (03:43 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Sat, 31 Jan 2026 22:22:55 +0000 (14:22 -0800)
diff --git a/mm/swapfile.c b/mm/swapfile.c

index e47807fdf51b511f3b8bad6d2ac14517493b8620..ce8c75ede5c315b83a97e8ec12ef56a8a47aa67b 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -777,33 +777,51 @@ static int swap_cluster_setup_bad_slot(struct swap_cluster_info *cluster_info,
         return 0;
  }
  
+/*
+ * Reclaim drops the ci lock, so the cluster may become unusable (freed or
+ * stolen by a lower order). @usable will be set to false if that happens.
+ */
  static bool cluster_reclaim_range(struct swap_info_struct *si,
                                   struct swap_cluster_info *ci,
-                                 unsigned long start, unsigned long end)
+                                 unsigned long start, unsigned int order,
+                                 bool *usable)
  {
+       unsigned int nr_pages = 1 << order;
+       unsigned long offset = start, end = start + nr_pages;
         unsigned char *map = si->swap_map;
-       unsigned long offset = start;
         int nr_reclaim;
  
         spin_unlock(&ci->lock);
         do {
                 switch (READ_ONCE(map[offset])) {
                 case 0:
-                       offset++;
                         break;
                 case SWAP_HAS_CACHE:
                         nr_reclaim = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
-                       if (nr_reclaim > 0)
-                               offset += nr_reclaim;
-                       else
+                       if (nr_reclaim < 0)
                                 goto out;
                         break;
                 default:
                         goto out;
                 }
-       } while (offset < end);
+       } while (++offset < end);
  out:
         spin_lock(&ci->lock);
+
+       /*
+        * We just dropped ci->lock, so the cluster could have been taken by
+        * another order or freed. Check whether it is still usable or empty.
+        */
+       if (!cluster_is_usable(ci, order)) {
+               *usable = false;
+               return false;
+       }
+       *usable = true;
+
+       /* Fast path, no need to scan if the whole cluster is empty */
+       if (cluster_is_empty(ci))
+               return true;
+
         /*
          * Recheck the range no matter reclaim succeeded or not, the slot
          * could have been be freed while we are not holding the lock.
@@ -900,9 +918,10 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
         unsigned long start = ALIGN_DOWN(offset, SWAPFILE_CLUSTER);
         unsigned long end = min(start + SWAPFILE_CLUSTER, si->max);
         unsigned int nr_pages = 1 << order;
-       bool need_reclaim, ret;
+       bool need_reclaim, ret, usable;
  
         lockdep_assert_held(&ci->lock);
+       VM_WARN_ON(!cluster_is_usable(ci, order));
  
         if (end < nr_pages || ci->count + nr_pages > SWAPFILE_CLUSTER)
                 goto out;
@@ -912,14 +931,8 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
                 if (!cluster_scan_range(si, ci, offset, nr_pages, &need_reclaim))
                         continue;
                 if (need_reclaim) {
-                       ret = cluster_reclaim_range(si, ci, offset, offset + nr_pages);
-                       /*
-                        * Reclaim drops ci->lock and cluster could be used
-                        * by another order. Not checking flag as off-list
-                        * cluster has no flag set, and change of list
-                        * won't cause fragmentation.
-                        */
-                       if (!cluster_is_usable(ci, order))
+                       ret = cluster_reclaim_range(si, ci, offset, order, &usable);
+                       if (!usable)
                                 goto out;
                         if (cluster_is_empty(ci))
                                 offset = start;
author	Kairui Song <kasong@tencent.com>
	Fri, 19 Dec 2025 19:43:39 +0000 (03:43 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Sat, 31 Jan 2026 22:22:55 +0000 (14:22 -0800)