mm, swap: simplify swap cache allocation helper

author Kairui Song <kasong@tencent.com>

Sun, 17 May 2026 15:39:40 +0000 (23:39 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Tue, 2 Jun 2026 22:22:20 +0000 (15:22 -0700)
author Kairui Song <kasong@tencent.com>
Sun, 17 May 2026 15:39:40 +0000 (23:39 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Tue, 2 Jun 2026 22:22:20 +0000 (15:22 -0700)
diff --git a/mm/swap.h b/mm/swap.h

index a77016f2423b90591fced51e74ee20dfac05de2a..ad8b17a937581844f53a3e74ebc72f0ebd0a20b6 100644 (file)
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -281,8 +281,7 @@ struct folio *swap_cache_get_folio(swp_entry_t entry);
  void *swap_cache_get_shadow(swp_entry_t entry);
  void swap_cache_del_folio(struct folio *folio);
  struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
-                                    struct mempolicy *mpol, pgoff_t ilx,
-                                    bool *alloced);
+                                    struct mempolicy *mpol, pgoff_t ilx);
  /* Below helpers require the caller to lock and pass in the swap cluster. */
  void __swap_cache_add_folio(struct swap_cluster_info *ci,
                             struct folio *folio, swp_entry_t entry);
diff --git a/mm/swap_state.c b/mm/swap_state.c

index 1415a5c54a43b29cf5d4d938241cfbd0fe0340e0..3bba82f6dc79a2265cb53da82a069d4065df23c3 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -459,54 +459,38 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
   * All swap slots covered by the folio must have a non-zero swap count.
   *
   * Context: Caller must protect the swap device with reference count or locks.
- * Return: Returns the folio being added on success. Returns the existing folio
- * if @entry is already cached. Returns NULL if raced with swapin or swapoff.
+ * Return: 0 on success, or a negative error code on failure.
   */
-static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
-                                                 struct folio *folio,
-                                                 gfp_t gfp, bool charged)
+static int __swap_cache_prepare_and_add(swp_entry_t entry,
+                                       struct folio *folio,
+                                       gfp_t gfp, bool charged)
  {
-       struct folio *swapcache = NULL;
         void *shadow;
         int ret;
  
         __folio_set_locked(folio);
         __folio_set_swapbacked(folio);
  
-       if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry))
+       if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) {
+               ret = -ENOMEM;
                 goto failed;
-
-       for (;;) {
-               ret = swap_cache_add_folio(folio, entry, &shadow);
-               if (!ret)
-                       break;
-
-               /*
-                * Large order allocation needs special handling on
-                * race: if a smaller folio exists in cache, swapin needs
-                * to fallback to order 0, and doing a swap cache lookup
-                * might return a folio that is irrelevant to the faulting
-                * entry because @entry is aligned down. Just return NULL.
-                */
-               if (ret != -EEXIST || folio_test_large(folio))
-                       goto failed;
-
-               swapcache = swap_cache_get_folio(entry);
-               if (swapcache)
-                       goto failed;
         }
  
+       ret = swap_cache_add_folio(folio, entry, &shadow);
+       if (ret)
+               goto failed;
+
         memcg1_swapin(entry, folio_nr_pages(folio));
         if (shadow)
                 workingset_refault(folio, shadow);
  
         /* Caller will initiate read into locked folio */
         folio_add_lru(folio);
-       return folio;
+       return 0;
  
  failed:
         folio_unlock(folio);
-       return swapcache;
+       return ret;
  }
  
  /**
@@ -515,7 +499,6 @@ failed:
   * @gfp_mask: memory allocation flags
   * @mpol: NUMA memory allocation policy to be applied
   * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
- * @new_page_allocated: sets true if allocation happened, false otherwise
   *
   * Allocate a folio in the swap cache for one swap slot, typically before
   * doing IO (e.g. swap in or zswap writeback). The swap slot indicated by
@@ -523,18 +506,40 @@ failed:
   * Currently only supports order 0.
   *
   * Context: Caller must protect the swap device with reference count or locks.
- * Return: Returns the existing folio if @entry is cached already. Returns
- * NULL if failed due to -ENOMEM or @entry have a swap count < 1.
+ * Return: The new folio if it was allocated and added to the swap cache.
+ * Returns an ERR_PTR() encoded error if that failed due to a race or OOM.
   */
  struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
-                                    struct mempolicy *mpol, pgoff_t ilx,
-                                    bool *new_page_allocated)
+                                    struct mempolicy *mpol, pgoff_t ilx)
+{
+       int err;
+       struct folio *folio;
+
+       /* Allocate a new folio to be added into the swap cache. */
+       folio = folio_alloc_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
+       if (!folio)
+               return ERR_PTR(-ENOMEM);
+
+       /*
+        * Try to add the new folio to the swap cache. It returns
+        * -EEXIST if the entry is already cached.
+        */
+       err = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
+       if (err) {
+               folio_put(folio);
+               return ERR_PTR(err);
+       }
+
+       return folio;
+}
+
+static struct folio *swap_cache_read_folio(swp_entry_t entry, gfp_t gfp,
+                                          struct mempolicy *mpol, pgoff_t ilx,
+                                          struct swap_iocb **plug, bool readahead)
  {
         struct swap_info_struct *si = __swap_entry_to_info(entry);
         struct folio *folio;
-       struct folio *result = NULL;
  
-       *new_page_allocated = false;
         /* Check the swap cache again for readahead path. */
         folio = swap_cache_get_folio(entry);
         if (folio)
@@ -544,17 +549,24 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
         if (!swap_entry_swapped(si, entry))
                 return NULL;
  
-       /* Allocate a new folio to be added into the swap cache. */
-       folio = folio_alloc_mpol(gfp_mask, 0, mpol, ilx, numa_node_id());
-       if (!folio)
+       do {
+               folio = swap_cache_get_folio(entry);
+               if (folio)
+                       return folio;
+
+               folio = swap_cache_alloc_folio(entry, gfp, mpol, ilx);
+       } while (PTR_ERR(folio) == -EEXIST);
+
+       if (IS_ERR_OR_NULL(folio))
                 return NULL;
-       /* Try add the new folio, returns existing folio or NULL on failure. */
-       result = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
-       if (result == folio)
-               *new_page_allocated = true;
-       else
-               folio_put(folio);
-       return result;
+
+       swap_read_folio(folio, plug);
+       if (readahead) {
+               folio_set_readahead(folio);
+               count_vm_event(SWAP_RA);
+       }
+
+       return folio;
  }
  
  /**
@@ -573,15 +585,35 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
   */
  struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
  {
+       int ret;
         struct folio *swapcache;
         pgoff_t offset = swp_offset(entry);
         unsigned long nr_pages = folio_nr_pages(folio);
  
         entry = swp_entry(swp_type(entry), round_down(offset, nr_pages));
-       swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true);
-       if (swapcache == folio)
-               swap_read_folio(folio, NULL);
-       return swapcache;
+       for (;;) {
+               ret = __swap_cache_prepare_and_add(entry, folio, 0, true);
+               if (!ret) {
+                       swap_read_folio(folio, NULL);
+                       break;
+               }
+
+               /*
+                * Large order allocation needs special handling on
+                * race: if a smaller folio exists in cache, swapin needs
+                * to fall back to order 0, and doing a swap cache lookup
+                * might return a folio that is irrelevant to the faulting
+                * entry because @entry is aligned down. Just return NULL.
+                */
+               if (ret != -EEXIST || nr_pages > 1)
+                       return NULL;
+
+               swapcache = swap_cache_get_folio(entry);
+               if (swapcache)
+                       return swapcache;
+       }
+
+       return folio;
  }
  
  /*
@@ -595,7 +627,6 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 struct swap_iocb **plug)
  {
         struct swap_info_struct *si;
-       bool page_allocated;
         struct mempolicy *mpol;
         pgoff_t ilx;
         struct folio *folio;
@@ -605,13 +636,9 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                 return NULL;
  
         mpol = get_vma_policy(vma, addr, 0, &ilx);
-       folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-                                      &page_allocated);
+       folio = swap_cache_read_folio(entry, gfp_mask, mpol, ilx, plug, false);
         mpol_cond_put(mpol);
  
-       if (page_allocated)
-               swap_read_folio(folio, plug);
-
         put_swap_device(si);
         return folio;
  }
@@ -696,7 +723,7 @@ static unsigned long swapin_nr_pages(unsigned long offset)
   * are fairly likely to have been swapped out from the same node.
   */
  struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
-                                   struct mempolicy *mpol, pgoff_t ilx)
+                                    struct mempolicy *mpol, pgoff_t ilx)
  {
         struct folio *folio;
         unsigned long entry_offset = swp_offset(entry);
@@ -706,7 +733,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
         struct swap_info_struct *si = __swap_entry_to_info(entry);
         struct blk_plug plug;
         struct swap_iocb *splug = NULL;
-       bool page_allocated;
+       swp_entry_t ra_entry;
  
         mask = swapin_nr_pages(offset) - 1;
         if (!mask)
@@ -723,18 +750,11 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
         blk_start_plug(&plug);
         for (offset = start_offset; offset <= end_offset ; offset++) {
                 /* Ok, do the async read-ahead now */
-               folio = swap_cache_alloc_folio(
-                       swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx,
-                       &page_allocated);
+               ra_entry = swp_entry(swp_type(entry), offset);
+               folio = swap_cache_read_folio(ra_entry, gfp_mask, mpol, ilx,
+                                             &splug, offset != entry_offset);
                 if (!folio)
                         continue;
-               if (page_allocated) {
-                       swap_read_folio(folio, &splug);
-                       if (offset != entry_offset) {
-                               folio_set_readahead(folio);
-                               count_vm_event(SWAP_RA);
-                       }
-               }
                 folio_put(folio);
         }
         blk_finish_plug(&plug);
@@ -742,11 +762,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
         lru_add_drain();        /* Push any new pages onto the LRU now */
  skip:
         /* The page was likely read above, so no need for plugging here */
-       folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-                                      &page_allocated);
-       if (unlikely(page_allocated))
-               swap_read_folio(folio, NULL);
-       return folio;
+       return swap_cache_read_folio(entry, gfp_mask, mpol, ilx, NULL, false);
  }
  
  static int swap_vma_ra_win(struct vm_fault *vmf, unsigned long *start,
@@ -812,8 +828,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
         pte_t *pte = NULL, pentry;
         int win;
         unsigned long start, end, addr;
-       pgoff_t ilx;
-       bool page_allocated;
+       pgoff_t ilx = targ_ilx;
  
         win = swap_vma_ra_win(vmf, &start, &end);
         if (win == 1)
@@ -847,19 +862,12 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
                         if (!si)
                                 continue;
                 }
-               folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-                                              &page_allocated);
+               folio = swap_cache_read_folio(entry, gfp_mask, mpol, ilx,
+                                             &splug, addr != vmf->address);
                 if (si)
                         put_swap_device(si);
                 if (!folio)
                         continue;
-               if (page_allocated) {
-                       swap_read_folio(folio, &splug);
-                       if (addr != vmf->address) {
-                               folio_set_readahead(folio);
-                               count_vm_event(SWAP_RA);
-                       }
-               }
                 folio_put(folio);
         }
         if (pte)
@@ -869,10 +877,8 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
         lru_add_drain();
  skip:
         /* The folio was likely read above, so no need for plugging here */
-       folio = swap_cache_alloc_folio(targ_entry, gfp_mask, mpol, targ_ilx,
-                                      &page_allocated);
-       if (unlikely(page_allocated))
-               swap_read_folio(folio, NULL);
+       folio = swap_cache_read_folio(targ_entry, gfp_mask, mpol, targ_ilx,
+                                     NULL, false);
         return folio;
  }
  
diff --git a/mm/zswap.c b/mm/zswap.c

index 4b5149173b0ec5e154bdd4752f410719c7df87e1..e27f6e96f00318d8315899315d5c9b47662a27df 100644 (file)
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -991,7 +991,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
         pgoff_t offset = swp_offset(swpentry);
         struct folio *folio;
         struct mempolicy *mpol;
-       bool folio_was_allocated;
         struct swap_info_struct *si;
         int ret = 0;
  
@@ -1002,22 +1001,18 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
  
         mpol = get_task_policy(current);
         folio = swap_cache_alloc_folio(swpentry, GFP_KERNEL, mpol,
-                                      NO_INTERLEAVE_INDEX, &folio_was_allocated);
+                                      NO_INTERLEAVE_INDEX);
         put_swap_device(si);
-       if (!folio)
-               return -ENOMEM;
  
         /*
-        * Found an existing folio, we raced with swapin or concurrent
-        * shrinker. We generally writeback cold folios from zswap, and
-        * swapin means the folio just became hot, so skip this folio.
-        * For unlikely concurrent shrinker case, it will be unlinked
-        * and freed when invalidated by the concurrent shrinker anyway.
+        * Swap cache allocation might fail due to OOM, or the entry
+        * may already be cached due to concurrent swapin or have been
+        * freed. If already cached, a concurrent swapin made the folio
+        * hot, so skip it. For the unlikely concurrent shrinker case,
+        * it will be unlinked and freed when invalidated anyway.
          */
-       if (!folio_was_allocated) {
-               ret = -EEXIST;
-               goto out;
-       }
+       if (IS_ERR(folio))
+               return PTR_ERR(folio);
  
         /*
          * folio is locked, and the swapcache is now secured against
@@ -1057,7 +1052,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
         __swap_writepage(folio, NULL);
  
  out:
-       if (ret && ret != -EEXIST) {
+       if (ret) {
                 swap_cache_del_folio(folio);
                 folio_unlock(folio);
         }
author	Kairui Song <kasong@tencent.com>
	Sun, 17 May 2026 15:39:40 +0000 (23:39 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Tue, 2 Jun 2026 22:22:20 +0000 (15:22 -0700)
mm/swap.h		patch \| blob \| blame \| history
mm/swap_state.c		patch \| blob \| blame \| history
mm/zswap.c		patch \| blob \| blame \| history