mm, swap: remove workaround for unsynchronized swap map cache state
Author:     Kairui Song <kasong@tencent.com>
AuthorDate: Fri, 19 Dec 2025 19:43:42 +0000 (03:43 +0800)
Commit:     Andrew Morton <akpm@linux-foundation.org>
CommitDate: Sat, 31 Jan 2026 22:22:56 +0000 (14:22 -0800)
Remove the "skip if exists" check from commit a65b0e7607ccb ("zswap: make
shrinking memcg-aware").  It was needed because there is a tiny time
window between setting the SWAP_HAS_CACHE bit and actually adding the
folio to the swap cache.  If a user is trying to add the folio into the
swap cache but another user was interrupted after setting SWAP_HAS_CACHE
but hasn't added the folio to the swap cache yet, it might lead to a
deadlock.
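
As an illustration only, here is a minimal user-space sketch of that
window.  The names prepare(), interrupted_writer(), has_cache and
in_cache are hypothetical stand-ins for swapcache_prepare() and the
real swap map / swap cache state, not kernel code:

/* Build with: cc -pthread race-sketch.c */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool has_cache;  /* stands in for SWAP_HAS_CACHE */
static atomic_bool in_cache;   /* stands in for the swap cache entry */

/* Old scheme: the bit is claimed before the folio is published. */
static int prepare(void)
{
	bool expected = false;

	if (!atomic_compare_exchange_strong(&has_cache, &expected, true))
		return -EEXIST;  /* someone else already holds the bit */
	return 0;
}

static void *interrupted_writer(void *arg)
{
	(void)arg;
	prepare();                      /* sets the bit ... */
	sleep(1);                       /* ... then gets "interrupted" */
	atomic_store(&in_cache, true);  /* folio finally visible */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, interrupted_writer, NULL);
	usleep(1000);  /* land inside the window */

	/*
	 * Second user: prepare() keeps failing with -EEXIST, yet a swap
	 * cache lookup finds nothing to wait on or reuse, so the only
	 * option is to retry -- this loop is that busy wait.
	 */
	while (prepare() == -EEXIST && !atomic_load(&in_cache))
		;

	pthread_join(t, NULL);
	puts("window closed only because the writer resumed");
	return 0;
}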

We have since moved the bit setting into the same critical section as
adding the folio, so the workaround is no longer needed.  Remove it and
clean it up.
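
And a sketch of the fixed ordering under the same assumptions, where a
single lock (the swap cluster lock in the kernel) covers both the bit
and the insertion, so no observer can ever see has_cache set while
in_cache is still false:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t cluster_lock = PTHREAD_MUTEX_INITIALIZER;
static bool has_cache;  /* stands in for SWAP_HAS_CACHE */
static bool in_cache;   /* stands in for the swap cache entry */

/*
 * Bit and folio are published in one critical section: -EEXIST now
 * really means a folio is in the cache, and "skip if exists" has
 * nothing left to work around.
 */
static int prepare_and_add(void)
{
	int ret = 0;

	pthread_mutex_lock(&cluster_lock);
	if (has_cache) {
		ret = -1;  /* -EEXIST: the folio really is cached */
	} else {
		has_cache = true;
		in_cache = true;
	}
	pthread_mutex_unlock(&cluster_lock);
	return ret;
}

int main(void) { return prepare_and_add() ? 1 : 0; }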

Link: https://lkml.kernel.org/r/20251220-swap-table-p2-v5-13-8862a265a033@tencent.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
Cc: Deepanshu Kartikey <kartikey406@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <ryncsn@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/swap.h
mm/swap_state.c
mm/zswap.c

index e427240073e9c3ebc8e0c627b0e30dad5dc45d33..0801857a0640634d5d1574f48d546458ce9f0d09 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -260,7 +260,7 @@ int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
 void swap_cache_del_folio(struct folio *folio);
 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
                                     struct mempolicy *mpol, pgoff_t ilx,
-                                    bool *alloced, bool skip_if_exists);
+                                    bool *alloced);
 /* Below helpers require the caller to lock and pass in the swap cluster. */
 void __swap_cache_del_folio(struct swap_cluster_info *ci,
                            struct folio *folio, swp_entry_t entry, void *shadow);
index 22990c5259cc48ea1e2a9f89a8543f2e4b2ae317..9f45563591d6dcb30e72519935780ef1fb0cdc0e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -444,8 +444,6 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
  * @folio: folio to be added.
  * @gfp: memory allocation flags for charge, can be 0 if @charged is true.
  * @charged: if the folio is already charged.
- * @skip_if_exists: if the slot is in a cached state, return NULL.
- *                  This is an old workaround that will be removed shortly.
  *
  * Update the swap_map and add folio as swap cache, typically before swapin.
  * All swap slots covered by the folio must have a non-zero swap count.
@@ -456,8 +454,7 @@ void swap_update_readahead(struct folio *folio, struct vm_area_struct *vma,
  */
 static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
                                                  struct folio *folio,
-                                                 gfp_t gfp, bool charged,
-                                                 bool skip_if_exists)
+                                                 gfp_t gfp, bool charged)
 {
        struct folio *swapcache = NULL;
        void *shadow;
@@ -477,7 +474,7 @@ static struct folio *__swap_cache_prepare_and_add(swp_entry_t entry,
                 * might return a folio that is irrelevant to the faulting
                 * entry because @entry is aligned down. Just return NULL.
                 */
-               if (ret != -EEXIST || skip_if_exists || folio_test_large(folio))
+               if (ret != -EEXIST || folio_test_large(folio))
                        goto failed;
 
                swapcache = swap_cache_get_folio(entry);
@@ -510,8 +507,6 @@ failed:
  * @mpol: NUMA memory allocation policy to be applied
  * @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
  * @new_page_allocated: sets true if allocation happened, false otherwise
- * @skip_if_exists: if the slot is a partially cached state, return NULL.
- *                  This is a workaround that would be removed shortly.
  *
  * Allocate a folio in the swap cache for one swap slot, typically before
  * doing IO (e.g. swap in or zswap writeback). The swap slot indicated by
@@ -524,8 +519,7 @@ failed:
  */
 struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
                                     struct mempolicy *mpol, pgoff_t ilx,
-                                    bool *new_page_allocated,
-                                    bool skip_if_exists)
+                                    bool *new_page_allocated)
 {
        struct swap_info_struct *si = __swap_entry_to_info(entry);
        struct folio *folio;
@@ -546,8 +540,7 @@ struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_mask,
        if (!folio)
                return NULL;
        /* Try add the new folio, returns existing folio or NULL on failure. */
-       result = __swap_cache_prepare_and_add(entry, folio, gfp_mask,
-                                             false, skip_if_exists);
+       result = __swap_cache_prepare_and_add(entry, folio, gfp_mask, false);
        if (result == folio)
                *new_page_allocated = true;
        else
@@ -576,7 +569,7 @@ struct folio *swapin_folio(swp_entry_t entry, struct folio *folio)
        unsigned long nr_pages = folio_nr_pages(folio);
 
        entry = swp_entry(swp_type(entry), round_down(offset, nr_pages));
-       swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true, false);
+       swapcache = __swap_cache_prepare_and_add(entry, folio, 0, true);
        if (swapcache == folio)
                swap_read_folio(folio, NULL);
        return swapcache;
@@ -604,7 +597,7 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 
        mpol = get_vma_policy(vma, addr, 0, &ilx);
        folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-                                       &page_allocated, false);
+                                      &page_allocated);
        mpol_cond_put(mpol);
 
        if (page_allocated)
@@ -723,7 +716,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
                /* Ok, do the async read-ahead now */
                folio = swap_cache_alloc_folio(
                        swp_entry(swp_type(entry), offset), gfp_mask, mpol, ilx,
-                       &page_allocated, false);
+                       &page_allocated);
                if (!folio)
                        continue;
                if (page_allocated) {
@@ -741,7 +734,7 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 skip:
        /* The page was likely read above, so no need for plugging here */
        folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-                                       &page_allocated, false);
+                                      &page_allocated);
        if (unlikely(page_allocated))
                swap_read_folio(folio, NULL);
        return folio;
@@ -846,7 +839,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
                                continue;
                }
                folio = swap_cache_alloc_folio(entry, gfp_mask, mpol, ilx,
-                                               &page_allocated, false);
+                                              &page_allocated);
                if (si)
                        put_swap_device(si);
                if (!folio)
@@ -868,7 +861,7 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
 skip:
        /* The folio was likely read above, so no need for plugging here */
        folio = swap_cache_alloc_folio(targ_entry, gfp_mask, mpol, targ_ilx,
-                                       &page_allocated, false);
+                                      &page_allocated);
        if (unlikely(page_allocated))
                swap_read_folio(folio, NULL);
        return folio;
index 3e99215915c5fc422918ea8f5e6d0ddaff5d985f..a3811b05ab579837dba2375e0c1175a07dfb2e1a 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1014,7 +1014,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 
        mpol = get_task_policy(current);
        folio = swap_cache_alloc_folio(swpentry, GFP_KERNEL, mpol,
-                                      NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
+                                      NO_INTERLEAVE_INDEX, &folio_was_allocated);
        put_swap_device(si);
        if (!folio)
                return -ENOMEM;