mm/shmem, swap: tidy up swap entry splitting

author Kairui Song <kasong@tencent.com>

Mon, 28 Jul 2025 07:53:02 +0000 (15:53 +0800)

committer Andrew Morton <akpm@linux-foundation.org>

Sat, 2 Aug 2025 19:06:12 +0000 (12:06 -0700)
author Kairui Song <kasong@tencent.com>
Mon, 28 Jul 2025 07:53:02 +0000 (15:53 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Sat, 2 Aug 2025 19:06:12 +0000 (12:06 -0700)
diff --git a/mm/shmem.c b/mm/shmem.c

index 88058d53ae550ea08152cac0ff044db4b9e0f00b..5e10e27e8b73a3fb67089326feae997e2197eb07 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2303,14 +2303,16 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
         struct address_space *mapping = inode->i_mapping;
         struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL;
         struct shmem_inode_info *info = SHMEM_I(inode);
+       swp_entry_t swap, index_entry;
         struct swap_info_struct *si;
         struct folio *folio = NULL;
         bool skip_swapcache = false;
-       swp_entry_t swap;
         int error, nr_pages, order, split_order;
+       pgoff_t offset;
  
         VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
-       swap = radix_to_swp_entry(*foliop);
+       index_entry = radix_to_swp_entry(*foliop);
+       swap = index_entry;
         *foliop = NULL;
  
         if (is_poisoned_swp_entry(swap))
@@ -2358,46 +2360,35 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
                 }
  
                 /*
-                * Now swap device can only swap in order 0 folio, then we
-                * should split the large swap entry stored in the pagecache
-                * if necessary.
-                */
-               split_order = shmem_split_large_entry(inode, index, swap, gfp);
-               if (split_order < 0) {
-                       error = split_order;
-                       goto failed;
-               }
-
-               /*
-                * If the large swap entry has already been split, it is
+                * Now swap device can only swap in order 0 folio, it is
                  * necessary to recalculate the new swap entry based on
-                * the old order alignment.
+                * the offset, as the swapin index might be unalgined.
                  */
-               if (split_order > 0) {
-                       pgoff_t offset = index - round_down(index, 1 << split_order);
-
+               if (order) {
+                       offset = index - round_down(index, 1 << order);
                         swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
                 }
  
-               /* Here we actually start the io */
                 folio = shmem_swapin_cluster(swap, gfp, info, index);
                 if (!folio) {
                         error = -ENOMEM;
                         goto failed;
                 }
-       } else if (order > folio_order(folio)) {
+       }
+alloced:
+       if (order > folio_order(folio)) {
                 /*
-                * Swap readahead may swap in order 0 folios into swapcache
+                * Swapin may get smaller folios due to various reasons:
+                * It may fallback to order 0 due to memory pressure or race,
+                * swap readahead may swap in order 0 folios into swapcache
                  * asynchronously, while the shmem mapping can still stores
                  * large swap entries. In such cases, we should split the
                  * large swap entry to prevent possible data corruption.
                  */
-               split_order = shmem_split_large_entry(inode, index, swap, gfp);
+               split_order = shmem_split_large_entry(inode, index, index_entry, gfp);
                 if (split_order < 0) {
-                       folio_put(folio);
-                       folio = NULL;
                         error = split_order;
-                       goto failed;
+                       goto failed_nolock;
                 }
  
                 /*
@@ -2406,16 +2397,14 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
                  * the old order alignment.
                  */
                 if (split_order > 0) {
-                       pgoff_t offset = index - round_down(index, 1 << split_order);
-
-                       swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
+                       offset = index - round_down(index, 1 << split_order);
+                       swap = swp_entry(swp_type(swap), swp_offset(index_entry) + offset);
                 }
         } else if (order < folio_order(folio)) {
                 swap.val = round_down(swap.val, 1 << folio_order(folio));
                 index = round_down(index, 1 << folio_order(folio));
         }
  
-alloced:
         /*
          * We have to do this with the folio locked to prevent races.
          * The shmem_confirm_swap below only checks if the first swap
@@ -2479,12 +2468,13 @@ failed:
                 shmem_set_folio_swapin_error(inode, index, folio, swap,
                                              skip_swapcache);
  unlock:
-       if (skip_swapcache)
-               swapcache_clear(si, swap, folio_nr_pages(folio));
-       if (folio) {
+       if (folio)
                 folio_unlock(folio);
+failed_nolock:
+       if (skip_swapcache)
+               swapcache_clear(si, folio->swap, folio_nr_pages(folio));
+       if (folio)
                 folio_put(folio);
-       }
         put_swap_device(si);
  
         return error;
author	Kairui Song <kasong@tencent.com>
	Mon, 28 Jul 2025 07:53:02 +0000 (15:53 +0800)
committer	Andrew Morton <akpm@linux-foundation.org>
	Sat, 2 Aug 2025 19:06:12 +0000 (12:06 -0700)