return folio_entry.val == round_down(entry.val, nr_pages);
}
+/* Temporary internal helpers */
+void __swapcache_set_cached(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ swp_entry_t entry);
+void __swapcache_clear_cached(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ swp_entry_t entry, unsigned int nr);
+
/*
* All swap cache helpers below require the caller to ensure the swap entries
* used are valid and stabilize the device by any of the following ways:
*/
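+/*
+ * For example, a caller that has not locked a swap cache folio can
+ * stabilize the device with a temporary reference (illustrative sketch
+ * only, not part of this patch):
+ *
+ *   si = get_swap_device(entry);
+ *   if (si) {
+ *       folio = swap_cache_get_folio(entry);
+ *       put_swap_device(si);
+ *   }
+ */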
struct folio *swap_cache_get_folio(swp_entry_t entry);
void *swap_cache_get_shadow(swp_entry_t entry);
-void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow);
+int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
+ void **shadow, bool alloc);
void swap_cache_del_folio(struct folio *folio);
struct folio *swap_cache_alloc_folio(swp_entry_t entry, gfp_t gfp_flags,
struct mempolicy *mpol, pgoff_t ilx,
return NULL;
}
-static inline void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadow)
+static inline int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
+ void **shadow, bool alloc)
{
+ return -ENOENT;
}
static inline void swap_cache_del_folio(struct folio *folio)
* @entry: The swap entry corresponding to the folio.
* @gfp: gfp_mask for XArray node allocation.
* @shadowp: If a shadow is found, return the shadow.
+ * @alloc: Whether the caller is the swap allocator. The allocator has
+ * already pinned the slots with SWAP_HAS_CACHE, so the swap map update
+ * is skipped here.
*
* Context: Caller must ensure @entry is valid and protect the swap device
* with reference count or locks.
- * The caller also needs to update the corresponding swap_map slots with
- * SWAP_HAS_CACHE bit to avoid race or conflict.
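+ *
+ * Return: 0 on success, -EEXIST if any slot already holds a folio,
+ * -ENOENT if the swap entries are no longer valid.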
*/
-void swap_cache_add_folio(struct folio *folio, swp_entry_t entry, void **shadowp)
+int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
+ void **shadowp, bool alloc)
{
+ int err;
void *shadow = NULL;
+ struct swap_info_struct *si;
unsigned long old_tb, new_tb;
struct swap_cluster_info *ci;
- unsigned int ci_start, ci_off, ci_end;
+ unsigned int ci_start, ci_off, ci_end, offset;
unsigned long nr_pages = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
VM_WARN_ON_ONCE_FOLIO(!folio_test_swapbacked(folio), folio);
+ si = __swap_entry_to_info(entry);
new_tb = folio_to_swp_tb(folio);
ci_start = swp_cluster_offset(entry);
ci_end = ci_start + nr_pages;
ci_off = ci_start;
- ci = swap_cluster_lock(__swap_entry_to_info(entry), swp_offset(entry));
+ offset = swp_offset(entry);
+ ci = swap_cluster_lock(si, offset);
+ if (unlikely(!ci->table)) {
+ err = -ENOENT;
+ goto failed;
+ }
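+ /*
+ * First pass: verify every slot before touching anything. Bail
+ * out if a folio is already present, or if the entries have no
+ * swap count left (unless the allocator already pinned them).
+ */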
do {
- old_tb = __swap_table_xchg(ci, ci_off, new_tb);
- WARN_ON_ONCE(swp_tb_is_folio(old_tb));
+ old_tb = __swap_table_get(ci, ci_off);
+ if (unlikely(swp_tb_is_folio(old_tb))) {
+ err = -EEXIST;
+ goto failed;
+ }
+ if (!alloc && unlikely(!__swap_count(swp_entry(swp_type(entry), offset)))) {
+ err = -ENOENT;
+ goto failed;
+ }
if (swp_tb_is_shadow(old_tb))
shadow = swp_tb_to_shadow(old_tb);
+ offset++;
+ } while (++ci_off < ci_end);
+
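+ /* Second pass: nothing can fail now, install the folio in every slot */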
+ ci_off = ci_start;
+ offset = swp_offset(entry);
+ do {
+ /*
+ * The slots still need to be pinned with SWAP_HAS_CACHE
+ * because the swap allocator relies on it.
+ */
+ if (!alloc)
+ __swapcache_set_cached(si, ci, swp_entry(swp_type(entry), offset));
+ __swap_table_set(ci, ci_off, new_tb);
+ offset++;
} while (++ci_off < ci_end);
folio_ref_add(folio, nr_pages);
if (shadowp)
*shadowp = shadow;
+ return 0;
+
+failed:
+ swap_cluster_unlock(ci);
+ return err;
}
/**
void __swap_cache_del_folio(struct swap_cluster_info *ci, struct folio *folio,
swp_entry_t entry, void *shadow)
{
+ struct swap_info_struct *si;
unsigned long old_tb, new_tb;
unsigned int ci_start, ci_off, ci_end;
unsigned long nr_pages = folio_nr_pages(folio);
VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
VM_WARN_ON_ONCE_FOLIO(folio_test_writeback(folio), folio);
+ si = __swap_entry_to_info(entry);
new_tb = shadow_swp_to_tb(shadow);
ci_start = swp_cluster_offset(entry);
ci_end = ci_start + nr_pages;
folio_clear_swapcache(folio);
node_stat_mod_folio(folio, NR_FILE_PAGES, -nr_pages);
lruvec_stat_mod_folio(folio, NR_SWAPCACHE, -nr_pages);
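+ /* Drop the SWAP_HAS_CACHE pins; this may free the entries entirely */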
+ __swapcache_clear_cached(si, ci, entry, nr_pages);
}
/**
__swap_cache_del_folio(ci, folio, entry, NULL);
swap_cluster_unlock(ci);
- put_swap_folio(folio, entry);
folio_ref_sub(folio, folio_nr_pages(folio));
}
gfp_t gfp, bool charged,
bool skip_if_exists)
{
- struct folio *swapcache;
+ struct folio *swapcache = NULL;
void *shadow;
int ret;
- /*
- * Check and pin the swap map with SWAP_HAS_CACHE, then add the folio
- * into the swap cache. Loop with a schedule delay if raced with
- * another process setting SWAP_HAS_CACHE. This hackish loop will
- * be fixed very soon.
- */
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
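+ /*
+ * Try to add the folio to the swap cache. On -EEXIST another
+ * folio already owns the entry, so fall back to looking it up;
+ * any other failure means the entry is no longer valid.
+ */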
for (;;) {
- ret = swapcache_prepare(entry, folio_nr_pages(folio));
+ ret = swap_cache_add_folio(folio, entry, &shadow, false);
if (!ret)
break;
/*
- * The skip_if_exists is for protecting against a recursive
- * call to this helper on the same entry waiting forever
- * here because SWAP_HAS_CACHE is set but the folio is not
- * in the swap cache yet. This can happen today if
- * mem_cgroup_swapin_charge_folio() below triggers reclaim
- * through zswap, which may call this helper again in the
- * writeback path.
- *
- * Large order allocation also needs special handling on
+ * Large order allocation needs special handling on
* race: if a smaller folio exists in cache, swapin needs
* to fallback to order 0, and doing a swap cache lookup
* might return a folio that is irrelevant to the faulting
* entry because @entry is aligned down. Just return NULL.
*/
if (ret != -EEXIST || skip_if_exists || folio_test_large(folio))
- return NULL;
+ goto failed;
- /*
- * Check the swap cache again, we can only arrive
- * here because swapcache_prepare returns -EEXIST.
- */
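+ /*
+ * We can only get here on -EEXIST: another folio owns the
+ * entry. Look it up; if it is gone already, retry the insert.
+ */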
swapcache = swap_cache_get_folio(entry);
if (swapcache)
- return swapcache;
-
- /*
- * We might race against __swap_cache_del_folio(), and
- * stumble across a swap_map entry whose SWAP_HAS_CACHE
- * has not yet been cleared. Or race against another
- * swap_cache_alloc_folio(), which has set SWAP_HAS_CACHE
- * in swap_map, but not yet added its folio to swap cache.
- */
- schedule_timeout_uninterruptible(1);
+ goto failed;
}
- __folio_set_locked(folio);
- __folio_set_swapbacked(folio);
-
if (!charged && mem_cgroup_swapin_charge_folio(folio, NULL, gfp, entry)) {
- put_swap_folio(folio, entry);
- folio_unlock(folio);
- return NULL;
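+ /* The folio is already in the swap cache, remove it on charge failure */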
+ swap_cache_del_folio(folio);
+ goto failed;
}
- swap_cache_add_folio(folio, entry, &shadow);
memcg1_swapin(entry, folio_nr_pages(folio));
if (shadow)
workingset_refault(folio, shadow);
/* Caller will initiate read into locked folio */
folio_add_lru(folio);
return folio;
+
+failed:
+ folio_unlock(folio);
+ return swapcache;
}
/**
if (!entry.val)
return -ENOMEM;
- swap_cache_add_folio(folio, entry, NULL);
+ /*
+ * The allocator has already pinned the slots with SWAP_HAS_CACHE,
+ * so this insertion should never fail.
+ */
+ WARN_ON_ONCE(swap_cache_add_folio(folio, entry, NULL, true));
return 0;
* do_swap_page()
* ... swapoff+swapon
* swap_cache_alloc_folio()
- * swapcache_prepare()
- * __swap_duplicate()
- * // check swap_map
+ * swap_cache_add_folio()
+ * // check swap_map
* // verify PTE not changed
*
* In __swap_duplicate(), the swap_map need to be checked before
return err;
}
-/*
- * @entry: first swap entry from which we allocate nr swap cache.
- *
- * Called when allocating swap cache for existing swap entries,
- * This can return error codes. Returns 0 at success.
- * -EEXIST means there is a swap cache.
- * Note: return code is different from swap_duplicate().
- */
-int swapcache_prepare(swp_entry_t entry, int nr)
+/* Mark the swap map slot as SWAP_HAS_CACHE; the caller must hold the cluster lock */
+void __swapcache_set_cached(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ swp_entry_t entry)
+{
+ WARN_ON(swap_dup_entries(si, ci, swp_offset(entry), SWAP_HAS_CACHE, 1));
+}
+
+/* Clear SWAP_HAS_CACHE in the swap map; the caller must hold the cluster lock */
+void __swapcache_clear_cached(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ swp_entry_t entry, unsigned int nr)
{
- return __swap_duplicate(entry, SWAP_HAS_CACHE, nr);
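+ /*
+ * If the cache was the only remaining user, free the entries
+ * entirely; otherwise just drop the SWAP_HAS_CACHE reference
+ * on each slot.
+ */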
+ if (swap_only_has_cache(si, swp_offset(entry), nr)) {
+ swap_entries_free(si, ci, entry, nr);
+ } else {
+ for (int i = 0; i < nr; i++, entry.val++)
+ swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
+ }
}
/*