return NULL;
}
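+/*
+ * Insert @folio into the swap cache, covering the slot of @entry and
+ * the following slots for large folios.
+ *
+ * The caller must hold the lock of @ci (the cluster of @entry), have the
+ * folio locked, swap backed and not yet in the swap cache, and handle
+ * the swap_map (SWAP_HAS_CACHE) update itself. Each covered swap table
+ * slot is set to point to the folio, references are taken for the swap
+ * cache, and the swap cache statistics are updated.
+ */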
+void __swap_cache_add_folio(struct swap_cluster_info *ci,
+ struct folio *folio, swp_entry_t entry)
+{
+ unsigned long new_tb;
+ unsigned int ci_start, ci_off, ci_end;
+ unsigned long nr_pages = folio_nr_pages(folio);
+
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_swapbacked(folio), folio);
+
+ new_tb = folio_to_swp_tb(folio);
+ ci_start = swp_cluster_offset(entry);
+ ci_off = ci_start;
+ ci_end = ci_start + nr_pages;
+ do {
+ VM_WARN_ON_ONCE(swp_tb_is_folio(__swap_table_get(ci, ci_off)));
+ __swap_table_set(ci, ci_off, new_tb);
+ } while (++ci_off < ci_end);
+
+ folio_ref_add(folio, nr_pages);
+ folio_set_swapcache(folio);
+ folio->swap = entry;
+
+ node_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages);
+ lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr_pages);
+}
+
/**
* swap_cache_add_folio - Add a folio into the swap cache.
* @folio: The folio to be added.
* @entry: The swap entry corresponding to the folio.
- * @gfp: gfp_mask for XArray node allocation.
* @shadowp: If a shadow is found, return the shadow.
- * @alloc: If it's the allocator that is trying to insert a folio. Allocator
- * sets SWAP_HAS_CACHE to pin slots before insert so skip map update.
*
* Context: Caller must ensure @entry is valid and protect the swap device
* with reference count or locks.
*/
-int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
- void **shadowp, bool alloc)
+static int swap_cache_add_folio(struct folio *folio, swp_entry_t entry,
+ void **shadowp)
{
int err;
void *shadow = NULL;
+ unsigned long old_tb;
struct swap_info_struct *si;
- unsigned long old_tb, new_tb;
struct swap_cluster_info *ci;
unsigned int ci_start, ci_off, ci_end, offset;
unsigned long nr_pages = folio_nr_pages(folio);
- VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
- VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
- VM_WARN_ON_ONCE_FOLIO(!folio_test_swapbacked(folio), folio);
-
si = __swap_entry_to_info(entry);
- new_tb = folio_to_swp_tb(folio);
ci_start = swp_cluster_offset(entry);
ci_end = ci_start + nr_pages;
ci_off = ci_start;
err = -EEXIST;
goto failed;
}
- if (!alloc && unlikely(!__swap_count(swp_entry(swp_type(entry), offset)))) {
+ if (unlikely(!__swap_count(swp_entry(swp_type(entry), offset)))) {
err = -ENOENT;
goto failed;
}
* Still need to pin the slots with SWAP_HAS_CACHE since
* swap allocator depends on that.
*/
- if (!alloc)
- __swapcache_set_cached(si, ci, swp_entry(swp_type(entry), offset));
- __swap_table_set(ci, ci_off, new_tb);
+ __swapcache_set_cached(si, ci, swp_entry(swp_type(entry), offset));
offset++;
} while (++ci_off < ci_end);
-
- folio_ref_add(folio, nr_pages);
- folio_set_swapcache(folio);
- folio->swap = entry;
+ __swap_cache_add_folio(ci, folio, entry);
swap_cluster_unlock(ci);
-
- node_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages);
- lruvec_stat_mod_folio(folio, NR_SWAPCACHE, nr_pages);
-
if (shadowp)
*shadowp = shadow;
return 0;
__folio_set_locked(folio);
__folio_set_swapbacked(folio);
for (;;) {
- ret = swap_cache_add_folio(folio, entry, &shadow, false);
+ ret = swap_cache_add_folio(folio, entry, &shadow);
if (!ret)
break;
}
}
-static bool cluster_alloc_range(struct swap_info_struct *si, struct swap_cluster_info *ci,
- unsigned int start, unsigned char usage,
- unsigned int order)
+static bool cluster_alloc_range(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ struct folio *folio,
+ unsigned int offset)
{
- unsigned int nr_pages = 1 << order;
+ unsigned long nr_pages;
+ unsigned int order;
lockdep_assert_held(&ci->lock);
if (!(si->flags & SWP_WRITEOK))
return false;
+ /*
+ * All mm swap allocations start with a folio (folio_alloc_swap),
+ * which is also the only allocation path for large order allocations.
+ * Such swap slots start with count == 0; the count is increased
+ * when the folio is unmapped.
+ *
+ * Otherwise, it's an exclusive order 0 allocation for hibernation.
+ * The slot starts with count == 1 and never increases.
+ */
+ if (likely(folio)) {
+ order = folio_order(folio);
+ nr_pages = 1 << order;
+ /*
+ * Pin the slots with SWAP_HAS_CACHE to satisfy swap_dup_entries.
+ * This is the legacy allocation behavior and will be dropped soon.
+ */
+ memset(si->swap_map + offset, SWAP_HAS_CACHE, nr_pages);
+ __swap_cache_add_folio(ci, folio, swp_entry(si->type, offset));
+ } else if (IS_ENABLED(CONFIG_HIBERNATION)) {
+ order = 0;
+ nr_pages = 1;
+ WARN_ON_ONCE(si->swap_map[offset]);
+ si->swap_map[offset] = 1;
+ swap_cluster_assert_table_empty(ci, offset, 1);
+ } else {
+ /* Allocation without a folio is only possible with hibernation */
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
/*
* The first allocation in a cluster makes the
* cluster exclusive to this order
*/
if (cluster_is_empty(ci))
ci->order = order;
-
- memset(si->swap_map + start, usage, nr_pages);
- swap_cluster_assert_table_empty(ci, start, nr_pages);
- swap_range_alloc(si, nr_pages);
ci->count += nr_pages;
+ swap_range_alloc(si, nr_pages);
return true;
}
/* Try use a new cluster for current CPU and allocate from it. */
static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
struct swap_cluster_info *ci,
- unsigned long offset,
- unsigned int order,
- unsigned char usage)
+ struct folio *folio, unsigned long offset)
{
unsigned int next = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID;
unsigned long start = ALIGN_DOWN(offset, SWAPFILE_CLUSTER);
unsigned long end = min(start + SWAPFILE_CLUSTER, si->max);
+ unsigned int order = likely(folio) ? folio_order(folio) : 0;
unsigned int nr_pages = 1 << order;
bool need_reclaim, ret, usable;
if (!ret)
continue;
}
- if (!cluster_alloc_range(si, ci, offset, usage, order))
+ if (!cluster_alloc_range(si, ci, folio, offset))
break;
found = offset;
offset += nr_pages;
static unsigned int alloc_swap_scan_list(struct swap_info_struct *si,
struct list_head *list,
- unsigned int order,
- unsigned char usage,
+ struct folio *folio,
bool scan_all)
{
unsigned int found = SWAP_ENTRY_INVALID;
if (!ci)
break;
offset = cluster_offset(si, ci);
- found = alloc_swap_scan_cluster(si, ci, offset, order, usage);
+ found = alloc_swap_scan_cluster(si, ci, folio, offset);
if (found)
break;
} while (scan_all);
- * Try to allocate swap entries with specified order and try set a new
- * cluster for current CPU too.
+ * Try to allocate swap entries with the specified order and try to set
+ * a new cluster for the current CPU too.
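+ *
+ * If a folio is passed in, a successful allocation also adds the folio
+ * to the swap cache and pins the slots with SWAP_HAS_CACHE; a NULL
+ * folio means an exclusive order 0 allocation for hibernation.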
*/
-static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int order,
- unsigned char usage)
+static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si,
+ struct folio *folio)
{
struct swap_cluster_info *ci;
+ unsigned int order = likely(folio) ? folio_order(folio) : 0;
unsigned int offset = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID;
/*
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
offset = cluster_offset(si, ci);
- found = alloc_swap_scan_cluster(si, ci, offset,
- order, usage);
+ found = alloc_swap_scan_cluster(si, ci, folio, offset);
} else {
swap_cluster_unlock(ci);
}
* to spread out the writes.
*/
if (si->flags & SWP_PAGE_DISCARD) {
- found = alloc_swap_scan_list(si, &si->free_clusters, order, usage,
- false);
+ found = alloc_swap_scan_list(si, &si->free_clusters, folio, false);
if (found)
goto done;
}
if (order < PMD_ORDER) {
- found = alloc_swap_scan_list(si, &si->nonfull_clusters[order],
- order, usage, true);
+ found = alloc_swap_scan_list(si, &si->nonfull_clusters[order], folio, true);
if (found)
goto done;
}
if (!(si->flags & SWP_PAGE_DISCARD)) {
- found = alloc_swap_scan_list(si, &si->free_clusters, order, usage,
- false);
+ found = alloc_swap_scan_list(si, &si->free_clusters, folio, false);
if (found)
goto done;
}
* failure is not critical. Scanning one cluster still
* keeps the list rotated and reclaimed (for HAS_CACHE).
*/
- found = alloc_swap_scan_list(si, &si->frag_clusters[order], order,
- usage, false);
+ found = alloc_swap_scan_list(si, &si->frag_clusters[order], folio, false);
if (found)
goto done;
}
* Clusters here have at least one usable slots and can't fail order 0
* allocation, but reclaim may drop si->lock and race with another user.
*/
- found = alloc_swap_scan_list(si, &si->frag_clusters[o],
- 0, usage, true);
+ found = alloc_swap_scan_list(si, &si->frag_clusters[o], folio, true);
if (found)
goto done;
- found = alloc_swap_scan_list(si, &si->nonfull_clusters[o],
- 0, usage, true);
+ found = alloc_swap_scan_list(si, &si->nonfull_clusters[o], folio, true);
if (found)
goto done;
}
* Fast path try to get swap entries with specified order from current
* CPU's swap entry pool (a cluster).
*/
-static bool swap_alloc_fast(swp_entry_t *entry,
- int order)
+static bool swap_alloc_fast(struct folio *folio)
{
+ unsigned int order = folio_order(folio);
struct swap_cluster_info *ci;
struct swap_info_struct *si;
- unsigned int offset, found = SWAP_ENTRY_INVALID;
+ unsigned int offset;
/*
* Once allocated, swap_info_struct will never be completely freed,
if (cluster_is_usable(ci, order)) {
if (cluster_is_empty(ci))
offset = cluster_offset(si, ci);
- found = alloc_swap_scan_cluster(si, ci, offset, order, SWAP_HAS_CACHE);
- if (found)
- *entry = swp_entry(si->type, found);
+ alloc_swap_scan_cluster(si, ci, folio, offset);
} else {
swap_cluster_unlock(ci);
}
put_swap_device(si);
- return !!found;
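+ /* A successful allocation also added the folio to the swap cache */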
+ return folio_test_swapcache(folio);
}
/* Rotate the device and switch to a new cluster */
-static void swap_alloc_slow(swp_entry_t *entry,
- int order)
+static void swap_alloc_slow(struct folio *folio)
{
- unsigned long offset;
struct swap_info_struct *si, *next;
spin_lock(&swap_avail_lock);
plist_requeue(&si->avail_list, &swap_avail_head);
spin_unlock(&swap_avail_lock);
if (get_swap_device_info(si)) {
- offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE);
+ cluster_alloc_swap_entry(si, folio);
put_swap_device(si);
- if (offset) {
- *entry = swp_entry(si->type, offset);
+ if (folio_test_swapcache(folio))
return;
- }
- if (order)
+ if (folio_test_large(folio))
return;
}
{
unsigned int order = folio_order(folio);
unsigned int size = 1 << order;
- swp_entry_t entry = {};
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
again:
local_lock(&percpu_swap_cluster.lock);
- if (!swap_alloc_fast(&entry, order))
- swap_alloc_slow(&entry, order);
+ if (!swap_alloc_fast(folio))
+ swap_alloc_slow(folio);
local_unlock(&percpu_swap_cluster.lock);
- if (unlikely(!order && !entry.val)) {
+ if (!order && unlikely(!folio_test_swapcache(folio))) {
if (swap_sync_discard())
goto again;
}
/* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
- if (mem_cgroup_try_charge_swap(folio, entry))
- goto out_free;
+ if (unlikely(mem_cgroup_try_charge_swap(folio, folio->swap)))
+ swap_cache_del_folio(folio);
- if (!entry.val)
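+ /* Allocation or charging failure leaves the folio out of the swap cache */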
+ if (unlikely(!folio_test_swapcache(folio)))
return -ENOMEM;
- /*
- * Allocator has pinned the slots with SWAP_HAS_CACHE
- * so it should never fail
- */
- WARN_ON_ONCE(swap_cache_add_folio(folio, entry, NULL, true));
-
- /*
- * Allocator should always allocate aligned entries so folio based
- * operations never crossed more than one cluster.
- */
- VM_WARN_ON_ONCE_FOLIO(!IS_ALIGNED(folio->swap.val, size), folio);
-
return 0;
-
-out_free:
- put_swap_folio(folio, entry);
- return -ENOMEM;
}
/**
partial_free_cluster(si, ci);
}
-/*
- * Called after dropping swapcache to decrease refcnt to swap entries.
- */
-void put_swap_folio(struct folio *folio, swp_entry_t entry)
-{
- struct swap_info_struct *si;
- struct swap_cluster_info *ci;
- unsigned long offset = swp_offset(entry);
- int size = 1 << swap_entry_order(folio_order(folio));
-
- si = _swap_info_get(entry);
- if (!si)
- return;
-
- ci = swap_cluster_lock(si, offset);
- if (swap_only_has_cache(si, offset, size))
- swap_entries_free(si, ci, entry, size);
- else
- for (int i = 0; i < size; i++, entry.val++)
- swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
- swap_cluster_unlock(ci);
-}
-
int __swap_count(swp_entry_t entry)
{
struct swap_info_struct *si = __swap_entry_to_info(entry);
* with swap table allocation.
*/
local_lock(&percpu_swap_cluster.lock);
- offset = cluster_alloc_swap_entry(si, 0, 1);
+ offset = cluster_alloc_swap_entry(si, NULL);
local_unlock(&percpu_swap_cluster.lock);
if (offset)
entry = swp_entry(si->type, offset);