#include <linux/swap_cgroup.h>
#include "swap_table.h"
#include "internal.h"
+#include "swap_table.h"
#include "swap.h"
static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
-static void swap_entries_free(struct swap_info_struct *si,
- struct swap_cluster_info *ci,
- unsigned long start, unsigned int nr_pages);
static void swap_range_alloc(struct swap_info_struct *si,
unsigned int nr_entries);
static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr);
static void swap_put_entry_locked(struct swap_info_struct *si,
struct swap_cluster_info *ci,
- unsigned long offset,
- unsigned char usage);
+ unsigned long offset);
static bool folio_swapcache_freeable(struct folio *folio);
static void move_cluster(struct swap_info_struct *si,
struct swap_cluster_info *ci, struct list_head *list,
return swap_type_to_info(swp_type(entry));
}
-static inline unsigned char swap_count(unsigned char ent)
-{
- return ent & ~SWAP_HAS_CACHE; /* may include COUNT_CONTINUED flag */
-}
-
/*
* Use the second highest bit of inuse_pages counter as the indicator
* if one swap device is on the available plist, so the atomic can
#define TTRS_FULL 0x4
static bool swap_only_has_cache(struct swap_info_struct *si,
- unsigned long offset, int nr_pages)
+ struct swap_cluster_info *ci,
+ unsigned long offset, int nr_pages)
{
+ unsigned int ci_off = offset % SWAPFILE_CLUSTER;
unsigned char *map = si->swap_map + offset;
unsigned char *map_end = map + nr_pages;
+ unsigned long swp_tb;
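+ /* Each slot must have a swap cache folio in the table and a zero map count. */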
do {
- VM_BUG_ON(!(*map & SWAP_HAS_CACHE));
- if (*map != SWAP_HAS_CACHE)
+ swp_tb = __swap_table_get(ci, ci_off);
+ VM_WARN_ON_ONCE(!swp_tb_is_folio(swp_tb));
+ if (*map)
return false;
+ ++ci_off;
} while (++map < map_end);
return true;
goto out_unlock;
/*
- * It's safe to delete the folio from swap cache only if the folio's
- * swap_map is HAS_CACHE only, which means the slots have no page table
+ * It's safe to delete the folio from the swap cache only if it has a
+ * swap count of 0, which means the slots have no page table
* reference or pending writeback, and can't be allocated to others.
*/
ci = swap_cluster_lock(si, offset);
- need_reclaim = swap_only_has_cache(si, offset, nr_pages);
+ need_reclaim = swap_only_has_cache(si, ci, offset, nr_pages);
swap_cluster_unlock(ci);
if (!need_reclaim)
goto out_unlock;
spin_unlock(&ci->lock);
do {
- if (swap_count(READ_ONCE(map[offset])))
+ if (READ_ONCE(map[offset]))
break;
swp_tb = swap_table_get(ci, offset % SWAPFILE_CLUSTER);
if (swp_tb_is_folio(swp_tb)) {
*/
for (offset = start; offset < end; offset++) {
swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
- if (swap_count(map[offset]) || !swp_tb_is_null(swp_tb))
+ if (map[offset] || !swp_tb_is_null(swp_tb))
return false;
}
return true;
do {
- if (swap_count(map[offset]))
+ if (map[offset])
return false;
swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
if (swp_tb_is_folio(swp_tb)) {
- WARN_ON_ONCE(!(map[offset] & SWAP_HAS_CACHE));
if (!vm_swap_full())
return false;
*need_reclaim = true;
if (likely(folio)) {
order = folio_order(folio);
nr_pages = 1 << order;
- /*
- * Pin the slot with SWAP_HAS_CACHE to satisfy swap_dup_entries.
- * This is the legacy allocation behavior, will drop it very soon.
- */
- memset(si->swap_map + offset, SWAP_HAS_CACHE, nr_pages);
__swap_cache_add_folio(ci, folio, swp_entry(si->type, offset));
} else if (IS_ENABLED(CONFIG_HIBERNATION)) {
order = 0;
to_scan--;
while (offset < end) {
- if (!swap_count(READ_ONCE(map[offset])) &&
- swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER))) {
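+ /* Reclaim slots whose only user is a swap cache folio (zero map count). */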
+ if (!READ_ONCE(map[offset]) &&
+ swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER))) {
spin_unlock(&ci->lock);
nr_reclaim = __try_to_reclaim_swap(si, offset,
TTRS_ANYWAY);
* Scanning only one fragment cluster is good enough. Order 0
* allocation will surely succeed, and large allocation
* failure is not critical. Scanning one cluster still
- * keeps the list rotated and reclaimed (for HAS_CACHE).
+ * keeps the list rotated and reclaimed (for clean swap cache).
*/
found = alloc_swap_scan_list(si, &si->frag_clusters[order], folio, false);
if (found)
do {
swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
count = si->swap_map[offset];
- VM_WARN_ON(swap_count(count) < 1 || count == SWAP_MAP_BAD);
- if (swap_count(count) == 1) {
+ VM_WARN_ON(count < 1 || count == SWAP_MAP_BAD);
+ if (count == 1) {
/* count == 1 and non-cached slots will be batch freed. */
if (!swp_tb_is_folio(swp_tb)) {
if (!batch_start)
continue;
}
/* count will be 0 after put, slot can be reclaimed */
- VM_WARN_ON(!(count & SWAP_HAS_CACHE));
need_reclaim = true;
}
/*
* slots will be freed when folio is removed from swap cache
* (__swap_cache_del_folio).
*/
- swap_put_entry_locked(si, ci, offset, 1);
+ swap_put_entry_locked(si, ci, offset);
if (batch_start) {
swap_entries_free(si, ci, batch_start, offset - batch_start);
batch_start = SWAP_ENTRY_INVALID;
offset = swp_offset(entry);
if (offset >= si->max)
goto bad_offset;
- if (data_race(!si->swap_map[swp_offset(entry)]))
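+ /* An entry with a zero count may still be in use by a swap cache folio. */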
+ if (data_race(!si->swap_map[swp_offset(entry)]) &&
+ !swap_cache_has_folio(entry))
goto bad_free;
return si;
static void swap_put_entry_locked(struct swap_info_struct *si,
struct swap_cluster_info *ci,
- unsigned long offset,
- unsigned char usage)
+ unsigned long offset)
{
unsigned char count;
- unsigned char has_cache;
count = si->swap_map[offset];
-
- has_cache = count & SWAP_HAS_CACHE;
- count &= ~SWAP_HAS_CACHE;
-
- if (usage == SWAP_HAS_CACHE) {
- VM_BUG_ON(!has_cache);
- has_cache = 0;
- } else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
+ if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
if (count == COUNT_CONTINUED) {
if (swap_count_continued(si, offset, count))
count = SWAP_MAP_MAX | COUNT_CONTINUED;
count--;
}
- usage = count | has_cache;
- if (usage)
- WRITE_ONCE(si->swap_map[offset], usage);
- else
+ WRITE_ONCE(si->swap_map[offset], count);
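+ /*
+  * Free the slot only when no swap cache folio still pins it; otherwise
+  * it is freed when the folio is removed from the swap cache.
+  */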
+ if (!count &&
+     !swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER)))
swap_entries_free(si, ci, offset, 1);
}
return NULL;
}
-/*
- * Check if it's the last ref of swap entry in the freeing path.
- */
-static inline bool __maybe_unused swap_is_last_ref(unsigned char count)
-{
- return (count == SWAP_HAS_CACHE) || (count == 1);
-}
-
/*
* Drop the last ref of swap entries; the caller has to ensure all entries
* belong to the same cgroup and cluster.
*/
-static void swap_entries_free(struct swap_info_struct *si,
- struct swap_cluster_info *ci,
- unsigned long offset, unsigned int nr_pages)
+void swap_entries_free(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ unsigned long offset, unsigned int nr_pages)
{
swp_entry_t entry = swp_entry(si->type, offset);
unsigned char *map = si->swap_map + offset;
ci->count -= nr_pages;
do {
- VM_BUG_ON(!swap_is_last_ref(*map));
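+ /* Freed slots may hold at most their last map reference (count 0 or 1). */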
+ VM_WARN_ON(*map > 1);
*map = 0;
} while (++map < map_end);
struct swap_info_struct *si = __swap_entry_to_info(entry);
pgoff_t offset = swp_offset(entry);
- return swap_count(si->swap_map[offset]);
+ return si->swap_map[offset];
}
/**
int count;
ci = swap_cluster_lock(si, offset);
- count = swap_count(si->swap_map[offset]);
+ count = si->swap_map[offset];
swap_cluster_unlock(ci);
return count && count != SWAP_MAP_BAD;
ci = swap_cluster_lock(si, offset);
- count = swap_count(si->swap_map[offset]);
+ count = si->swap_map[offset];
if (!(count & COUNT_CONTINUED))
goto out;
ci = swap_cluster_lock(si, offset);
if (nr_pages == 1) {
- if (swap_count(map[roffset]))
+ if (map[roffset])
ret = true;
goto unlock_out;
}
for (i = 0; i < nr_pages; i++) {
- if (swap_count(map[offset + i])) {
+ if (map[offset + i]) {
ret = true;
break;
}
return;
ci = swap_cluster_lock(si, offset);
- swap_put_entry_locked(si, ci, offset, 1);
+ swap_put_entry_locked(si, ci, offset);
swap_cluster_unlock(ci);
/* In theory readahead might add it to the swap cache by accident */
unsigned int prev)
{
unsigned int i;
+ unsigned long swp_tb;
unsigned char count;
/*
*/
for (i = prev + 1; i < si->max; i++) {
count = READ_ONCE(si->swap_map[i]);
- if (count && swap_count(count) != SWAP_MAP_BAD)
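+ /* The slot is still in use if it has a swap count or a swap cache folio. */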
+ swp_tb = swap_table_get(__swap_offset_to_cluster(si, i),
+ i % SWAPFILE_CLUSTER);
+ if (count == SWAP_MAP_BAD)
+ continue;
+ if (count || swp_tb_is_folio(swp_tb))
break;
if ((i % LATENCY_LIMIT) == 0)
cond_resched();
* Returns error code in the following cases.
* - success -> 0
* - swp_entry is invalid -> EINVAL
- * - swap-cache reference is requested but there is already one. -> EEXIST
- * - swap-cache reference is requested but the entry is not used. -> ENOENT
+ * - swap-mapped reference is requested but the entry is not used. -> ENOENT
* - swap-mapped reference requested but needs continued swap count. -> ENOMEM
*/
static int swap_dup_entries(struct swap_info_struct *si,
unsigned char usage, int nr)
{
int i;
- unsigned char count, has_cache;
+ unsigned char count;
for (i = 0; i < nr; i++) {
count = si->swap_map[offset + i];
-
/*
* For swap out, the allocator never allocates bad slots. For swap in,
* readahead is guarded by swap_entry_swapped.
*/
- if (WARN_ON(swap_count(count) == SWAP_MAP_BAD))
+ if (WARN_ON(count == SWAP_MAP_BAD))
return -ENOENT;
-
- has_cache = count & SWAP_HAS_CACHE;
- count &= ~SWAP_HAS_CACHE;
-
- if (!count && !has_cache) {
+ /*
+  * Swap count duplication must be guarded either by the swap cache folio
+  * (from folio_dup_swap) or by an external lock on an existing entry
+  * (from swap_dup_entry_direct).
+  */
+ if (WARN_ON(!count &&
+ !swp_tb_is_folio(__swap_table_get(ci, offset % SWAPFILE_CLUSTER))))
return -ENOENT;
- } else if (usage == SWAP_HAS_CACHE) {
- if (has_cache)
- return -EEXIST;
- } else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX) {
+ if (WARN_ON((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX))
return -EINVAL;
- }
}
for (i = 0; i < nr; i++) {
count = si->swap_map[offset + i];
- has_cache = count & SWAP_HAS_CACHE;
- count &= ~SWAP_HAS_CACHE;
-
- if (usage == SWAP_HAS_CACHE)
- has_cache = SWAP_HAS_CACHE;
- else if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
+ if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
count += usage;
else if (swap_count_continued(si, offset + i, count))
count = COUNT_CONTINUED;
return -ENOMEM;
}
- WRITE_ONCE(si->swap_map[offset + i], count | has_cache);
+ WRITE_ONCE(si->swap_map[offset + i], count);
}
return 0;
return err;
}
-/* Mark the swap map as HAS_CACHE, caller need to hold the cluster lock */
-void __swapcache_set_cached(struct swap_info_struct *si,
- struct swap_cluster_info *ci,
- swp_entry_t entry)
-{
- WARN_ON(swap_dup_entries(si, ci, swp_offset(entry), SWAP_HAS_CACHE, 1));
-}
-
-/* Clear the swap map as !HAS_CACHE, caller need to hold the cluster lock */
-void __swapcache_clear_cached(struct swap_info_struct *si,
- struct swap_cluster_info *ci,
- swp_entry_t entry, unsigned int nr)
-{
- if (swap_only_has_cache(si, swp_offset(entry), nr)) {
- swap_entries_free(si, ci, swp_offset(entry), nr);
- } else {
- for (int i = 0; i < nr; i++, entry.val++)
- swap_put_entry_locked(si, ci, swp_offset(entry), SWAP_HAS_CACHE);
- }
-}
-
/*
* add_swap_count_continuation - called when a swap count is duplicated
* beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
ci = swap_cluster_lock(si, offset);
- count = swap_count(si->swap_map[offset]);
+ count = si->swap_map[offset];
if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
/*