static void free_swap_count_continuations(struct swap_info_struct *);
static void swap_entries_free(struct swap_info_struct *si,
struct swap_cluster_info *ci,
- swp_entry_t entry, unsigned int nr_pages);
+ unsigned long start, unsigned int nr_pages);
static void swap_range_alloc(struct swap_info_struct *si,
unsigned int nr_entries);
static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr);
-static bool swap_entries_put_map(struct swap_info_struct *si,
- swp_entry_t entry, int nr);
+static void swap_put_entry_locked(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ unsigned long offset,
+ unsigned char usage);
static bool folio_swapcache_freeable(struct folio *folio);
static void move_cluster(struct swap_info_struct *si,
struct swap_cluster_info *ci, struct list_head *list,
return true;
}
-static bool swap_is_last_map(struct swap_info_struct *si,
- unsigned long offset, int nr_pages, bool *has_cache)
-{
- unsigned char *map = si->swap_map + offset;
- unsigned char *map_end = map + nr_pages;
- unsigned char count = *map;
-
- if (swap_count(count) != 1)
- return false;
-
- while (++map < map_end) {
- if (*map != count)
- return false;
- }
-
- *has_cache = !!(count & SWAP_HAS_CACHE);
- return true;
-}
-
/*
* returns number of pages in the folio that backs the swap entry. If positive,
* the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
return false;
}
+/**
+ * swap_put_entries_cluster - Decrease the swap count of a set of slots.
+ * @si: The swap device.
+ * @start: start offset of slots.
+ * @nr: number of slots.
+ * @reclaim_cache: if true, also reclaim the swap cache.
+ *
+ * This helper decreases the swap count of a set of slots and tries to
+ * batch free them. It also reclaims the swap cache if @reclaim_cache
+ * is true.
+ * Context: The caller must ensure that all slots belong to the same
+ * cluster and that their swap counts won't underflow.
+ */
+static void swap_put_entries_cluster(struct swap_info_struct *si,
+ unsigned long start, int nr,
+ bool reclaim_cache)
+{
+ unsigned long offset = start, end = start + nr;
+ unsigned long batch_start = SWAP_ENTRY_INVALID;
+ struct swap_cluster_info *ci;
+ bool need_reclaim = false;
+ int nr_reclaimed;
+ unsigned long swp_tb;
+ unsigned int count;
+
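+ /* All slots are in the same cluster, so one cluster lock covers the whole walk. */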
+ ci = swap_cluster_lock(si, offset);
+ do {
+ swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
+ count = si->swap_map[offset];
+ VM_WARN_ON(swap_count(count) < 1 || count == SWAP_MAP_BAD);
+ if (swap_count(count) == 1) {
+ /* Slots with count == 1 and no swap cache are batch freed. */
+ if (!swp_tb_is_folio(swp_tb)) {
+ if (!batch_start)
+ batch_start = offset;
+ continue;
+ }
+ /* count will drop to 0 after the put; the cached slot can then be reclaimed */
+ VM_WARN_ON(!(count & SWAP_HAS_CACHE));
+ need_reclaim = true;
+ }
+ /*
+ * A slot with count != 1 or with a swap cache reference can't be
+ * freed here. Put its swap count, then flush the pending batch it
+ * interrupted. Cached slots are freed once the folio is removed
+ * from the swap cache (__swap_cache_del_folio).
+ */
+ swap_put_entry_locked(si, ci, offset, 1);
+ if (batch_start) {
+ swap_entries_free(si, ci, batch_start, offset - batch_start);
+ batch_start = SWAP_ENTRY_INVALID;
+ }
+ } while (++offset < end);
+
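+ /* Flush a pending batch that ran to the end of the range. */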
+ if (batch_start)
+ swap_entries_free(si, ci, batch_start, offset - batch_start);
+ swap_cluster_unlock(ci);
+
+ if (!need_reclaim || !reclaim_cache)
+ return;
+
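+ /*
+ * Walk the range again to reclaim the swap cache. Folios are
+ * naturally aligned in swap, so a non-zero return value from
+ * __try_to_reclaim_swap() lets us skip ahead to the next folio
+ * boundary; advance by one slot when no folio was found.
+ */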
+ offset = start;
+ do {
+ nr_reclaimed = __try_to_reclaim_swap(si, offset,
+ TTRS_UNMAPPED | TTRS_FULL);
+ offset++;
+ if (nr_reclaimed)
+ offset = round_up(offset, abs(nr_reclaimed));
+ } while (offset < end);
+}
+
/**
* folio_alloc_swap - allocate swap space for a folio
* @folio: folio we want to move to swap
{
swp_entry_t entry = folio->swap;
unsigned long nr_pages = folio_nr_pages(folio);
+ struct swap_info_struct *si = __swap_entry_to_info(entry);
VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
VM_WARN_ON_FOLIO(!folio_test_swapcache(folio), folio);
nr_pages = 1;
}
- swap_entries_put_map(__swap_entry_to_info(entry), entry, nr_pages);
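+ /* A folio's swap entries are naturally aligned, so they never cross a cluster. */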
+ swap_put_entries_cluster(si, swp_offset(entry), nr_pages, false);
}
static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
return NULL;
}
-static unsigned char swap_entry_put_locked(struct swap_info_struct *si,
- struct swap_cluster_info *ci,
- swp_entry_t entry,
- unsigned char usage)
+static void swap_put_entry_locked(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ unsigned long offset,
+ unsigned char usage)
{
- unsigned long offset = swp_offset(entry);
unsigned char count;
unsigned char has_cache;
if (usage)
WRITE_ONCE(si->swap_map[offset], usage);
else
- swap_entries_free(si, ci, entry, 1);
-
- return usage;
+ swap_entries_free(si, ci, offset, 1);
}
/*
return NULL;
}
-static bool swap_entries_put_map(struct swap_info_struct *si,
- swp_entry_t entry, int nr)
-{
- unsigned long offset = swp_offset(entry);
- struct swap_cluster_info *ci;
- bool has_cache = false;
- unsigned char count;
- int i;
-
- if (nr <= 1)
- goto fallback;
- count = swap_count(data_race(si->swap_map[offset]));
- if (count != 1)
- goto fallback;
-
- ci = swap_cluster_lock(si, offset);
- if (!swap_is_last_map(si, offset, nr, &has_cache)) {
- goto locked_fallback;
- }
- if (!has_cache)
- swap_entries_free(si, ci, entry, nr);
- else
- for (i = 0; i < nr; i++)
- WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
- swap_cluster_unlock(ci);
-
- return has_cache;
-
-fallback:
- ci = swap_cluster_lock(si, offset);
-locked_fallback:
- for (i = 0; i < nr; i++, entry.val++) {
- count = swap_entry_put_locked(si, ci, entry, 1);
- if (count == SWAP_HAS_CACHE)
- has_cache = true;
- }
- swap_cluster_unlock(ci);
- return has_cache;
-}
-
-/*
- * Only functions with "_nr" suffix are able to free entries spanning
- * cross multi clusters, so ensure the range is within a single cluster
- * when freeing entries with functions without "_nr" suffix.
- */
-static bool swap_entries_put_map_nr(struct swap_info_struct *si,
- swp_entry_t entry, int nr)
-{
- int cluster_nr, cluster_rest;
- unsigned long offset = swp_offset(entry);
- bool has_cache = false;
-
- cluster_rest = SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER;
- while (nr) {
- cluster_nr = min(nr, cluster_rest);
- has_cache |= swap_entries_put_map(si, entry, cluster_nr);
- cluster_rest = SWAPFILE_CLUSTER;
- nr -= cluster_nr;
- entry.val += cluster_nr;
- }
-
- return has_cache;
-}
-
/*
* Check if it's the last ref of swap entry in the freeing path.
*/
*/
static void swap_entries_free(struct swap_info_struct *si,
struct swap_cluster_info *ci,
- swp_entry_t entry, unsigned int nr_pages)
+ unsigned long offset, unsigned int nr_pages)
{
- unsigned long offset = swp_offset(entry);
+ swp_entry_t entry = swp_entry(si->type, offset);
unsigned char *map = si->swap_map + offset;
unsigned char *map_end = map + nr_pages;
{
const unsigned long start_offset = swp_offset(entry);
const unsigned long end_offset = start_offset + nr;
+ unsigned long offset, cluster_end;
struct swap_info_struct *si;
- bool any_only_cache = false;
- unsigned long offset;
- unsigned long swp_tb;
si = get_swap_device(entry);
if (WARN_ON_ONCE(!si))
if (WARN_ON_ONCE(end_offset > si->max))
goto out;
- /*
- * First free all entries in the range.
- */
- any_only_cache = swap_entries_put_map_nr(si, entry, nr);
-
- /*
- * Short-circuit the below loop if none of the entries had their
- * reference drop to zero.
- */
- if (!any_only_cache)
- goto out;
-
- /*
- * Now go back over the range trying to reclaim the swap cache.
- */
- for (offset = start_offset; offset < end_offset; offset += nr) {
- nr = 1;
- swp_tb = swap_table_get(__swap_offset_to_cluster(si, offset),
- offset % SWAPFILE_CLUSTER);
- if (!swap_count(READ_ONCE(si->swap_map[offset])) && swp_tb_is_folio(swp_tb)) {
- /*
- * Folios are always naturally aligned in swap so
- * advance forward to the next boundary. Zero means no
- * folio was found for the swap entry, so advance by 1
- * in this case. Negative value means folio was found
- * but could not be reclaimed. Here we can still advance
- * to the next boundary.
- */
- nr = __try_to_reclaim_swap(si, offset,
- TTRS_UNMAPPED | TTRS_FULL);
- if (nr == 0)
- nr = 1;
- else if (nr < 0)
- nr = -nr;
- nr = ALIGN(offset + 1, nr) - offset;
- }
- }
-
+ /*
+ * Put the entries and reclaim the swap cache cluster by cluster;
+ * swap_put_entries_cluster() must not be passed a range that
+ * crosses a cluster boundary.
+ */
+ offset = start_offset;
+ do {
+ cluster_end = min(round_up(offset + 1, SWAPFILE_CLUSTER), end_offset);
+ swap_put_entries_cluster(si, offset, cluster_end - offset, true);
+ offset = cluster_end;
+ } while (offset < end_offset);
out:
put_swap_device(si);
}
return;
ci = swap_cluster_lock(si, offset);
- swap_entry_put_locked(si, ci, entry, 1);
+ swap_put_entry_locked(si, ci, offset, 1);
swap_cluster_unlock(ci);
/* In theory readahead might add it to the swap cache by accident */
swp_entry_t entry, unsigned int nr)
{
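+ /*
+ * If only the swap cache holds references, free the slots outright;
+ * otherwise drop the SWAP_HAS_CACHE reference on each slot.
+ */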
if (swap_only_has_cache(si, swp_offset(entry), nr)) {
- swap_entries_free(si, ci, entry, nr);
+ swap_entries_free(si, ci, swp_offset(entry), nr);
} else {
for (int i = 0; i < nr; i++, entry.val++)
- swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
+ swap_put_entry_locked(si, ci, swp_offset(entry), SWAP_HAS_CACHE);
}
}
* into, carry if so, or else fail until a new continuation page is allocated;
* when the original swap_map count is decremented from 0 with continuation,
* borrow from the continuation and report whether it still holds more.
- * Called while __swap_duplicate() or caller of swap_entry_put_locked()
+ * Called while __swap_duplicate() or caller of swap_put_entry_locked()
* holds cluster lock.
*/
static bool swap_count_continued(struct swap_info_struct *si,