struct page;
struct mm_struct;
struct kmem_cache;
+struct swap_cluster_info;
/* Cgroup-specific page state, on top of universal node page state */
enum memcg_stat_item {
current->in_user_fault = 0;
}
-void __memcg1_swapout(struct folio *folio);
-void memcg1_swapin(struct folio *folio);
-
#else /* CONFIG_MEMCG_V1 */
static inline
unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
{
}
-static inline void __memcg1_swapout(struct folio *folio)
+#endif /* CONFIG_MEMCG_V1 */
+
+#if defined(CONFIG_MEMCG_V1) && defined(CONFIG_SWAP)
+
+void __memcg1_swapout(struct folio *folio, struct swap_cluster_info *ci);
+void memcg1_swapin(struct folio *folio);
+
+#else
+
+static inline void __memcg1_swapout(struct folio *folio,
+ struct swap_cluster_info *ci)
{
}
static inline void memcg1_swapin(struct folio *folio)
{
}
-
-#endif /* CONFIG_MEMCG_V1 */
+#endif
#endif /* _LINUX_MEMCONTROL_H */
return __mem_cgroup_try_charge_swap(folio);
}
-extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
-static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
+extern void __mem_cgroup_uncharge_swap(unsigned short id, unsigned int nr_pages);
+static inline void mem_cgroup_uncharge_swap(unsigned short id, unsigned int nr_pages)
{
if (mem_cgroup_disabled())
return;
- __mem_cgroup_uncharge_swap(entry, nr_pages);
+ __mem_cgroup_uncharge_swap(id, nr_pages);
}
extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
return 0;
}
-static inline void mem_cgroup_uncharge_swap(swp_entry_t entry,
+static inline void mem_cgroup_uncharge_swap(unsigned short id,
unsigned int nr_pages)
{
}
#include "internal.h"
#include "swap.h"
+#include "swap_table.h"
#include "memcontrol-v1.h"
/*
local_irq_restore(flags);
}
+#ifdef CONFIG_SWAP
/**
* __memcg1_swapout - transfer a memsw charge to swap
* @folio: folio whose memsw charge to transfer
+ * @ci: the locked swap cluster holding the swap entries
*
* Transfer the memsw charge of @folio to the swap entry stored in
* folio->swap.
*
- * Context: folio must be isolated, unmapped, locked and is just about
- * to be freed, and caller must disable IRQs.
+ * Context: folio must be isolated, unmapped, locked and is just about to
+ * be freed, and caller must disable IRQs and hold the swap cluster lock.
*/
-void __memcg1_swapout(struct folio *folio)
+void __memcg1_swapout(struct folio *folio, struct swap_cluster_info *ci)
{
struct mem_cgroup *memcg, *swap_memcg;
struct obj_cgroup *objcg;
swap_memcg = mem_cgroup_private_id_get_online(memcg, nr_entries);
mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
- swap_cgroup_record(folio, mem_cgroup_private_id(swap_memcg), folio->swap);
+ __swap_cgroup_set(ci, swp_cluster_offset(folio->swap), nr_entries,
+ mem_cgroup_private_id(swap_memcg));
folio_unqueue_deferred_split(folio);
folio->memcg_data = 0;
}
/*
- * Interrupts should be disabled here because the caller holds the
- * i_pages lock which is taken with interrupts-off. It is
+ * The caller must hold the swap cluster lock with IRQ off. It is
* important here to have the interrupts disabled because it is the
* only synchronisation we have for updating the per-CPU variables.
*/
}
/**
- * memcg1_swapin - uncharge swap slot
+ * memcg1_swapin - uncharge swap slot on swapin
* @folio: folio being swapped in
*
* Call this function after successfully adding the charged
*/
void memcg1_swapin(struct folio *folio)
{
+ struct swap_cluster_info *ci;
+ unsigned long nr_pages;
+ unsigned short id;
+
VM_WARN_ON_ONCE_FOLIO(!folio_test_swapcache(folio), folio);
VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
* correspond 1:1 to page and swap slot lifetimes: we charge the
* page to memory here, and uncharge swap when the slot is freed.
*/
- if (do_memsw_account()) {
- /*
- * The swap entry might not get freed for a long time,
- * let's not wait for it. The page already received a
- * memory+swap charge, drop the swap entry duplicate.
- */
- mem_cgroup_uncharge_swap(folio->swap, folio_nr_pages(folio));
- }
+ if (!do_memsw_account())
+ return;
+
+ /*
+ * The swap entry might not get freed for a long time,
+ * let's not wait for it. The page already received a
+ * memory+swap charge, drop the swap entry duplicate.
+ */
+ nr_pages = folio_nr_pages(folio);
+ ci = swap_cluster_get_and_lock(folio);
+ id = __swap_cgroup_clear(ci, swp_cluster_offset(folio->swap),
+ nr_pages);
+ swap_cluster_unlock(ci);
+ mem_cgroup_uncharge_swap(id, nr_pages);
}
+#endif
void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
unsigned long nr_memory, int nid)
#include <linux/sched/isolation.h>
#include <linux/kmemleak.h>
#include "internal.h"
+#include "swap_table.h"
#include <net/sock.h>
#include <net/ip.h>
#include "slab.h"
int __mem_cgroup_try_charge_swap(struct folio *folio)
{
unsigned int nr_pages = folio_nr_pages(folio);
+ struct swap_cluster_info *ci;
struct page_counter *counter;
struct mem_cgroup *memcg;
struct obj_cgroup *objcg;
}
mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
- swap_cgroup_record(folio, mem_cgroup_private_id(memcg), folio->swap);
+ ci = swap_cluster_get_and_lock(folio);
+ __swap_cgroup_set(ci, swp_cluster_offset(folio->swap), nr_pages,
+ mem_cgroup_private_id(memcg));
+ swap_cluster_unlock(ci);
return 0;
}
/**
* __mem_cgroup_uncharge_swap - uncharge swap space
- * @entry: swap entry to uncharge
+ * @id: cgroup id to uncharge
* @nr_pages: the amount of swap space to uncharge
*/
-void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
+void __mem_cgroup_uncharge_swap(unsigned short id, unsigned int nr_pages)
{
struct mem_cgroup *memcg;
- unsigned short id;
- id = swap_cgroup_clear(entry, nr_pages);
rcu_read_lock();
memcg = mem_cgroup_from_private_id(id);
if (memcg) {
#include <linux/atomic.h> /* for atomic_long_t */
struct mempolicy;
struct swap_iocb;
+struct swap_memcg_table;
extern int page_cluster;
u8 order;
atomic_long_t __rcu *table; /* Swap table entries, see mm/swap_table.h */
unsigned int *extend_table; /* For large swap count, protected by ci->lock */
+#ifdef CONFIG_MEMCG
+ struct swap_memcg_table *memcg_table; /* Swap table entries' cgroup record */
+#endif
struct list_head list;
};
if (shadowp && swp_tb_is_shadow(old_tb))
*shadowp = swp_tb_to_shadow(old_tb);
if (memcg_id)
- *memcg_id = lookup_swap_cgroup_id(targ_entry);
+ *memcg_id = __swap_cgroup_get(ci, ci_off);
if (nr == 1)
return 0;
- targ_entry.val = round_down(targ_entry.val, nr);
ci_off = round_down(ci_off, nr);
ci_end = ci_off + nr;
do {
old_tb = __swap_table_get(ci, ci_off);
if (unlikely(swp_tb_is_folio(old_tb) ||
!__swp_tb_get_count(old_tb) ||
- (memcg_id && *memcg_id != lookup_swap_cgroup_id(targ_entry))))
+ (memcg_id && *memcg_id != __swap_cgroup_get(ci, ci_off))))
return -EBUSY;
- targ_entry.val++;
} while (++ci_off < ci_end);
return 0;
atomic_long_t entries[SWAPFILE_CLUSTER];
};
+/* For storing memcg private id */
+struct swap_memcg_table {
+ unsigned short id[SWAPFILE_CLUSTER];
+};
+
#define SWP_TABLE_USE_PAGE (sizeof(struct swap_table) == PAGE_SIZE)
/*
return swp_tb;
}
+
+#ifdef CONFIG_MEMCG
+static inline void __swap_cgroup_set(struct swap_cluster_info *ci,
+ unsigned int ci_off, unsigned long nr, unsigned short id)
+{
+ lockdep_assert_held(&ci->lock);
+ VM_WARN_ON_ONCE(ci_off >= SWAPFILE_CLUSTER);
+ if (WARN_ON_ONCE(!ci->memcg_table))
+ return;
+ do {
+ ci->memcg_table->id[ci_off++] = id;
+ } while (--nr);
+}
+
+static inline unsigned short __swap_cgroup_get(struct swap_cluster_info *ci,
+ unsigned int ci_off)
+{
+ lockdep_assert_held(&ci->lock);
+ VM_WARN_ON_ONCE(ci_off >= SWAPFILE_CLUSTER);
+ if (unlikely(!ci->memcg_table))
+ return 0;
+ return ci->memcg_table->id[ci_off];
+}
+
+static inline unsigned short __swap_cgroup_clear(struct swap_cluster_info *ci,
+ unsigned int ci_off,
+ unsigned long nr)
+{
+ unsigned short old = __swap_cgroup_get(ci, ci_off);
+
+ if (!old)
+ return 0;
+ do {
+ VM_WARN_ON_ONCE(ci->memcg_table->id[ci_off] != old);
+ ci->memcg_table->id[ci_off++] = 0;
+ } while (--nr);
+
+ return old;
+}
+#else
+static inline void __swap_cgroup_set(struct swap_cluster_info *ci,
+ unsigned int ci_off, unsigned long nr, unsigned short id)
+{
+}
+
+static inline unsigned short __swap_cgroup_get(struct swap_cluster_info *ci,
+ unsigned int ci_off)
+{
+ return 0;
+}
+
+static inline unsigned short __swap_cgroup_clear(struct swap_cluster_info *ci,
+ unsigned int ci_off,
+ unsigned long nr)
+{
+ return 0;
+}
+#endif
+
#endif
{
struct swap_table *table;
- table = (struct swap_table *)rcu_dereference_protected(ci->table, true);
+#ifdef CONFIG_MEMCG
+ kfree(ci->memcg_table);
+ ci->memcg_table = NULL;
+#endif
+
+ table = (struct swap_table *)rcu_access_pointer(ci->table);
if (!table)
return;
{
struct swap_table *table = NULL;
struct folio *folio;
+ int ret = 0;
/* The cluster must be empty and not on any list during allocation. */
VM_WARN_ON_ONCE(ci->flags || !cluster_is_empty(ci));
return -ENOMEM;
rcu_assign_pointer(ci->table, table);
- return 0;
+
+#ifdef CONFIG_MEMCG
+ if (!mem_cgroup_disabled()) {
+ VM_WARN_ON_ONCE(ci->memcg_table);
+ ci->memcg_table = kzalloc_obj(*ci->memcg_table, gfp);
+ if (!ci->memcg_table)
+ ret = -ENOMEM;
+ }
+#endif
+ if (ret)
+ swap_cluster_free_table(ci);
+
+ return ret;
}
/*
bad_slots++;
else
WARN_ON_ONCE(!swp_tb_is_null(swp_tb));
+ WARN_ON_ONCE(__swap_cgroup_get(ci, ci_off));
} while (++ci_off < ci_end);
WARN_ON_ONCE(bad_slots != (swapoff ? ci->count : 0));
unsigned int ci_start, unsigned int nr_pages)
{
unsigned long old_tb;
- unsigned int type = si->type;
unsigned short batch_id = 0, id_cur;
unsigned int ci_off = ci_start, ci_end = ci_start + nr_pages;
unsigned long ci_head = cluster_offset(si, ci);
unsigned int batch_off = ci_off;
- swp_entry_t entry;
VM_WARN_ON(ci->count < nr_pages);
* Uncharge swap slots by memcg in batches. Consecutive
* slots with the same cgroup id are uncharged together.
*/
- entry = swp_entry(type, ci_head + ci_off);
- id_cur = lookup_swap_cgroup_id(entry);
+ id_cur = __swap_cgroup_clear(ci, ci_off, 1);
if (batch_id != id_cur) {
if (batch_id)
- mem_cgroup_uncharge_swap(swp_entry(type, ci_head + batch_off),
- ci_off - batch_off);
+ mem_cgroup_uncharge_swap(batch_id, ci_off - batch_off);
batch_id = id_cur;
batch_off = ci_off;
}
} while (++ci_off < ci_end);
- if (batch_id) {
- mem_cgroup_uncharge_swap(swp_entry(type, ci_head + batch_off),
- ci_off - batch_off);
- }
+ if (batch_id)
+ mem_cgroup_uncharge_swap(batch_id, ci_off - batch_off);
swap_range_free(si, ci_head + ci_start, nr_pages);
swap_cluster_assert_empty(ci, ci_start, nr_pages, false);
if (reclaimed && !mapping_exiting(mapping))
shadow = workingset_eviction(folio, target_memcg);
- __memcg1_swapout(folio);
+ __memcg1_swapout(folio, ci);
__swap_cache_del_folio(ci, folio, swap, shadow);
swap_cluster_unlock_irq(ci);
} else {