lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
}
+/*
+ * Return true iff both the anon and file LRU of @memcg's lruvec on @nid
+ * currently have MAX_NR_GENS generations, i.e. max_lru_gen_memcg() has
+ * completed and the generation count has not shrunk since.  Used to
+ * re-validate that state (presumably after a lock was dropped — confirm
+ * against the caller).
+ */
+bool recheck_lru_gen_max_memcg(struct mem_cgroup *memcg, int nid)
+{
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+ int type;
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Advance max_seq of @lruvec by one generation without walking any mm
+ * structs: only the counters are bumped, so the lruvec can reach
+ * MAX_NR_GENS quickly ahead of reparenting.
+ *
+ * NOTE(review): max_seq is sampled via DEFINE_MAX_SEQ() before taking
+ * mm_list->lock; inc_max_seq() is presumably responsible for re-checking
+ * it under the appropriate lock — confirm against its definition.
+ */
+static void try_to_inc_max_seq_nowalk(struct mem_cgroup *memcg,
+ struct lruvec *lruvec)
+{
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
+ int swappiness = mem_cgroup_swappiness(memcg);
+ DEFINE_MAX_SEQ(lruvec);
+ bool success = false;
+
+ /*
+ * We are not iterating the mm_list here, updating mm_state->seq is just
+ * to make mm walkers work properly.
+ */
+ if (mm_state) {
+ spin_lock(&mm_list->lock);
+ /* mm_state->seq must never fall more than one behind max_seq */
+ VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
+ if (max_seq > mm_state->seq) {
+ WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
+ success = true;
+ }
+ spin_unlock(&mm_list->lock);
+ } else {
+ /* no mm walk state (e.g. walks disabled): nothing to sync */
+ success = true;
+ }
+
+ if (success)
+ inc_max_seq(lruvec, max_seq, swappiness);
+}
+
+/*
+ * We need to ensure that the folios of a child memcg can be reparented to
+ * the same gen of the parent memcg, so the gens of the parent memcg need
+ * to be incremented to MAX_NR_GENS before reparenting.
+ */
+void max_lru_gen_memcg(struct mem_cgroup *memcg, int nid)
+{
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
+ int type;
+
+ for (type = 0; type < ANON_AND_FILE; type++) {
+ /* keep aging until this type has the maximum number of gens */
+ while (get_nr_gens(lruvec, type) < MAX_NR_GENS) {
+ try_to_inc_max_seq_nowalk(memcg, lruvec);
+ /* aging may take several rounds; don't hog the CPU */
+ cond_resched();
+ }
+ }
+}
+
+/*
+ * Compared to traditional LRU, MGLRU faces the following challenges:
+ *
+ * 1. Each lruvec has between MIN_NR_GENS and MAX_NR_GENS generations, the
+ * number of generations of the parent and child memcg may be different,
+ * so we cannot simply transfer MGLRU folios in the child memcg to the
+ * parent memcg as we did for traditional LRU folios.
+ * 2. The generation information is stored in folio->flags, but we cannot
+ * traverse these folios while holding the lru lock, otherwise it may
+ * cause softlockup.
+ * 3. In walk_update_folio(), the gen of a folio and the corresponding lru
+ * size may be updated, but the folio is not immediately moved to the
+ * corresponding lru list. Therefore, there may be folios of different
+ * generations on an LRU list.
+ * 4. In lru_gen_del_folio(), the generation to which the folio belongs is
+ * found based on the generation information in folio->flags, and the
+ * corresponding LRU size will be updated. Therefore, we need to update
+ * the lru size correctly during reparenting, otherwise the lru size may
+ * be updated incorrectly in lru_gen_del_folio().
+ *
+ * Finally, we choose a compromise method, which is to splice the lru list in
+ * the child memcg to the lru list of the same generation in the parent memcg
+ * during reparenting.
+ *
+ * The same generation has different meanings in the parent and child memcg,
+ * so this compromise method will cause the LRU inversion problem. But as the
+ * system runs, this problem will be fixed automatically.
+ */
+static void __lru_gen_reparent_memcg(struct lruvec *child_lruvec, struct lruvec *parent_lruvec,
+ int zone, int type)
+{
+ struct lru_gen_folio *child_lrugen, *parent_lrugen;
+ /* LRU_INACTIVE_ANON for anon (type 0), LRU_INACTIVE_FILE for file */
+ enum lru_list lru = type * LRU_INACTIVE_FILE;
+ int i;
+
+ child_lrugen = &child_lruvec->lrugen;
+ parent_lrugen = &parent_lruvec->lrugen;
+
+ /* walk the child's gens from the youngest (max_seq) downwards */
+ for (i = 0; i < get_nr_gens(child_lruvec, type); i++) {
+ int gen = lru_gen_from_seq(child_lrugen->max_seq - i);
+ /*
+ * NOTE(review): nr_pages is read plainly here but published
+ * with WRITE_ONCE below — presumably the lru lock held by the
+ * caller serializes these accesses; confirm.
+ */
+ long nr_pages = child_lrugen->nr_pages[gen][type][zone];
+ int child_lru_active = lru_gen_is_active(child_lruvec, gen) ? LRU_ACTIVE : 0;
+ int parent_lru_active = lru_gen_is_active(parent_lruvec, gen) ? LRU_ACTIVE : 0;
+
+ /* Assuming that child pages are colder than parent pages */
+ list_splice_tail_init(&child_lrugen->folios[gen][type][zone],
+ &parent_lrugen->folios[gen][type][zone]);
+
+ /* move the per-gen page count from child to parent */
+ WRITE_ONCE(child_lrugen->nr_pages[gen][type][zone], 0);
+ WRITE_ONCE(parent_lrugen->nr_pages[gen][type][zone],
+ parent_lrugen->nr_pages[gen][type][zone] + nr_pages);
+
+ /*
+ * Only when this gen is active in one lruvec but inactive in
+ * the other does the active/inactive split change; fix up each
+ * side's lru size accordingly.
+ */
+ if (lru_gen_is_active(child_lruvec, gen) != lru_gen_is_active(parent_lruvec, gen)) {
+ __update_lru_size(child_lruvec, lru + child_lru_active, zone, -nr_pages);
+ __update_lru_size(parent_lruvec, lru + parent_lru_active, zone, nr_pages);
+ }
+ }
+}
+
+/*
+ * Reparent all MGLRU folios of @memcg's lruvec on @nid to @parent.
+ *
+ * First splices the per-generation folio lists for every managed zone and
+ * LRU type, then transfers the child's per-zone memcg lru sizes into the
+ * parent so later lru_gen_del_folio() accounting stays balanced.
+ *
+ * NOTE(review): callers presumably hold the relevant lru lock(s) for both
+ * lruvecs — confirm against the call site.
+ */
+void lru_gen_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int nid)
+{
+ struct lruvec *child_lruvec, *parent_lruvec;
+ int type, zid;
+ struct zone *zone;
+ enum lru_list lru;
+
+ child_lruvec = get_lruvec(memcg, nid);
+ parent_lruvec = get_lruvec(parent, nid);
+
+ for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, MAX_NR_ZONES - 1)
+ for (type = 0; type < ANON_AND_FILE; type++)
+ __lru_gen_reparent_memcg(child_lruvec, parent_lruvec, zid, type);
+
+ /* credit the child's per-zone lru sizes to the parent's accounting */
+ for_each_lru(lru) {
+ for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, MAX_NR_ZONES - 1) {
+ unsigned long size = mem_cgroup_get_zone_lru_size(child_lruvec, lru, zid);
+
+ mem_cgroup_update_lru_size(parent_lruvec, lru, zid, size);
+ }
+ }
+}
+
#endif /* CONFIG_MEMCG */
/******************************************************************************