// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!list_lru_memcg_aware(lru))
		goto out;

	memcg = mem_cgroup_from_slab_obj(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */
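
/*
 * With CONFIG_MEMCG_KMEM, a memcg-aware list_lru keeps one list_lru_one per
 * (memcg, node) pair in lru->xa, indexed by the memcg's kmemcg_id; without
 * it, the stubs above collapse every lookup onto the single per-node list.
 */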

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(lru, nid, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid,
					 lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(lru, nid, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
	if (!l)
		goto out;

	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
out:
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG_KMEM
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(lru, nid, index,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(&mlru->node[nid]);

	return mlru;
}

static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
{
	struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx);

	/*
	 * The __list_lru_walk_one() can walk the list of this node.
	 * We need kvfree_rcu() here. And the walking of the list
	 * is under lru->node[nid]->lock, which can serve as a RCU
	 * read-side critical section.
	 */
	if (mlru)
		kvfree_rcu(mlru, rcu);
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
					 int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(lru, nid, src_idx);
	if (!src)
		goto out;
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);

	list_splice_init(&src->list, &dst->list);

	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
		src->nr_items = 0;
	}
out:
	spin_unlock_irq(&nlru->lock);
}

static void memcg_reparent_list_lru(struct list_lru *lru,
				    int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	for_each_node(i)
		memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);

	memcg_list_lru_free(lru, src_idx);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct cgroup_subsys_state *css;
	struct list_lru *lru;
	int src_idx = memcg->kmemcg_id;

	/*
	 * Change kmemcg_id of this cgroup and all its descendants to the
	 * parent's id, and then move all entries from this cgroup's list_lrus
	 * to ones of the parent.
	 *
	 * After we have finished, all list_lrus corresponding to this cgroup
	 * are guaranteed to remain empty. So we can safely free this cgroup's
	 * list lrus in memcg_list_lru_free().
	 *
	 * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc()
	 * from allocating list lrus for this cgroup after memcg_list_lru_free()
	 * call.
	 */
	rcu_read_lock();
	css_for_each_descendant_pre(css, &memcg->css) {
		struct mem_cgroup *child;

		child = mem_cgroup_from_css(css);
		WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id);
	}
	rcu_read_unlock();

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list)
		memcg_reparent_list_lru(lru, src_idx, parent);
	mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	int i;
	unsigned long flags;
	struct list_lru_memcg_table {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
	} *table;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
	if (!table)
		return -ENOMEM;

	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
		if (memcg_list_lru_allocated(memcg, lru))
			break;

		table[i].memcg = memcg;
		table[i].mlru = memcg_init_list_lru_one(gfp);
		if (!table[i].mlru) {
			while (i--)
				kfree(table[i].mlru);
			kfree(table);
			return -ENOMEM;
		}
	}

	xas_lock_irqsave(&xas, flags);
	while (i--) {
		int index = READ_ONCE(table[i].memcg->kmemcg_id);
		struct list_lru_memcg *mlru = table[i].mlru;

		xas_set(&xas, index);
retry:
		if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) {
			kfree(mlru);
		} else {
			xas_store(&xas, mlru);
			if (xas_error(&xas) == -ENOMEM) {
				xas_unlock_irqrestore(&xas, flags);
				if (xas_nomem(&xas, gfp))
					xas_set_err(&xas, 0);
				xas_lock_irqsave(&xas, flags);
				/*
				 * The xas lock has been released, this memcg
				 * can be reparented before us. So reload
				 * memcg id. More details see the comments
				 * in memcg_reparent_list_lrus().
				 */
				index = READ_ONCE(table[i].memcg->kmemcg_id);
				if (index < 0)
					xas_set_err(&xas, 0);
				else if (!xas_error(&xas) && index != xas.xa_index)
					xas_set(&xas, index);
				goto retry;
			}
		}
	}
	/* xas_nomem() is used to free memory instead of memory allocation. */
	if (xas.xa_alloc)
		xas_nomem(&xas, gfp);
	xas_unlock_irqrestore(&xas, flags);
	kfree(table);

	return xas_error(&xas);
}
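
/*
 * memcg_list_lru_alloc() runs on the kmem_cache_alloc_lru() path before an
 * object is charged, so that the object's memcg (and, because entries can be
 * reparented at any time, every ancestor) has a list_lru_memcg to land on.
 * Sketch of the expected call pattern ("my_cache"/"my_lru" hypothetical):
 *
 *	obj = kmem_cache_alloc_lru(my_cache, &my_lru, GFP_KERNEL);
 *	...
 *	list_lru_add(&my_lru, &obj->lru);	// per-memcg list now exists
 */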
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);