// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool. This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate. However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
*/

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

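/*
 * All of the knobs above are also writable at runtime via
 * /sys/module/zswap/parameters/ (see Documentation/admin-guide/mm/zswap.rst).
 * Illustrative example only:
 *
 *	echo zstd > /sys/module/zswap/parameters/compressor
 *	echo 25 > /sys/module/zswap/parameters/max_pool_percent
 */
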
/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};

struct zswap_pool {
	struct zpool *zpool;
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * offset - the swap offset for the entry. Index into the red-black tree.
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback. The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount. Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data. Needed during
 *          decompression. For a same-value filled page length is 0.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

/* used by param callback function */
static bool zswap_init_started;

/* fatal error during init */
static bool zswap_init_failed;

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)			\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,	\
		 zpool_get_type((p)->zpool))

static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static const struct zpool_ops zswap_zpool_ops = {
	.evict = zswap_writeback_entry
};

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

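/*
 * Hypothetical worked example of the two thresholds above: with 16 GiB of
 * RAM (totalram_pages() == 4M 4-KiB pages) and the default tunables,
 * zswap_is_full() returns true once the pool passes 20% of RAM (~3.2 GiB),
 * and zswap_can_accept() starts admitting new stores again only once the
 * pool has shrunk below 90% of that limit (~2.88 GiB).
 */
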
static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int __init zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return zswap_entry_cache == NULL;
}

static void __init zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		zpool_free(entry->pool->zpool, entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/*
 * caller must hold the tree lock
 * remove from the tree and free it, if nobody references the entry
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type and compressor at runtime, i.e.
 * zswap is running, zswap can have more than one zpool on one cpu, but they
 * are sharing dstmem. So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}

static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
				pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called, crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

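/*
 * Note the two-level hotplug setup in init_zswap(): the dstmem/mutex
 * callbacks above are registered once globally, while each pool adds its
 * own CPUHP_MM_ZSWP_POOL_PREPARE instance (zswap_cpu_comp_prepare) from
 * zswap_pool_create() so that per-CPU acomp transforms follow CPU hotplug.
 */
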
/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		if (strcmp(zpool_get_type(pool->zpool), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

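/*
 * Ask the zpool to evict one stored page; for evictable zpools this comes
 * back into zswap_writeback_entry() via zswap_zpool_ops. Runs from
 * shrink_wq and drops the pool reference taken by zswap_pool_last_get()
 * in zswap_frontswap_store().
 */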
static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);

	if (zpool_shrink(pool->zpool, 1, NULL))
		zswap_reject_reclaim_fail++;
	zswap_pool_put(pool);
}

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either are unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	/* unique name for each pool specifically required by zsmalloc */
	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));

	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
	if (!pool->zpool) {
		pr_err("%s zpool not available\n", type);
		goto error;
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	if (pool->zpool)
		zpool_destroy_pool(pool->zpool);
	kfree(pool);
	return NULL;
}

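/*
 * Init-time only: if the configured compressor or zpool is unusable, fall
 * back to the compile-time defaults before giving up on pool creation.
 */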
static __init struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	zpool_destroy_pool(pool->zpool);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*********************************
* param callbacks
**********************************/

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret;

	if (zswap_init_failed) {
		pr_err("can't set param, initialization failed\n");
		return -ENODEV;
	}

	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return 0;

	/* if this is load-time (pre-init) param setting,
	 * don't create a pool; that's done during init.
	 */
	if (!zswap_init_started)
		return param_set_charp(s, kp);

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	if (zswap_init_failed) {
		pr_err("can't enable, initialization failed\n");
		return -ENODEV;
	}
	if (!zswap_has_pool && zswap_init_started) {
		pr_err("can't enable, no pool configured\n");
		return -ENODEV;
	}

	return param_set_bool(val, kp);
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache). If the page
 * is found, it is returned in retpage. Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	bool page_was_allocated;

	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
			NULL, 0, &page_was_allocated);
	if (page_was_allocated)
		return ZSWAP_SWAPCACHE_NEW;
	if (!*retpage)
		return ZSWAP_SWAPCACHE_FAIL;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device. We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place. After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;

	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
	}

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry;
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		zpool_unmap_handle(pool, handle);
		kfree(tmp);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	src = (u8 *)zhdr + sizeof(struct zswap_header);
	if (!zpool_can_sleep_mapped(pool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(pool, handle);
	}

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		put_page(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
		dlen = PAGE_SIZE;

		mutex_lock(acomp_ctx->mutex);
		sg_init_one(&input, src, entry->length);
		sg_init_table(&output, 1);
		sg_set_page(&output, page, PAGE_SIZE, 0);
		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
		dlen = acomp_ctx->req->dlen;
		mutex_unlock(acomp_ctx->mutex);

		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	put_page(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because invalidate happened during writeback
	 * search the tree and free the entry if found
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;

	/*
	 * if we get here due to ZSWAP_SWAPCACHE_EXIST
	 * a load may be happening concurrently.
	 * it is safe and okay to not free the entry.
	 * if we free the entry in the following put
	 * it is also okay to return !0
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	if (zpool_can_sleep_mapped(pool))
		zpool_unmap_handle(pool, handle);
	else
		kfree(tmp);

	return ret;
}

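/*
 * Returns 1 (and stores the repeating word in *value) iff the page is one
 * machine word repeated for the whole page, e.g. a zero-filled page. Such
 * pages are kept as entry->value instead of being compressed and are
 * reconstructed by zswap_fill_page() on load.
 */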
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;
	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos] != page[0])
			return 0;
	}
	*value = page[0];
	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	int ret;
	unsigned int hlen, dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
	gfp_t gfp;

	/* THP isn't supported */
	if (PageTransHuge(page)) {
		ret = -EINVAL;
		goto reject;
	}

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		struct zswap_pool *pool;

		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		pool = zswap_pool_last_get();
		if (pool)
			queue_work(shrink_wq, &pool->shrink_work);
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept()) {
			ret = -ENOMEM;
			goto reject;
		} else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->offset = offset;
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled) {
		ret = -EINVAL;
		goto freepage;
	}

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool) {
		ret = -EINVAL;
		goto freepage;
	}

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously; the process is in fact
	 * synchronous.
	 * Theoretically, acomp lets users issue multiple requests to one acomp
	 * instance and have them completed simultaneously. But in this case
	 * frontswap stores and loads page by page: there is no way for one
	 * thread doing frontswap to send a second page before the first page
	 * is done.
	 * Different threads running on different cpus use different acomp
	 * instances, though, so multiple threads can do (de)compression in
	 * parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		ret = -EINVAL;
		goto put_dstmem;
	}

	/* store */
	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(entry->pool->zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, &zhdr, hlen);
	memcpy(buf + hlen, dst, dlen);
	zpool_unmap_handle(entry->pool->zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return 0;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	return ret;
}

/*
 * returns 0 if the page was successfully decompressed
 * returns -1 on entry not found or error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = 0;
		goto stats;
	}

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		tmp = kmalloc(entry->length, GFP_ATOMIC);
		if (!tmp) {
			ret = -ENOMEM;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
	if (zpool_evictable(entry->pool->zpool))
		src += sizeof(struct zswap_header);

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(entry->pool->zpool))
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	else
		kfree(tmp);

	BUG_ON(ret);
stats:
	count_vm_event(ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

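/* called via frontswap when a swap area is enabled (swapon) */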
static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static const struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

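/*
 * Exports the counters defined at the top of this file read-only under
 * <debugfs>/zswap/, typically /sys/kernel/debug/zswap/.
 */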
static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_init_started = true;

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpool));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	ret = frontswap_register_ops(&zswap_frontswap_ops);
	if (ret)
		goto destroy_wq;
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;

destroy_wq:
	destroy_workqueue(shrink_wq);
fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	zswap_entry_cache_destroy();
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_failed = true;
	zswap_enabled = false;
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");