**********************************/
static void __zswap_pool_empty(struct percpu_ref *ref);
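+/*
+ * Free the per-CPU acomp_ctx resources. Safe to call on a context that was
+ * never initialized, only partially initialized, or already freed: each field
+ * is checked and reset, so repeated calls are no-ops.
+ */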
+static void acomp_ctx_free(struct crypto_acomp_ctx *acomp_ctx)
+{
+ if (!acomp_ctx)
+ return;
+
+ /*
+ * acomp_ctx->req is NULL if the request was never allocated, or if
+ * acomp_request_alloc() failed.
+ */
+ if (acomp_ctx->req)
+ acomp_request_free(acomp_ctx->req);
+
+ acomp_ctx->req = NULL;
+
+ /*
+ * acomp_ctx->acomp can be an error pointer from a failed
+ * crypto_alloc_acomp_node(), or NULL: either from the zero-initialized
+ * per-CPU allocation, or from a previous call to acomp_ctx_free().
+ * Handle all of these cases here.
+ */
+ if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
+ crypto_free_acomp(acomp_ctx->acomp);
+
+ acomp_ctx->acomp = NULL;
+
+ kfree(acomp_ctx->buffer);
+ acomp_ctx->buffer = NULL;
+}
+
static struct zswap_pool *zswap_pool_create(char *compressor)
{
struct zswap_pool *pool;
strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
- pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
+ /*
+ * The CPU hotplug callback and the cleanup paths rely on the acomp_ctx
+ * fields being zero-initialized.
+ */
+ pool->acomp_ctx = alloc_percpu_gfp(*pool->acomp_ctx,
+ GFP_KERNEL | __GFP_ZERO);
if (!pool->acomp_ctx) {
pr_err("percpu alloc failed\n");
goto error;
}
- for_each_possible_cpu(cpu)
- mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
-
+ /*
+ * cpuhp_state_add_instance() holds the CPU hotplug lock, so CPUs cannot
+ * be offlined while the prepare callbacks run.
+ */
ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
&pool->node);
+
+ /*
+ * On failure, cpuhp_state_add_instance() does not clean up after the
+ * CPUs it has already prepared, because we do not register a teardown
+ * (CPU dead) callback; free the per-CPU resources ourselves.
+ */
if (ret)
- goto error;
+ goto cpuhp_add_fail;
/* being the current pool takes 1 ref; this func expects the
* caller to always add the new pool as the current pool
ref_fail:
cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
+
+cpuhp_add_fail:
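+ /*
+ * The per-CPU contexts are zero-initialized and acomp_ctx_free() handles
+ * NULL and partially initialized fields, so it is safe to call it for
+ * every possible CPU, including those that were never prepared.
+ */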
+ for_each_possible_cpu(cpu)
+ acomp_ctx_free(per_cpu_ptr(pool->acomp_ctx, cpu));
error:
if (pool->acomp_ctx)
free_percpu(pool->acomp_ctx);
static void zswap_pool_destroy(struct zswap_pool *pool)
{
+ int cpu;
+
zswap_pool_debug("destroying", pool);
cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
+
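+ /*
+ * The hotplug instance was removed above, so zswap_cpu_comp_prepare()
+ * can no longer run for this pool; it is safe to free the per-CPU
+ * resources now.
+ */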
+ for_each_possible_cpu(cpu)
+ acomp_ctx_free(per_cpu_ptr(pool->acomp_ctx, cpu));
+
free_percpu(pool->acomp_ctx);
zs_destroy_pool(pool->zs_pool);
{
struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct crypto_acomp *acomp = NULL;
- struct acomp_req *req = NULL;
- u8 *buffer = NULL;
- int ret;
+ int ret = -ENOMEM;
- buffer = kmalloc_node(PAGE_SIZE, GFP_KERNEL, cpu_to_node(cpu));
- if (!buffer) {
- ret = -ENOMEM;
- goto fail;
+ /*
+ * To handle cases where the CPU goes through online-offline-online
+ * transitions, we return if the acomp_ctx has already been initialized.
+ */
+ if (acomp_ctx->acomp) {
+ WARN_ON_ONCE(IS_ERR(acomp_ctx->acomp));
+ return 0;
}
+ acomp_ctx->buffer = kmalloc_node(PAGE_SIZE, GFP_KERNEL, cpu_to_node(cpu));
+ if (!acomp_ctx->buffer)
+ return ret;
+
/*
* In case of an error, crypto_alloc_acomp_node() returns an
* error pointer, never NULL.
*/
- acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
- if (IS_ERR(acomp)) {
+ acomp_ctx->acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+ if (IS_ERR(acomp_ctx->acomp)) {
pr_err("could not alloc crypto acomp %s : %pe\n",
- pool->tfm_name, acomp);
- ret = PTR_ERR(acomp);
+ pool->tfm_name, acomp_ctx->acomp);
+ ret = PTR_ERR(acomp_ctx->acomp);
goto fail;
}
/* acomp_request_alloc() returns NULL in case of an error. */
- req = acomp_request_alloc(acomp);
- if (!req) {
+ acomp_ctx->req = acomp_request_alloc(acomp_ctx->acomp);
+ if (!acomp_ctx->req) {
pr_err("could not alloc crypto acomp_request %s\n",
pool->tfm_name);
- ret = -ENOMEM;
goto fail;
}
- /*
- * Only hold the mutex after completing allocations, otherwise we may
- * recurse into zswap through reclaim and attempt to hold the mutex
- * again resulting in a deadlock.
- */
- mutex_lock(&acomp_ctx->mutex);
crypto_init_wait(&acomp_ctx->wait);
/*
* if the backend of acomp is async zip, crypto_req_done() will wakeup
* crypto_wait_req(); if the backend of acomp is scomp, the callback
* won't be called, crypto_wait_req() will return without blocking.
*/
- acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ acomp_request_set_callback(acomp_ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &acomp_ctx->wait);
- acomp_ctx->buffer = buffer;
- acomp_ctx->acomp = acomp;
- acomp_ctx->req = req;
- mutex_unlock(&acomp_ctx->mutex);
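+ /*
+ * Only initialize the mutex on the first successful initialization of
+ * this acomp_ctx: the early return above ensures that a later CPU
+ * online event does not re-initialize a mutex that another task may
+ * currently hold.
+ */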
+ mutex_init(&acomp_ctx->mutex);
return 0;
fail:
- if (!IS_ERR_OR_NULL(acomp))
- crypto_free_acomp(acomp);
- kfree(buffer);
+ acomp_ctx_free(acomp_ctx);
return ret;
}
-static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
-{
- struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
- struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
- struct acomp_req *req;
- struct crypto_acomp *acomp;
- u8 *buffer;
-
- if (!acomp_ctx)
- return 0;
-
- mutex_lock(&acomp_ctx->mutex);
- req = acomp_ctx->req;
- acomp = acomp_ctx->acomp;
- buffer = acomp_ctx->buffer;
- acomp_ctx->req = NULL;
- acomp_ctx->acomp = NULL;
- acomp_ctx->buffer = NULL;
- mutex_unlock(&acomp_ctx->mutex);
-
- /*
- * Do the actual freeing after releasing the mutex to avoid subtle
- * locking dependencies causing deadlocks.
- *
- * If there was an error in allocating @acomp_ctx->req, it
- * would be set to NULL.
- */
- if (req)
- acomp_request_free(req);
- if (!IS_ERR_OR_NULL(acomp))
- crypto_free_acomp(acomp);
- kfree(buffer);
-
- return 0;
-}
-
-static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
-{
- struct crypto_acomp_ctx *acomp_ctx;
-
- for (;;) {
- acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
- mutex_lock(&acomp_ctx->mutex);
- if (likely(acomp_ctx->req))
- return acomp_ctx;
- /*
- * It is possible that we were migrated to a different CPU after
- * getting the per-CPU ctx but before the mutex was acquired. If
- * the old CPU got offlined, zswap_cpu_comp_dead() could have
- * already freed ctx->req (among other things) and set it to
- * NULL. Just try again on the new CPU that we ended up on.
- */
- mutex_unlock(&acomp_ctx->mutex);
- }
-}
-
-static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
-{
- mutex_unlock(&acomp_ctx->mutex);
-}
-
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
u8 *dst;
bool mapped = false;
- acomp_ctx = acomp_ctx_get_cpu_lock(pool);
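+ /*
+ * The per-CPU acomp_ctx is allocated when the pool is created and freed
+ * only when the pool is destroyed; CPU offlining no longer tears it
+ * down. The ctx obtained here therefore remains valid even if we
+ * migrate to a different CPU before acquiring the mutex.
+ */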
+ acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
+ mutex_lock(&acomp_ctx->mutex);
+
dst = acomp_ctx->buffer;
sg_init_table(&input, 1);
sg_set_page(&input, page, PAGE_SIZE, 0);
else if (alloc_ret)
zswap_reject_alloc_fail++;
- acomp_ctx_put_unlock(acomp_ctx);
+ mutex_unlock(&acomp_ctx->mutex);
return comp_ret == 0 && alloc_ret == 0;
}
struct crypto_acomp_ctx *acomp_ctx;
int ret = 0, dlen;
- acomp_ctx = acomp_ctx_get_cpu_lock(pool);
+ acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
+ mutex_lock(&acomp_ctx->mutex);
zs_obj_read_sg_begin(pool->zs_pool, entry->handle, input, entry->length);
/* zswap entries of length PAGE_SIZE are not compressed. */
}
zs_obj_read_sg_end(pool->zs_pool, entry->handle);
- acomp_ctx_put_unlock(acomp_ctx);
+ mutex_unlock(&acomp_ctx->mutex);
if (!ret && dlen == PAGE_SIZE)
return true;
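+ /*
+ * No teardown (CPU dead) callback is registered: the per-CPU acomp_ctx
+ * resources persist across CPU offline/online transitions and are freed
+ * only when a pool is destroyed.
+ */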
ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
"mm/zswap_pool:prepare",
zswap_cpu_comp_prepare,
- zswap_cpu_comp_dead);
+ NULL);
if (ret)
goto hp_fail;