From: Greg Kroah-Hartman
Date: Wed, 29 Jan 2025 09:26:03 +0000 (+0100)
Subject: 6.12-stable patches
X-Git-Tag: v6.13.1~40
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=60db0f2e3ddaa21ab1273918f550701661418275;p=thirdparty%2Fkernel%2Fstable-queue.git

6.12-stable patches

added patches:
	mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
	mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
---
diff --git a/queue-6.12/mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch b/queue-6.12/mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
new file mode 100644
index 0000000000..b829965174
--- /dev/null
+++ b/queue-6.12/mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
@@ -0,0 +1,110 @@
+From 779b9955f64327c339a16f68055af98252fd3315 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed
+Date: Mon, 13 Jan 2025 21:44:58 +0000
+Subject: mm: zswap: move allocations during CPU init outside the lock
+
+From: Yosry Ahmed
+
+commit 779b9955f64327c339a16f68055af98252fd3315 upstream.
+
+In zswap_cpu_comp_prepare(), allocations are made and assigned to various
+members of acomp_ctx under acomp_ctx->mutex. However, allocations may
+recurse into zswap through reclaim, trying to acquire the same mutex and
+deadlocking.
+
+Move the allocations before the mutex critical section. Only the
+initialization of acomp_ctx needs to be done with the mutex held.
+
+Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
+Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
+Signed-off-by: Yosry Ahmed
+Reviewed-by: Chengming Zhou
+Cc: Johannes Weiner
+Cc: Nhat Pham
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/zswap.c |   42 ++++++++++++++++++++++++------------------
+ 1 file changed, 24 insertions(+), 18 deletions(-)
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -815,15 +815,15 @@ static int zswap_cpu_comp_prepare(unsign
+ {
+ 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+ 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+-	struct crypto_acomp *acomp;
+-	struct acomp_req *req;
++	struct crypto_acomp *acomp = NULL;
++	struct acomp_req *req = NULL;
++	u8 *buffer = NULL;
+ 	int ret;
+ 
+-	mutex_lock(&acomp_ctx->mutex);
+-	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+-	if (!acomp_ctx->buffer) {
++	buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
++	if (!buffer) {
+ 		ret = -ENOMEM;
+-		goto buffer_fail;
++		goto fail;
+ 	}
+ 
+ 	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+@@ -831,21 +831,25 @@ static int zswap_cpu_comp_prepare(unsign
+ 		pr_err("could not alloc crypto acomp %s : %ld\n",
+ 				pool->tfm_name, PTR_ERR(acomp));
+ 		ret = PTR_ERR(acomp);
+-		goto acomp_fail;
++		goto fail;
+ 	}
+-	acomp_ctx->acomp = acomp;
+-	acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ 
+-	req = acomp_request_alloc(acomp_ctx->acomp);
++	req = acomp_request_alloc(acomp);
+ 	if (!req) {
+ 		pr_err("could not alloc crypto acomp_request %s\n",
+ 		       pool->tfm_name);
+ 		ret = -ENOMEM;
+-		goto req_fail;
++		goto fail;
+ 	}
+-	acomp_ctx->req = req;
+ 
++	/*
++	 * Only hold the mutex after completing allocations, otherwise we may
++	 * recurse into zswap through reclaim and attempt to hold the mutex
++	 * again resulting in a deadlock.
++	 */
++	mutex_lock(&acomp_ctx->mutex);
+ 	crypto_init_wait(&acomp_ctx->wait);
++
+ 	/*
+ 	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
+ 	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
+@@ -854,15 +858,17 @@ static int zswap_cpu_comp_prepare(unsign
+ 	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ 				   crypto_req_done, &acomp_ctx->wait);
+ 
++	acomp_ctx->buffer = buffer;
++	acomp_ctx->acomp = acomp;
++	acomp_ctx->is_sleepable = acomp_is_async(acomp);
++	acomp_ctx->req = req;
+ 	mutex_unlock(&acomp_ctx->mutex);
+ 	return 0;
+ 
+-req_fail:
+-	crypto_free_acomp(acomp_ctx->acomp);
+-acomp_fail:
+-	kfree(acomp_ctx->buffer);
+-buffer_fail:
+-	mutex_unlock(&acomp_ctx->mutex);
++fail:
++	if (acomp)
++		crypto_free_acomp(acomp);
++	kfree(buffer);
+ 	return ret;
+ }
+ 
diff --git a/queue-6.12/mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch b/queue-6.12/mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
new file mode 100644
index 0000000000..7c18ddb84b
--- /dev/null
+++ b/queue-6.12/mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
@@ -0,0 +1,224 @@
+From 12dcb0ef540629a281533f9dedc1b6b8e14cfb65 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed
+Date: Wed, 8 Jan 2025 22:24:41 +0000
+Subject: mm: zswap: properly synchronize freeing resources during CPU hotunplug
+
+From: Yosry Ahmed
+
+commit 12dcb0ef540629a281533f9dedc1b6b8e14cfb65 upstream.
+
+In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
+current CPU at the beginning of the operation is retrieved and used
+throughout. However, since neither preemption nor migration are disabled,
+it is possible that the operation continues on a different CPU.
+
+If the original CPU is hotunplugged while the acomp_ctx is still in use,
+we run into a UAF bug as some of the resources attached to the acomp_ctx
+are freed during hotunplug in zswap_cpu_comp_dead() (i.e.
+acomp_ctx.buffer, acomp_ctx.req, or acomp_ctx.acomp).
+
+The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
+crypto_acomp API for hardware acceleration") when the switch to the
+crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
+retrieved using get_cpu_ptr() which disables preemption and makes sure the
+CPU cannot go away from under us. Preemption cannot be disabled with the
+crypto_acomp API as a sleepable context is needed.
+
+Use the acomp_ctx.mutex to synchronize CPU hotplug callbacks allocating
+and freeing resources with compression/decompression paths. Make sure
+that acomp_ctx.req is NULL when the resources are freed. In the
+compression/decompression paths, check if acomp_ctx.req is NULL after
+acquiring the mutex (meaning the CPU was offlined) and retry on the new
+CPU.
+
+The initialization of acomp_ctx.mutex is moved from the CPU hotplug
+callback to the pool initialization where it belongs (where the mutex is
+allocated). In addition to adding clarity, this makes sure that CPU
+hotplug cannot reinitialize a mutex that is already locked by
+compression/decompression.
+
+Previously a fix was attempted by holding cpus_read_lock() [1]. This
+would have caused a potential deadlock as it is possible for code already
+holding the lock to fall into reclaim and enter zswap (causing a
+deadlock). A fix was also attempted using SRCU for synchronization, but
+Johannes pointed out that synchronize_srcu() cannot be used in CPU hotplug
+notifiers [2].
+
+Alternative fixes that were considered/attempted and could have worked:
+- Refcounting the per-CPU acomp_ctx. This involves complexity in
+  handling the race between the refcount dropping to zero in
+  zswap_[de]compress() and the refcount being re-initialized when the
+  CPU is onlined.
+- Disabling migration before getting the per-CPU acomp_ctx [3], but
+  that's discouraged and is a much bigger hammer than needed, and could
+  result in subtle performance issues.
+
+[1]https://lkml.kernel.org/20241219212437.2714151-1-yosryahmed@google.com/
+[2]https://lkml.kernel.org/20250107074724.1756696-2-yosryahmed@google.com/
+[3]https://lkml.kernel.org/20250107222236.2715883-2-yosryahmed@google.com/
+
+[yosryahmed@google.com: remove comment]
+  Link: https://lkml.kernel.org/r/CAJD7tkaxS1wjn+swugt8QCvQ-rVF5RZnjxwPGX17k8x9zSManA@mail.gmail.com
+Link: https://lkml.kernel.org/r/20250108222441.3622031-1-yosryahmed@google.com
+Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
+Signed-off-by: Yosry Ahmed
+Reported-by: Johannes Weiner
+Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
+Reported-by: Sam Sun
+Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tPg6OaQ@mail.gmail.com/
+Cc: Barry Song
+Cc: Chengming Zhou
+Cc: Kanchana P Sridhar
+Cc: Nhat Pham
+Cc: Vitaly Wool
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/zswap.c |   58 ++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 44 insertions(+), 14 deletions(-)
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -251,7 +251,7 @@ static struct zswap_pool *zswap_pool_cre
+ 	struct zswap_pool *pool;
+ 	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
+ 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+-	int ret;
++	int ret, cpu;
+ 
+ 	if (!zswap_has_pool) {
+ 		/* if either are unset, pool initialization failed, and we
+@@ -285,6 +285,9 @@ static struct zswap_pool *zswap_pool_cre
+ 		goto error;
+ 	}
+ 
++	for_each_possible_cpu(cpu)
++		mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
++
+ 	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
+ 				       &pool->node);
+ 	if (ret)
+@@ -816,11 +819,12 @@ static int zswap_cpu_comp_prepare(unsign
+ 	struct acomp_req *req;
+ 	int ret;
+ 
+-	mutex_init(&acomp_ctx->mutex);
+-
++	mutex_lock(&acomp_ctx->mutex);
+ 	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+-	if (!acomp_ctx->buffer)
+-		return -ENOMEM;
++	if (!acomp_ctx->buffer) {
++		ret = -ENOMEM;
++		goto buffer_fail;
++	}
+ 
+ 	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+ 	if (IS_ERR(acomp)) {
+@@ -850,12 +854,15 @@ static int zswap_cpu_comp_prepare(unsign
+ 	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ 				   crypto_req_done, &acomp_ctx->wait);
+ 
++	mutex_unlock(&acomp_ctx->mutex);
+ 	return 0;
+ 
+ req_fail:
+ 	crypto_free_acomp(acomp_ctx->acomp);
+ acomp_fail:
+ 	kfree(acomp_ctx->buffer);
++buffer_fail:
++	mutex_unlock(&acomp_ctx->mutex);
+ 	return ret;
+ }
+ 
+@@ -864,17 +871,45 @@ static int zswap_cpu_comp_dead(unsigned
+ 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+ 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+ 
++	mutex_lock(&acomp_ctx->mutex);
+ 	if (!IS_ERR_OR_NULL(acomp_ctx)) {
+ 		if (!IS_ERR_OR_NULL(acomp_ctx->req))
+ 			acomp_request_free(acomp_ctx->req);
++		acomp_ctx->req = NULL;
+ 		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
+ 			crypto_free_acomp(acomp_ctx->acomp);
+ 		kfree(acomp_ctx->buffer);
+ 	}
++	mutex_unlock(&acomp_ctx->mutex);
+ 
+ 	return 0;
+ }
+ 
++static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
++{
++	struct crypto_acomp_ctx *acomp_ctx;
++
++	for (;;) {
++		acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
++		mutex_lock(&acomp_ctx->mutex);
++		if (likely(acomp_ctx->req))
++			return acomp_ctx;
++		/*
++		 * It is possible that we were migrated to a different CPU after
++		 * getting the per-CPU ctx but before the mutex was acquired. If
++		 * the old CPU got offlined, zswap_cpu_comp_dead() could have
++		 * already freed ctx->req (among other things) and set it to
++		 * NULL. Just try again on the new CPU that we ended up on.
++		 */
++		mutex_unlock(&acomp_ctx->mutex);
++	}
++}
++
++static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
++{
++	mutex_unlock(&acomp_ctx->mutex);
++}
++
+ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
+ {
+ 	struct crypto_acomp_ctx *acomp_ctx;
+@@ -887,10 +922,7 @@ static bool zswap_compress(struct folio
+ 	gfp_t gfp;
+ 	u8 *dst;
+ 
+-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+-
+-	mutex_lock(&acomp_ctx->mutex);
+-
++	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
+ 	dst = acomp_ctx->buffer;
+ 	sg_init_table(&input, 1);
+ 	sg_set_folio(&input, folio, PAGE_SIZE, 0);
+@@ -943,7 +975,7 @@ unlock:
+ 	else if (alloc_ret)
+ 		zswap_reject_alloc_fail++;
+ 
+-	mutex_unlock(&acomp_ctx->mutex);
++	acomp_ctx_put_unlock(acomp_ctx);
+ 	return comp_ret == 0 && alloc_ret == 0;
+ }
+ 
+@@ -954,9 +986,7 @@ static void zswap_decompress(struct zswa
+ 	struct crypto_acomp_ctx *acomp_ctx;
+ 	u8 *src;
+ 
+-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+-	mutex_lock(&acomp_ctx->mutex);
+-
++	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
+ 	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+ 	/*
+ 	 * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
+@@ -980,10 +1010,10 @@ static void zswap_decompress(struct zswa
+ 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
+ 	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
+ 	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+-	mutex_unlock(&acomp_ctx->mutex);
+ 
+ 	if (src != acomp_ctx->buffer)
+ 		zpool_unmap_handle(zpool, entry->handle);
++	acomp_ctx_put_unlock(acomp_ctx);
+ }
+ 
+ /*********************************
diff --git a/queue-6.12/series b/queue-6.12/series
index 26c6f9cb91..73f1154ca8 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -12,3 +12,5 @@ drm-connector-hdmi-validate-supported_formats-matche.patch
 irqchip-sunxi-nmi-add-missing-skip_wake-flag.patch
 hwmon-drivetemp-set-scsi-command-timeout-to-10s.patch
 asoc-samsung-add-missing-depends-on-i2c.patch
+mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
+mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
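
The lock-validate-retry idiom used by acomp_ctx_get_cpu_lock()/acomp_ctx_put_unlock() in the hotunplug patch generalizes beyond zswap: take the same lock the teardown path takes, check whether the protected resources are still present, and if not drop the lock and retry elsewhere. Below is a minimal userspace sketch of that idea only; it is not part of the patches above, the names (struct slot, slot_get_lock, slot_put_unlock, slot_teardown) are hypothetical, and pthreads stands in for the kernel mutex and per-CPU data.

	/*
	 * Illustrative sketch of the lock-validate-retry pattern.
	 * A NULL buffer marks a slot whose resources were already freed
	 * by the teardown path, mirroring acomp_ctx->req being NULLed in
	 * zswap_cpu_comp_dead().
	 */
	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct slot {
		pthread_mutex_t lock;
		void *buffer;		/* NULL means the slot was torn down */
	};

	/* Lock a live slot; if teardown already freed it, move on and retry. */
	static struct slot *slot_get_lock(struct slot *slots, int nr)
	{
		int i = 0;

		for (;;) {
			struct slot *s = &slots[i % nr];

			pthread_mutex_lock(&s->lock);
			if (s->buffer)		/* still initialized, caller may use it */
				return s;
			/* Teardown won the race; drop the lock and try another slot. */
			pthread_mutex_unlock(&s->lock);
			i++;
		}
	}

	static void slot_put_unlock(struct slot *s)
	{
		pthread_mutex_unlock(&s->lock);
	}

	/* Teardown: free resources under the same lock and mark the slot dead. */
	static void slot_teardown(struct slot *s)
	{
		pthread_mutex_lock(&s->lock);
		free(s->buffer);
		s->buffer = NULL;
		pthread_mutex_unlock(&s->lock);
	}

	int main(void)
	{
		struct slot slots[2] = {
			{ PTHREAD_MUTEX_INITIALIZER, malloc(16) },
			{ PTHREAD_MUTEX_INITIALIZER, malloc(16) },
		};
		struct slot *s;

		slot_teardown(&slots[0]);	/* "offline" the first slot */
		s = slot_get_lock(slots, 2);	/* skips slot 0, returns locked slot 1 */
		printf("got slot %ld\n", (long)(s - slots));
		slot_put_unlock(s);
		slot_teardown(&slots[1]);
		return 0;
	}

As in the kernel code, the validity check must be made after the lock is held: checking before acquiring the mutex would reintroduce the race the patch closes.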