From: Greg Kroah-Hartman
Date: Wed, 29 Jan 2025 09:26:03 +0000 (+0100)
Subject: 6.12-stable patches
X-Git-Tag: v6.13.1~40
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=60db0f2e3ddaa21ab1273918f550701661418275;p=thirdparty%2Fkernel%2Fstable-queue.git

6.12-stable patches

added patches:
	mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
	mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
---
diff --git a/queue-6.12/mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch b/queue-6.12/mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
new file mode 100644
index 0000000000..b829965174
--- /dev/null
+++ b/queue-6.12/mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
@@ -0,0 +1,110 @@
+From 779b9955f64327c339a16f68055af98252fd3315 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed
+Date: Mon, 13 Jan 2025 21:44:58 +0000
+Subject: mm: zswap: move allocations during CPU init outside the lock
+
+From: Yosry Ahmed
+
+commit 779b9955f64327c339a16f68055af98252fd3315 upstream.
+
+In zswap_cpu_comp_prepare(), allocations are made and assigned to various
+members of acomp_ctx under acomp_ctx->mutex. However, allocations may
+recurse into zswap through reclaim, trying to acquire the same mutex and
+deadlocking.
+
+Move the allocations before the mutex critical section. Only the
+initialization of acomp_ctx needs to be done with the mutex held.
+
+Link: https://lkml.kernel.org/r/20250113214458.2123410-1-yosryahmed@google.com
+Fixes: 12dcb0ef5406 ("mm: zswap: properly synchronize freeing resources during CPU hotunplug")
+Signed-off-by: Yosry Ahmed
+Reviewed-by: Chengming Zhou
+Cc: Johannes Weiner
+Cc: Nhat Pham
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/zswap.c |   42 ++++++++++++++++++++++++------------------
+ 1 file changed, 24 insertions(+), 18 deletions(-)
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -815,15 +815,15 @@ static int zswap_cpu_comp_prepare(unsign
+ {
+ 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+ 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+-	struct crypto_acomp *acomp;
+-	struct acomp_req *req;
++	struct crypto_acomp *acomp = NULL;
++	struct acomp_req *req = NULL;
++	u8 *buffer = NULL;
+ 	int ret;
+ 
+-	mutex_lock(&acomp_ctx->mutex);
+-	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+-	if (!acomp_ctx->buffer) {
++	buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
++	if (!buffer) {
+ 		ret = -ENOMEM;
+-		goto buffer_fail;
++		goto fail;
+ 	}
+ 
+ 	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+@@ -831,21 +831,25 @@ static int zswap_cpu_comp_prepare(unsign
+ 		pr_err("could not alloc crypto acomp %s : %ld\n",
+ 				pool->tfm_name, PTR_ERR(acomp));
+ 		ret = PTR_ERR(acomp);
+-		goto acomp_fail;
++		goto fail;
+ 	}
+-	acomp_ctx->acomp = acomp;
+-	acomp_ctx->is_sleepable = acomp_is_async(acomp);
+ 
+-	req = acomp_request_alloc(acomp_ctx->acomp);
++	req = acomp_request_alloc(acomp);
+ 	if (!req) {
+ 		pr_err("could not alloc crypto acomp_request %s\n",
+ 		       pool->tfm_name);
+ 		ret = -ENOMEM;
+-		goto req_fail;
++		goto fail;
+ 	}
+-	acomp_ctx->req = req;
+ 
++	/*
++	 * Only hold the mutex after completing allocations, otherwise we may
++	 * recurse into zswap through reclaim and attempt to hold the mutex
++	 * again resulting in a deadlock.
++	 */
++	mutex_lock(&acomp_ctx->mutex);
+ 	crypto_init_wait(&acomp_ctx->wait);
++
+ 	/*
+ 	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
+ 	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
+@@ -854,15 +858,17 @@ static int zswap_cpu_comp_prepare(unsign
+ 	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ 				   crypto_req_done, &acomp_ctx->wait);
+ 
++	acomp_ctx->buffer = buffer;
++	acomp_ctx->acomp = acomp;
++	acomp_ctx->is_sleepable = acomp_is_async(acomp);
++	acomp_ctx->req = req;
+ 	mutex_unlock(&acomp_ctx->mutex);
+ 	return 0;
+ 
+-req_fail:
+-	crypto_free_acomp(acomp_ctx->acomp);
+-acomp_fail:
+-	kfree(acomp_ctx->buffer);
+-buffer_fail:
+-	mutex_unlock(&acomp_ctx->mutex);
++fail:
++	if (acomp)
++		crypto_free_acomp(acomp);
++	kfree(buffer);
+ 	return ret;
+ }
+ 
diff --git a/queue-6.12/mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch b/queue-6.12/mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
new file mode 100644
index 0000000000..7c18ddb84b
--- /dev/null
+++ b/queue-6.12/mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
@@ -0,0 +1,224 @@
+From 12dcb0ef540629a281533f9dedc1b6b8e14cfb65 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed
+Date: Wed, 8 Jan 2025 22:24:41 +0000
+Subject: mm: zswap: properly synchronize freeing resources during CPU hotunplug
+
+From: Yosry Ahmed
+
+commit 12dcb0ef540629a281533f9dedc1b6b8e14cfb65 upstream.
+
+In zswap_compress() and zswap_decompress(), the per-CPU acomp_ctx of the
+current CPU at the beginning of the operation is retrieved and used
+throughout. However, since neither preemption nor migration are disabled,
+it is possible that the operation continues on a different CPU.
+
+If the original CPU is hotunplugged while the acomp_ctx is still in use,
+we run into a UAF bug as some of the resources attached to the acomp_ctx
+are freed during hotunplug in zswap_cpu_comp_dead() (i.e.
+acomp_ctx.buffer, acomp_ctx.req, or acomp_ctx.acomp).
+
+The problem was introduced in commit 1ec3b5fe6eec ("mm/zswap: move to use
+crypto_acomp API for hardware acceleration") when the switch to the
+crypto_acomp API was made. Prior to that, the per-CPU crypto_comp was
+retrieved using get_cpu_ptr() which disables preemption and makes sure the
+CPU cannot go away from under us. Preemption cannot be disabled with the
+crypto_acomp API as a sleepable context is needed.
+
+Use the acomp_ctx.mutex to synchronize CPU hotplug callbacks allocating
+and freeing resources with compression/decompression paths. Make sure
+that acomp_ctx.req is NULL when the resources are freed. In the
+compression/decompression paths, check if acomp_ctx.req is NULL after
+acquiring the mutex (meaning the CPU was offlined) and retry on the new
+CPU.
+
+The initialization of acomp_ctx.mutex is moved from the CPU hotplug
+callback to the pool initialization where it belongs (where the mutex is
+allocated). In addition to adding clarity, this makes sure that CPU
+hotplug cannot reinitialize a mutex that is already locked by
+compression/decompression.
+
+Previously a fix was attempted by holding cpus_read_lock() [1]. This
+would have caused a potential deadlock as it is possible for code already
+holding the lock to fall into reclaim and enter zswap (causing a
+deadlock). A fix was also attempted using SRCU for synchronization, but
+Johannes pointed out that synchronize_srcu() cannot be used in CPU hotplug
+notifiers [2].
+
+Alternative fixes that were considered/attempted and could have worked:
+- Refcounting the per-CPU acomp_ctx. This involves complexity in
+  handling the race between the refcount dropping to zero in
+  zswap_[de]compress() and the refcount being re-initialized when the
+  CPU is onlined.
+- Disabling migration before getting the per-CPU acomp_ctx [3], but
+  that's discouraged and is a much bigger hammer than needed, and could
+  result in subtle performance issues.
+
+[1]https://lkml.kernel.org/20241219212437.2714151-1-yosryahmed@google.com/
+[2]https://lkml.kernel.org/20250107074724.1756696-2-yosryahmed@google.com/
+[3]https://lkml.kernel.org/20250107222236.2715883-2-yosryahmed@google.com/
+
+[yosryahmed@google.com: remove comment]
+  Link: https://lkml.kernel.org/r/CAJD7tkaxS1wjn+swugt8QCvQ-rVF5RZnjxwPGX17k8x9zSManA@mail.gmail.com
+Link: https://lkml.kernel.org/r/20250108222441.3622031-1-yosryahmed@google.com
+Fixes: 1ec3b5fe6eec ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
+Signed-off-by: Yosry Ahmed
+Reported-by: Johannes Weiner
+Closes: https://lore.kernel.org/lkml/20241113213007.GB1564047@cmpxchg.org/
+Reported-by: Sam Sun
+Closes: https://lore.kernel.org/lkml/CAEkJfYMtSdM5HceNsXUDf5haghD5+o2e7Qv4OcuruL4tPg6OaQ@mail.gmail.com/
+Cc: Barry Song
+Cc: Chengming Zhou
+Cc: Kanchana P Sridhar
+Cc: Nhat Pham
+Cc: Vitaly Wool
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/zswap.c |   58 ++++++++++++++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 44 insertions(+), 14 deletions(-)
+
+--- a/mm/zswap.c
++++ b/mm/zswap.c
+@@ -251,7 +251,7 @@ static struct zswap_pool *zswap_pool_cre
+ 	struct zswap_pool *pool;
+ 	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
+ 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+-	int ret;
++	int ret, cpu;
+ 
+ 	if (!zswap_has_pool) {
+ 		/* if either are unset, pool initialization failed, and we
+@@ -285,6 +285,9 @@ static struct zswap_pool *zswap_pool_cre
+ 		goto error;
+ 	}
+ 
++	for_each_possible_cpu(cpu)
++		mutex_init(&per_cpu_ptr(pool->acomp_ctx, cpu)->mutex);
++
+ 	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
+ 				       &pool->node);
+ 	if (ret)
+@@ -816,11 +819,12 @@ static int zswap_cpu_comp_prepare(unsign
+ 	struct acomp_req *req;
+ 	int ret;
+ 
+-	mutex_init(&acomp_ctx->mutex);
+-
++	mutex_lock(&acomp_ctx->mutex);
+ 	acomp_ctx->buffer = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
+-	if (!acomp_ctx->buffer)
+-		return -ENOMEM;
++	if (!acomp_ctx->buffer) {
++		ret = -ENOMEM;
++		goto buffer_fail;
++	}
+ 
+ 	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
+ 	if (IS_ERR(acomp)) {
+@@ -850,12 +854,15 @@ static int zswap_cpu_comp_prepare(unsign
+ 	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ 				   crypto_req_done, &acomp_ctx->wait);
+ 
++	mutex_unlock(&acomp_ctx->mutex);
+ 	return 0;
+ 
+ req_fail:
+ 	crypto_free_acomp(acomp_ctx->acomp);
+ acomp_fail:
+ 	kfree(acomp_ctx->buffer);
++buffer_fail:
++	mutex_unlock(&acomp_ctx->mutex);
+ 	return ret;
+ }
+ 
+@@ -864,17 +871,45 @@ static int zswap_cpu_comp_dead(unsigned
+ 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
+ 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+ 
++	mutex_lock(&acomp_ctx->mutex);
+ 	if (!IS_ERR_OR_NULL(acomp_ctx)) {
+ 		if (!IS_ERR_OR_NULL(acomp_ctx->req))
+ 			acomp_request_free(acomp_ctx->req);
++		acomp_ctx->req = NULL;
+ 		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
+ 			crypto_free_acomp(acomp_ctx->acomp);
+ 		kfree(acomp_ctx->buffer);
+ 	}
++	mutex_unlock(&acomp_ctx->mutex);
+ 
+ 	return 0;
+ }
+ 
++static struct crypto_acomp_ctx *acomp_ctx_get_cpu_lock(struct zswap_pool *pool)
++{
++	struct crypto_acomp_ctx *acomp_ctx;
++
++	for (;;) {
++		acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
++		mutex_lock(&acomp_ctx->mutex);
++		if (likely(acomp_ctx->req))
++			return acomp_ctx;
++		/*
++		 * It is possible that we were migrated to a different CPU after
++		 * getting the per-CPU ctx but before the mutex was acquired. If
++		 * the old CPU got offlined, zswap_cpu_comp_dead() could have
++		 * already freed ctx->req (among other things) and set it to
++		 * NULL. Just try again on the new CPU that we ended up on.
++		 */
++		mutex_unlock(&acomp_ctx->mutex);
++	}
++}
++
++static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
++{
++	mutex_unlock(&acomp_ctx->mutex);
++}
++
+ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
+ {
+ 	struct crypto_acomp_ctx *acomp_ctx;
+@@ -887,10 +922,7 @@ static bool zswap_compress(struct folio
+ 	gfp_t gfp;
+ 	u8 *dst;
+ 
+-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+-
+-	mutex_lock(&acomp_ctx->mutex);
+-
++	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
+ 	dst = acomp_ctx->buffer;
+ 	sg_init_table(&input, 1);
+ 	sg_set_folio(&input, folio, PAGE_SIZE, 0);
+@@ -943,7 +975,7 @@ unlock:
+ 	else if (alloc_ret)
+ 		zswap_reject_alloc_fail++;
+ 
+-	mutex_unlock(&acomp_ctx->mutex);
++	acomp_ctx_put_unlock(acomp_ctx);
+ 	return comp_ret == 0 && alloc_ret == 0;
+ }
+ 
+@@ -954,9 +986,7 @@ static void zswap_decompress(struct zswa
+ 	struct crypto_acomp_ctx *acomp_ctx;
+ 	u8 *src;
+ 
+-	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+-	mutex_lock(&acomp_ctx->mutex);
+-
++	acomp_ctx = acomp_ctx_get_cpu_lock(entry->pool);
+ 	src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+ 	/*
+ 	 * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
+@@ -980,10 +1010,10 @@ static void zswap_decompress(struct zswa
+ 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
+ 	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
+ 	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+-	mutex_unlock(&acomp_ctx->mutex);
+ 
+ 	if (src != acomp_ctx->buffer)
+ 		zpool_unmap_handle(zpool, entry->handle);
++	acomp_ctx_put_unlock(acomp_ctx);
+ }
+ 
+ /*********************************
diff --git a/queue-6.12/series b/queue-6.12/series
index 26c6f9cb91..73f1154ca8 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -12,3 +12,5 @@ drm-connector-hdmi-validate-supported_formats-matche.patch
 irqchip-sunxi-nmi-add-missing-skip_wake-flag.patch
 hwmon-drivetemp-set-scsi-command-timeout-to-10s.patch
 asoc-samsung-add-missing-depends-on-i2c.patch
+mm-zswap-properly-synchronize-freeing-resources-during-cpu-hotunplug.patch
+mm-zswap-move-allocations-during-cpu-init-outside-the-lock.patch
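
The lock-validate-retry idiom used by acomp_ctx_get_cpu_lock()/acomp_ctx_put_unlock() in the hotunplug patch generalizes beyond zswap: take the same lock the teardown path takes, check whether the protected resources are still present, and if not drop the lock and retry elsewhere. Below is a minimal userspace sketch of that idea only; it is not part of the patches above, the names (struct slot, slot_get_lock, slot_put_unlock, slot_teardown) are hypothetical, and pthreads stands in for the kernel mutex and per-CPU data.

	/*
	 * Illustrative sketch of the lock-validate-retry pattern.
	 * A NULL buffer marks a slot whose resources were already freed
	 * by the teardown path, mirroring acomp_ctx->req being NULLed in
	 * zswap_cpu_comp_dead().
	 */
	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct slot {
		pthread_mutex_t lock;
		void *buffer;		/* NULL means the slot was torn down */
	};

	/* Lock a live slot; if teardown already freed it, move on and retry. */
	static struct slot *slot_get_lock(struct slot *slots, int nr)
	{
		int i = 0;

		for (;;) {
			struct slot *s = &slots[i % nr];

			pthread_mutex_lock(&s->lock);
			if (s->buffer)		/* still initialized, caller may use it */
				return s;
			/* Teardown won the race; drop the lock and try another slot. */
			pthread_mutex_unlock(&s->lock);
			i++;
		}
	}

	static void slot_put_unlock(struct slot *s)
	{
		pthread_mutex_unlock(&s->lock);
	}

	/* Teardown: free resources under the same lock and mark the slot dead. */
	static void slot_teardown(struct slot *s)
	{
		pthread_mutex_lock(&s->lock);
		free(s->buffer);
		s->buffer = NULL;
		pthread_mutex_unlock(&s->lock);
	}

	int main(void)
	{
		struct slot slots[2] = {
			{ PTHREAD_MUTEX_INITIALIZER, malloc(16) },
			{ PTHREAD_MUTEX_INITIALIZER, malloc(16) },
		};
		struct slot *s;

		slot_teardown(&slots[0]);	/* "offline" the first slot */
		s = slot_get_lock(slots, 2);	/* skips slot 0, returns locked slot 1 */
		printf("got slot %ld\n", (long)(s - slots));
		slot_put_unlock(s);
		slot_teardown(&slots[1]);
		return 0;
	}

As in the kernel code, the validity check must be made after the lock is held: checking before acquiring the mutex would reintroduce the race the patch closes.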