From: Greg Kroah-Hartman Date: Thu, 19 Mar 2026 10:41:13 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.18.19~14 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a7820040c00800dbc0409beac704f0159ea3b12b;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: binfmt_misc-restore-write-access-before-closing-files-opened-by-open_exec.patch blk-throttle-fix-access-race-during-throttle-policy-activation.patch dmaengine-mmp_pdma-fix-race-condition-in-mmp_pdma_residue.patch drm-i915-alpm-alpm-disable-fixes.patch drm-i915-psr-repeat-selective-update-area-alignment.patch erofs-fix-inline-data-read-failure-for-ztailpacking-pclusters.patch f2fs-compress-change-the-first-parameter-of-page_array_-alloc-free-to-sbi.patch f2fs-compress-fix-uaf-of-f2fs_inode_info-in-f2fs_free_dic.patch f2fs-fix-to-avoid-migrating-empty-section.patch io_uring-kbuf-check-if-target-buffer-list-is-still-legacy-on-recycle.patch media-i2c-ov5647-use-our-own-mutex-for-the-ctrl-lock.patch mm-thp-deny-thp-for-files-on-anonymous-inodes.patch mptcp-pm-in-kernel-always-set-id-as-avail-when-rm-endp.patch net-dsa-properly-keep-track-of-conduit-reference.patch net-stmmac-remove-support-for-lpi_intr_o.patch s390-stackleak-fix-__stackleak_poison-inline-assembly-constraint.patch s390-xor-fix-xor_xc_2-inline-assembly-constraints.patch sched-fair-fix-zero_vruntime-tracking.patch sched_ext-remove-redundant-css_put-in-scx_cgroup_init.patch xfs-get-rid-of-the-xchk_xfile_-_descr-calls.patch --- diff --git a/queue-6.12/binfmt_misc-restore-write-access-before-closing-files-opened-by-open_exec.patch b/queue-6.12/binfmt_misc-restore-write-access-before-closing-files-opened-by-open_exec.patch new file mode 100644 index 0000000000..dec4ecfa02 --- /dev/null +++ b/queue-6.12/binfmt_misc-restore-write-access-before-closing-files-opened-by-open_exec.patch @@ -0,0 +1,50 @@ +From stable+bounces-222980-greg=kroah.com@vger.kernel.org Wed Mar 4 06:44:15 2026 +From: Robert Garcia +Date: Wed, 
4 Mar 2026 13:43:11 +0800 +Subject: binfmt_misc: restore write access before closing files opened by open_exec() +To: stable@vger.kernel.org, Zilin Guan +Cc: Christian Brauner , Alexander Viro , Robert Garcia , Jan Kara , Eric Biederman , Kees Cook , Andrew Morton , Helge Deller , Lior Ribak , linux-fsdevel@vger.kernel.org, linux-mm@kvack.org, linux-kernel@vger.kernel.org +Message-ID: <20260304054311.108543-1-rob_garcia@163.com> + +From: Zilin Guan + +[ Upstream commit 90f601b497d76f40fa66795c3ecf625b6aced9fd ] + +bm_register_write() opens an executable file using open_exec(), which +internally calls do_open_execat() and denies write access on the file to +avoid modification while it is being executed. + +However, when an error occurs, bm_register_write() closes the file using +filp_close() directly. This does not restore the write permission, which +may cause subsequent write operations on the same file to fail. + +Fix this by calling exe_file_allow_write_access() before filp_close() to +restore the write permission properly. + +Fixes: e7850f4d844e ("binfmt_misc: fix possible deadlock in bm_register_write") +Signed-off-by: Zilin Guan +Link: https://patch.msgid.link/20251105022923.1813587-1-zilin@seu.edu.cn +Signed-off-by: Christian Brauner +[ Use allow_write_access() instead of exe_file_allow_write_access() +according to commit 0357ef03c94ef +("fs: don't block write during exec on pre-content watched files"). 
] +Signed-off-by: Robert Garcia +Signed-off-by: Greg Kroah-Hartman +--- + fs/binfmt_misc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/binfmt_misc.c ++++ b/fs/binfmt_misc.c +@@ -875,8 +875,10 @@ out: + inode_unlock(d_inode(root)); + + if (err) { +- if (f) ++ if (f) { ++ allow_write_access(f); + filp_close(f, NULL); ++ } + kfree(e); + return err; + } diff --git a/queue-6.12/blk-throttle-fix-access-race-during-throttle-policy-activation.patch b/queue-6.12/blk-throttle-fix-access-race-during-throttle-policy-activation.patch new file mode 100644 index 0000000000..7e131e1088 --- /dev/null +++ b/queue-6.12/blk-throttle-fix-access-race-during-throttle-policy-activation.patch @@ -0,0 +1,164 @@ +From stable+bounces-219918-greg=kroah.com@vger.kernel.org Fri Feb 27 06:43:04 2026 +From: Robert Garcia +Date: Fri, 27 Feb 2026 13:42:23 +0800 +Subject: blk-throttle: fix access race during throttle policy activation +To: stable@vger.kernel.org, Han Guangjiang +Cc: Jens Axboe , Robert Garcia , Liang Jie , Yu Kuai , Tejun Heo , Josef Bacik , cgroups@vger.kernel.org, linux-block@vger.kernel.org, linux-kernel@vger.kernel.org +Message-ID: <20260227054223.1552598-1-rob_garcia@163.com> + +From: Han Guangjiang + +[ Upstream commit bd9fd5be6bc0836820500f68fff144609fbd85a9 ] + +On repeated cold boots we occasionally hit a NULL pointer crash in +blk_should_throtl() when throttling is consulted before the throttle +policy is fully enabled for the queue. Checking only q->td != NULL is +insufficient during early initialization, so blkg_to_pd() for the +throttle policy can still return NULL and blkg_to_tg() becomes NULL, +which later gets dereferenced. + + Unable to handle kernel NULL pointer dereference + at virtual address 0000000000000156 + ... 
+ pc : submit_bio_noacct+0x14c/0x4c8 + lr : submit_bio_noacct+0x48/0x4c8 + sp : ffff800087f0b690 + x29: ffff800087f0b690 x28: 0000000000005f90 x27: ffff00068af393c0 + x26: 0000000000080000 x25: 000000000002fbc0 x24: ffff000684ddcc70 + x23: 0000000000000000 x22: 0000000000000000 x21: 0000000000000000 + x20: 0000000000080000 x19: ffff000684ddcd08 x18: ffffffffffffffff + x17: 0000000000000000 x16: ffff80008132a550 x15: 0000ffff98020fff + x14: 0000000000000000 x13: 1fffe000d11d7021 x12: ffff000688eb810c + x11: ffff00077ec4bb80 x10: ffff000688dcb720 x9 : ffff80008068ef60 + x8 : 00000a6fb8a86e85 x7 : 000000000000111e x6 : 0000000000000002 + x5 : 0000000000000246 x4 : 0000000000015cff x3 : 0000000000394500 + x2 : ffff000682e35e40 x1 : 0000000000364940 x0 : 000000000000001a + Call trace: + submit_bio_noacct+0x14c/0x4c8 + verity_map+0x178/0x2c8 + __map_bio+0x228/0x250 + dm_submit_bio+0x1c4/0x678 + __submit_bio+0x170/0x230 + submit_bio_noacct_nocheck+0x16c/0x388 + submit_bio_noacct+0x16c/0x4c8 + submit_bio+0xb4/0x210 + f2fs_submit_read_bio+0x4c/0xf0 + f2fs_mpage_readpages+0x3b0/0x5f0 + f2fs_readahead+0x90/0xe8 + +Tighten blk_throtl_activated() to also require that the throttle policy +bit is set on the queue: + + return q->td != NULL && + test_bit(blkcg_policy_throtl.plid, q->blkcg_pols); + +This prevents blk_should_throtl() from accessing throttle group state +until policy data has been attached to blkgs. 
+ +Fixes: a3166c51702b ("blk-throttle: delay initialization until configuration") +Co-developed-by: Liang Jie +Signed-off-by: Liang Jie +Signed-off-by: Han Guangjiang +Reviewed-by: Yu Kuai +Signed-off-by: Jens Axboe +Signed-off-by: Robert Garcia +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-cgroup.c | 6 ------ + block/blk-cgroup.h | 6 ++++++ + block/blk-throttle.c | 6 +----- + block/blk-throttle.h | 18 +++++++++++------- + 4 files changed, 18 insertions(+), 18 deletions(-) + +--- a/block/blk-cgroup.c ++++ b/block/blk-cgroup.c +@@ -110,12 +110,6 @@ static struct cgroup_subsys_state *blkcg + return task_css(current, io_cgrp_id); + } + +-static bool blkcg_policy_enabled(struct request_queue *q, +- const struct blkcg_policy *pol) +-{ +- return pol && test_bit(pol->plid, q->blkcg_pols); +-} +- + static void blkg_free_workfn(struct work_struct *work) + { + struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, +--- a/block/blk-cgroup.h ++++ b/block/blk-cgroup.h +@@ -455,6 +455,12 @@ static inline bool blk_cgroup_mergeable( + bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio); + } + ++static inline bool blkcg_policy_enabled(struct request_queue *q, ++ const struct blkcg_policy *pol) ++{ ++ return pol && test_bit(pol->plid, q->blkcg_pols); ++} ++ + void blk_cgroup_bio_start(struct bio *bio); + void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); + #else /* CONFIG_BLK_CGROUP */ +--- a/block/blk-throttle.c ++++ b/block/blk-throttle.c +@@ -1209,17 +1209,13 @@ static int blk_throtl_init(struct gendis + INIT_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn); + throtl_service_queue_init(&td->service_queue); + +- /* +- * Freeze queue before activating policy, to synchronize with IO path, +- * which is protected by 'q_usage_counter'. 
+- */ + blk_mq_freeze_queue(disk->queue); + blk_mq_quiesce_queue(disk->queue); + + q->td = td; + td->queue = q; + +- /* activate policy */ ++ /* activate policy, blk_throtl_activated() will return true */ + ret = blkcg_activate_policy(disk, &blkcg_policy_throtl); + if (ret) { + q->td = NULL; +--- a/block/blk-throttle.h ++++ b/block/blk-throttle.h +@@ -154,7 +154,13 @@ void blk_throtl_cancel_bios(struct gendi + + static inline bool blk_throtl_activated(struct request_queue *q) + { +- return q->td != NULL; ++ /* ++ * q->td guarantees that the blk-throttle module is already loaded, ++ * and the plid of blk-throttle is assigned. ++ * blkcg_policy_enabled() guarantees that the policy is activated ++ * in the request_queue. ++ */ ++ return q->td != NULL && blkcg_policy_enabled(q, &blkcg_policy_throtl); + } + + static inline bool blk_should_throtl(struct bio *bio) +@@ -162,11 +168,6 @@ static inline bool blk_should_throtl(str + struct throtl_grp *tg; + int rw = bio_data_dir(bio); + +- /* +- * This is called under bio_queue_enter(), and it's synchronized with +- * the activation of blk-throtl, which is protected by +- * blk_mq_freeze_queue(). +- */ + if (!blk_throtl_activated(bio->bi_bdev->bd_queue)) + return false; + +@@ -192,7 +193,10 @@ static inline bool blk_should_throtl(str + + static inline bool blk_throtl_bio(struct bio *bio) + { +- ++ /* ++ * block throttling takes effect if the policy is activated ++ * in the bio's request_queue. 
++ */ + if (!blk_should_throtl(bio)) + return false; + diff --git a/queue-6.12/dmaengine-mmp_pdma-fix-race-condition-in-mmp_pdma_residue.patch b/queue-6.12/dmaengine-mmp_pdma-fix-race-condition-in-mmp_pdma_residue.patch new file mode 100644 index 0000000000..1f39392aee --- /dev/null +++ b/queue-6.12/dmaengine-mmp_pdma-fix-race-condition-in-mmp_pdma_residue.patch @@ -0,0 +1,85 @@ +From stable+bounces-220036-greg=kroah.com@vger.kernel.org Sat Feb 28 04:04:05 2026 +From: Wenshan Lan +Date: Sat, 28 Feb 2026 11:02:47 +0800 +Subject: dmaengine: mmp_pdma: Fix race condition in mmp_pdma_residue() +To: gregkh@linuxfoundation.org, stable@vger.kernel.org +Cc: Guodong Xu , Juan Li , Vinod Koul , Wenshan Lan +Message-ID: <20260228030247.4178-1-jetlan9@163.com> + +From: Guodong Xu + +[ Upstream commit a143545855bc2c6e1330f6f57ae375ac44af00a7 ] + +Add proper locking in mmp_pdma_residue() to prevent use-after-free when +accessing descriptor list and descriptor contents. + +The race occurs when multiple threads call tx_status() while the tasklet +on another CPU is freeing completed descriptors: + +CPU 0 CPU 1 +----- ----- +mmp_pdma_tx_status() +mmp_pdma_residue() + -> NO LOCK held + list_for_each_entry(sw, ..) + DMA interrupt + dma_do_tasklet() + -> spin_lock(&desc_lock) + list_move(sw->node, ...) + spin_unlock(&desc_lock) + | dma_pool_free(sw) <- FREED! + -> access sw->desc <- UAF! + +This issue can be reproduced when running dmatest on the same channel with +multiple threads (threads_per_chan > 1). + +Fix by protecting the chain_running list iteration and descriptor access +with the chan->desc_lock spinlock. + +Signed-off-by: Juan Li +Signed-off-by: Guodong Xu +Link: https://patch.msgid.link/20251216-mmp-pdma-race-v1-1-976a224bb622@riscstar.com +Signed-off-by: Vinod Koul +[ Minor context conflict resolved. 
] +Signed-off-by: Wenshan Lan +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma/mmp_pdma.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/dma/mmp_pdma.c ++++ b/drivers/dma/mmp_pdma.c +@@ -763,6 +763,7 @@ static unsigned int mmp_pdma_residue(str + { + struct mmp_pdma_desc_sw *sw; + u32 curr, residue = 0; ++ unsigned long flags; + bool passed = false; + bool cyclic = chan->cyclic_first != NULL; + +@@ -778,6 +779,8 @@ static unsigned int mmp_pdma_residue(str + else + curr = readl(chan->phy->base + DSADR(chan->phy->idx)); + ++ spin_lock_irqsave(&chan->desc_lock, flags); ++ + list_for_each_entry(sw, &chan->chain_running, node) { + u32 start, end, len; + +@@ -821,6 +824,7 @@ static unsigned int mmp_pdma_residue(str + continue; + + if (sw->async_tx.cookie == cookie) { ++ spin_unlock_irqrestore(&chan->desc_lock, flags); + return residue; + } else { + residue = 0; +@@ -828,6 +832,8 @@ static unsigned int mmp_pdma_residue(str + } + } + ++ spin_unlock_irqrestore(&chan->desc_lock, flags); ++ + /* We should only get here in case of cyclic transactions */ + return residue; + } diff --git a/queue-6.12/drm-i915-alpm-alpm-disable-fixes.patch b/queue-6.12/drm-i915-alpm-alpm-disable-fixes.patch new file mode 100644 index 0000000000..3b30718f96 --- /dev/null +++ b/queue-6.12/drm-i915-alpm-alpm-disable-fixes.patch @@ -0,0 +1,49 @@ +From eb4a7139e97374f42b7242cc754e77f1623fbcd5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jouni=20H=C3=B6gander?= +Date: Thu, 12 Feb 2026 08:27:31 +0200 +Subject: drm/i915/alpm: ALPM disable fixes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jouni Högander + +commit eb4a7139e97374f42b7242cc754e77f1623fbcd5 upstream. + +PORT_ALPM_CTL is supposed to be written only before link training. Remove +writing it from ALPM disable. + +Also clearing ALPM_CTL_ALPM_AUX_LESS_ENABLE and is not about disabling ALPM +but switching to AUX-Wake ALPM. Stop touching this bit on ALPM disable. 
+ +Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/7153 +Fixes: 1ccbf135862b ("drm/i915/psr: Enable ALPM on source side for eDP Panel replay") +Cc: Animesh Manna +Cc: Jani Nikula +Cc: # v6.10+ +Signed-off-by: Jouni Högander +Reviewed-by: Michał Grzelak +Link: https://patch.msgid.link/20260212062731.397801-1-jouni.hogander@intel.com +(cherry picked from commit 008304c9ae75c772d3460040de56e12112cdf5e6) +Signed-off-by: Joonas Lahtinen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_psr.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +--- a/drivers/gpu/drm/i915/display/intel_psr.c ++++ b/drivers/gpu/drm/i915/display/intel_psr.c +@@ -2114,12 +2114,7 @@ static void intel_psr_disable_locked(str + /* Panel Replay on eDP is always using ALPM aux less. */ + if (intel_dp->psr.panel_replay_enabled && intel_dp_is_edp(intel_dp)) { + intel_de_rmw(display, ALPM_CTL(display, cpu_transcoder), +- ALPM_CTL_ALPM_ENABLE | +- ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); +- +- intel_de_rmw(display, +- PORT_ALPM_CTL(display, cpu_transcoder), +- PORT_ALPM_CTL_ALPM_AUX_LESS_ENABLE, 0); ++ ALPM_CTL_ALPM_ENABLE, 0); + } + + /* Disable PSR on Sink */ diff --git a/queue-6.12/drm-i915-psr-repeat-selective-update-area-alignment.patch b/queue-6.12/drm-i915-psr-repeat-selective-update-area-alignment.patch new file mode 100644 index 0000000000..fc5b335d72 --- /dev/null +++ b/queue-6.12/drm-i915-psr-repeat-selective-update-area-alignment.patch @@ -0,0 +1,122 @@ +From 1be2fca84f520105413d0d89ed04bb0ff742ab16 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Jouni=20H=C3=B6gander?= +Date: Wed, 4 Mar 2026 13:30:08 +0200 +Subject: drm/i915/psr: Repeat Selective Update area alignment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jouni Högander + +commit 1be2fca84f520105413d0d89ed04bb0ff742ab16 upstream. + +Currently we are aligning Selective Update area to cover cursor fully if +needed only once. 
It may happen that cursor is in Selective Update area +after pipe alignment and after that covering cursor plane only +partially. Fix this by looping alignment as long as alignment isn't needed +anymore. + +v2: + - do not unecessarily loop if cursor was already fully covered + - rename aligned as su_area_changed + +Fixes: 1bff93b8bc27 ("drm/i915/psr: Extend SU area to cover cursor fully if needed") +Cc: # v6.9+ +Signed-off-by: Jouni Högander +Reviewed-by: Ankit Nautiyal +Link: https://patch.msgid.link/20260304113011.626542-2-jouni.hogander@intel.com +(cherry picked from commit 681e12440d8b110350a5709101169f319e10ccbb) +Signed-off-by: Tvrtko Ursulin +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/display/intel_psr.c | 50 +++++++++++++++++++++++-------- + 1 file changed, 38 insertions(+), 12 deletions(-) + +--- a/drivers/gpu/drm/i915/display/intel_psr.c ++++ b/drivers/gpu/drm/i915/display/intel_psr.c +@@ -2385,12 +2385,13 @@ static void clip_area_update(struct drm_ + overlap_damage_area->y2 = damage_area->y2; + } + +-static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_state) ++static bool intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_state) + { + struct intel_display *display = to_intel_display(crtc_state); + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config; + u16 y_alignment; ++ bool su_area_changed = false; + + /* ADLP aligns the SU region to vdsc slice height in case dsc is enabled */ + if (crtc_state->dsc.compression_enable && +@@ -2399,10 +2400,18 @@ static void intel_psr2_sel_fetch_pipe_al + else + y_alignment = crtc_state->su_y_granularity; + +- crtc_state->psr2_su_area.y1 -= crtc_state->psr2_su_area.y1 % y_alignment; +- if (crtc_state->psr2_su_area.y2 % y_alignment) ++ if (crtc_state->psr2_su_area.y1 % y_alignment) { ++ crtc_state->psr2_su_area.y1 -= crtc_state->psr2_su_area.y1 % y_alignment; ++ su_area_changed 
= true; ++ } ++ ++ if (crtc_state->psr2_su_area.y2 % y_alignment) { + crtc_state->psr2_su_area.y2 = ((crtc_state->psr2_su_area.y2 / + y_alignment) + 1) * y_alignment; ++ su_area_changed = true; ++ } ++ ++ return su_area_changed; + } + + /* +@@ -2487,7 +2496,7 @@ int intel_psr2_sel_fetch_update(struct i + struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc); + struct intel_plane_state *new_plane_state, *old_plane_state; + struct intel_plane *plane; +- bool full_update = false, cursor_in_su_area = false; ++ bool full_update = false, su_area_changed; + int i, ret; + + if (!crtc_state->enable_psr2_sel_fetch) +@@ -2599,15 +2608,32 @@ int intel_psr2_sel_fetch_update(struct i + if (ret) + return ret; + +- /* +- * Adjust su area to cover cursor fully as necessary (early +- * transport). This needs to be done after +- * drm_atomic_add_affected_planes to ensure visible cursor is added into +- * affected planes even when cursor is not updated by itself. +- */ +- intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); ++ do { ++ bool cursor_in_su_area; + +- intel_psr2_sel_fetch_pipe_alignment(crtc_state); ++ /* ++ * Adjust su area to cover cursor fully as necessary ++ * (early transport). This needs to be done after ++ * drm_atomic_add_affected_planes to ensure visible ++ * cursor is added into affected planes even when ++ * cursor is not updated by itself. ++ */ ++ intel_psr2_sel_fetch_et_alignment(state, crtc, &cursor_in_su_area); ++ ++ su_area_changed = intel_psr2_sel_fetch_pipe_alignment(crtc_state); ++ ++ /* ++ * If the cursor was outside the SU area before ++ * alignment, the alignment step (which only expands ++ * SU) may pull the cursor partially inside, so we ++ * must run ET alignment again to fully cover it. But ++ * if the cursor was already fully inside before ++ * alignment, expanding the SU area won't change that, ++ * so no further work is needed. 
++ */ ++ if (cursor_in_su_area) ++ break; ++ } while (su_area_changed); + + /* + * Now that we have the pipe damaged area check if it intersect with diff --git a/queue-6.12/erofs-fix-inline-data-read-failure-for-ztailpacking-pclusters.patch b/queue-6.12/erofs-fix-inline-data-read-failure-for-ztailpacking-pclusters.patch new file mode 100644 index 0000000000..25ff10c370 --- /dev/null +++ b/queue-6.12/erofs-fix-inline-data-read-failure-for-ztailpacking-pclusters.patch @@ -0,0 +1,119 @@ +From stable+bounces-224650-greg=kroah.com@vger.kernel.org Wed Mar 11 09:17:27 2026 +From: Zhiguo Niu +Date: Wed, 11 Mar 2026 16:14:29 +0800 +Subject: erofs: fix inline data read failure for ztailpacking pclusters +To: , +Cc: , , , , , +Message-ID: <1773216869-2760-1-git-send-email-zhiguo.niu@unisoc.com> + +From: Gao Xiang + +[ Upstream commit c134a40f86efb8d6b5a949ef70e06d5752209be5 ] + +Compressed folios for ztailpacking pclusters must be valid before adding +these pclusters to I/O chains. Otherwise, z_erofs_decompress_pcluster() +may assume they are already valid and then trigger a NULL pointer +dereference. + +It is somewhat hard to reproduce because the inline data is in the same +block as the tail of the compressed indexes, which are usually read just +before. However, it may still happen if a fatal signal arrives while +read_mapping_folio() is running, as shown below: + + erofs: (device dm-1): z_erofs_pcluster_begin: failed to get inline data -4 + Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 + + ... 
+ + pc : z_erofs_decompress_queue+0x4c8/0xa14 + lr : z_erofs_decompress_queue+0x160/0xa14 + sp : ffffffc08b3eb3a0 + x29: ffffffc08b3eb570 x28: ffffffc08b3eb418 x27: 0000000000001000 + x26: ffffff8086ebdbb8 x25: ffffff8086ebdbb8 x24: 0000000000000001 + x23: 0000000000000008 x22: 00000000fffffffb x21: dead000000000700 + x20: 00000000000015e7 x19: ffffff808babb400 x18: ffffffc089edc098 + x17: 00000000c006287d x16: 00000000c006287d x15: 0000000000000004 + x14: ffffff80ba8f8000 x13: 0000000000000004 x12: 00000006589a77c9 + x11: 0000000000000015 x10: 0000000000000000 x9 : 0000000000000000 + x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f + x5 : 0000000000000040 x4 : ffffffffffffffe0 x3 : 0000000000000020 + x2 : 0000000000000008 x1 : 0000000000000000 x0 : 0000000000000000 + Call trace: + z_erofs_decompress_queue+0x4c8/0xa14 + z_erofs_runqueue+0x908/0x97c + z_erofs_read_folio+0x128/0x228 + filemap_read_folio+0x68/0x128 + filemap_get_pages+0x44c/0x8b4 + filemap_read+0x12c/0x5b8 + generic_file_read_iter+0x4c/0x15c + do_iter_readv_writev+0x188/0x1e0 + vfs_iter_read+0xac/0x1a4 + backing_file_read_iter+0x170/0x34c + ovl_read_iter+0xf0/0x140 + vfs_read+0x28c/0x344 + ksys_read+0x80/0xf0 + __arm64_sys_read+0x24/0x34 + invoke_syscall+0x60/0x114 + el0_svc_common+0x88/0xe4 + do_el0_svc+0x24/0x30 + el0_svc+0x40/0xa8 + el0t_64_sync_handler+0x70/0xbc + el0t_64_sync+0x1bc/0x1c0 + +Fix this by reading the inline data before allocating and adding +the pclusters to the I/O chains. 
+ +Fixes: cecf864d3d76 ("erofs: support inline data decompression") +Reported-by: Zhiguo Niu +Reviewed-and-tested-by: Zhiguo Niu +Signed-off-by: Gao Xiang +Signed-off-by: Zhiguo Niu +Signed-off-by: Greg Kroah-Hartman +--- + fs/erofs/zdata.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +--- a/fs/erofs/zdata.c ++++ b/fs/erofs/zdata.c +@@ -787,6 +787,7 @@ static int z_erofs_pcluster_begin(struct + struct super_block *sb = fe->inode->i_sb; + erofs_blk_t blknr = erofs_blknr(sb, map->m_pa); + struct z_erofs_pcluster *pcl = NULL; ++ void *ptr = NULL; + int ret; + + DBG_BUGON(fe->pcl); +@@ -807,6 +808,14 @@ static int z_erofs_pcluster_begin(struct + } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) { + DBG_BUGON(1); + return -EFSCORRUPTED; ++ } else { ++ ptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, EROFS_NO_KMAP); ++ if (IS_ERR(ptr)) { ++ erofs_err(sb, "failed to read inline data %pe @ pa %llu of nid %llu", ++ ptr, map->m_pa, EROFS_I(fe->inode)->nid); ++ return PTR_ERR(ptr); ++ } ++ ptr = map->buf.page; + } + + if (pcl) { +@@ -836,16 +845,8 @@ static int z_erofs_pcluster_begin(struct + /* bind cache first when cached decompression is preferred */ + z_erofs_bind_cache(fe); + } else { +- void *mptr; +- +- mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, EROFS_NO_KMAP); +- if (IS_ERR(mptr)) { +- ret = PTR_ERR(mptr); +- erofs_err(sb, "failed to get inline data %d", ret); +- return ret; +- } +- get_page(map->buf.page); +- WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page); ++ get_page((struct page *)ptr); ++ WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, ptr); + fe->pcl->pageofs_in = map->m_pa & ~PAGE_MASK; + fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; + } diff --git a/queue-6.12/f2fs-compress-change-the-first-parameter-of-page_array_-alloc-free-to-sbi.patch b/queue-6.12/f2fs-compress-change-the-first-parameter-of-page_array_-alloc-free-to-sbi.patch new file mode 100644 index 0000000000..b2c6ee79b0 --- 
/dev/null +++ b/queue-6.12/f2fs-compress-change-the-first-parameter-of-page_array_-alloc-free-to-sbi.patch @@ -0,0 +1,193 @@ +From stable+bounces-223298-greg=kroah.com@vger.kernel.org Fri Mar 6 03:39:43 2026 +From: Bin Lan +Date: Fri, 6 Mar 2026 10:38:45 +0800 +Subject: f2fs: compress: change the first parameter of page_array_{alloc,free} to sbi +To: gregkh@linuxfoundation.org, stable@vger.kernel.org +Cc: linux-kernel@vger.kernel.org, Zhiguo Niu , Baocong Liu , Chao Yu , Jaegeuk Kim , Bin Lan +Message-ID: <20260306023846.2147903-1-lanbincn@139.com> + +From: Zhiguo Niu + +[ Upstream commit 8e2a9b656474d67c55010f2c003ea2cf889a19ff ] + +No logic changes, just cleanup and prepare for fixing the UAF issue +in f2fs_free_dic. + +Signed-off-by: Zhiguo Niu +Signed-off-by: Baocong Liu +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Signed-off-by: Bin Lan +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/compress.c | 40 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +--- a/fs/f2fs/compress.c ++++ b/fs/f2fs/compress.c +@@ -23,20 +23,18 @@ + static struct kmem_cache *cic_entry_slab; + static struct kmem_cache *dic_entry_slab; + +-static void *page_array_alloc(struct inode *inode, int nr) ++static void *page_array_alloc(struct f2fs_sb_info *sbi, int nr) + { +- struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int size = sizeof(struct page *) * nr; + + if (likely(size <= sbi->page_array_slab_size)) + return f2fs_kmem_cache_alloc(sbi->page_array_slab, +- GFP_F2FS_ZERO, false, F2FS_I_SB(inode)); ++ GFP_F2FS_ZERO, false, sbi); + return f2fs_kzalloc(sbi, size, GFP_NOFS); + } + +-static void page_array_free(struct inode *inode, void *pages, int nr) ++static void page_array_free(struct f2fs_sb_info *sbi, void *pages, int nr) + { +- struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int size = sizeof(struct page *) * nr; + + if (!pages) +@@ -147,13 +145,13 @@ int f2fs_init_compress_ctx(struct compre + if (cc->rpages) + return 
0; + +- cc->rpages = page_array_alloc(cc->inode, cc->cluster_size); ++ cc->rpages = page_array_alloc(F2FS_I_SB(cc->inode), cc->cluster_size); + return cc->rpages ? 0 : -ENOMEM; + } + + void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) + { +- page_array_free(cc->inode, cc->rpages, cc->cluster_size); ++ page_array_free(F2FS_I_SB(cc->inode), cc->rpages, cc->cluster_size); + cc->rpages = NULL; + cc->nr_rpages = 0; + cc->nr_cpages = 0; +@@ -616,6 +614,7 @@ static void *f2fs_vmap(struct page **pag + + static int f2fs_compress_pages(struct compress_ctx *cc) + { ++ struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; +@@ -636,7 +635,7 @@ static int f2fs_compress_pages(struct co + cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); + cc->valid_nr_cpages = cc->nr_cpages; + +- cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); ++ cc->cpages = page_array_alloc(sbi, cc->nr_cpages); + if (!cc->cpages) { + ret = -ENOMEM; + goto destroy_compress_ctx; +@@ -711,7 +710,7 @@ out_free_cpages: + if (cc->cpages[i]) + f2fs_compress_free_page(cc->cpages[i]); + } +- page_array_free(cc->inode, cc->cpages, cc->nr_cpages); ++ page_array_free(sbi, cc->cpages, cc->nr_cpages); + cc->cpages = NULL; + destroy_compress_ctx: + if (cops->destroy_compress_ctx) +@@ -1325,7 +1324,7 @@ static int f2fs_write_compressed_pages(s + cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + cic->inode = inode; + atomic_set(&cic->pending_pages, cc->valid_nr_cpages); +- cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); ++ cic->rpages = page_array_alloc(sbi, cc->cluster_size); + if (!cic->rpages) + goto out_put_cic; + +@@ -1427,13 +1426,13 @@ unlock_continue: + spin_unlock(&fi->i_size_lock); + + f2fs_put_rpages(cc); +- page_array_free(cc->inode, cc->cpages, cc->nr_cpages); ++ page_array_free(sbi, cc->cpages, cc->nr_cpages); + cc->cpages = NULL; + 
f2fs_destroy_compress_ctx(cc, false); + return 0; + + out_destroy_crypt: +- page_array_free(cc->inode, cic->rpages, cc->cluster_size); ++ page_array_free(sbi, cic->rpages, cc->cluster_size); + + for (--i; i >= 0; i--) { + if (!cc->cpages[i]) +@@ -1454,7 +1453,7 @@ out_free: + f2fs_compress_free_page(cc->cpages[i]); + cc->cpages[i] = NULL; + } +- page_array_free(cc->inode, cc->cpages, cc->nr_cpages); ++ page_array_free(sbi, cc->cpages, cc->nr_cpages); + cc->cpages = NULL; + return -EAGAIN; + } +@@ -1484,7 +1483,7 @@ void f2fs_compress_write_end_io(struct b + end_page_writeback(cic->rpages[i]); + } + +- page_array_free(cic->inode, cic->rpages, cic->nr_rpages); ++ page_array_free(sbi, cic->rpages, cic->nr_rpages); + kmem_cache_free(cic_entry_slab, cic); + } + +@@ -1623,7 +1622,7 @@ static int f2fs_prepare_decomp_mem(struc + if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) + return 0; + +- dic->tpages = page_array_alloc(dic->inode, dic->cluster_size); ++ dic->tpages = page_array_alloc(F2FS_I_SB(dic->inode), dic->cluster_size); + if (!dic->tpages) + return -ENOMEM; + +@@ -1683,7 +1682,7 @@ struct decompress_io_ctx *f2fs_alloc_dic + if (!dic) + return ERR_PTR(-ENOMEM); + +- dic->rpages = page_array_alloc(cc->inode, cc->cluster_size); ++ dic->rpages = page_array_alloc(sbi, cc->cluster_size); + if (!dic->rpages) { + kmem_cache_free(dic_entry_slab, dic); + return ERR_PTR(-ENOMEM); +@@ -1704,7 +1703,7 @@ struct decompress_io_ctx *f2fs_alloc_dic + dic->rpages[i] = cc->rpages[i]; + dic->nr_rpages = cc->cluster_size; + +- dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages); ++ dic->cpages = page_array_alloc(sbi, dic->nr_cpages); + if (!dic->cpages) { + ret = -ENOMEM; + goto out_free; +@@ -1734,6 +1733,7 @@ static void f2fs_free_dic(struct decompr + bool bypass_destroy_callback) + { + int i; ++ struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + + f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); + +@@ -1745,7 +1745,7 @@ static void 
f2fs_free_dic(struct decompr + continue; + f2fs_compress_free_page(dic->tpages[i]); + } +- page_array_free(dic->inode, dic->tpages, dic->cluster_size); ++ page_array_free(sbi, dic->tpages, dic->cluster_size); + } + + if (dic->cpages) { +@@ -1754,10 +1754,10 @@ static void f2fs_free_dic(struct decompr + continue; + f2fs_compress_free_page(dic->cpages[i]); + } +- page_array_free(dic->inode, dic->cpages, dic->nr_cpages); ++ page_array_free(sbi, dic->cpages, dic->nr_cpages); + } + +- page_array_free(dic->inode, dic->rpages, dic->nr_rpages); ++ page_array_free(sbi, dic->rpages, dic->nr_rpages); + kmem_cache_free(dic_entry_slab, dic); + } + diff --git a/queue-6.12/f2fs-compress-fix-uaf-of-f2fs_inode_info-in-f2fs_free_dic.patch b/queue-6.12/f2fs-compress-fix-uaf-of-f2fs_inode_info-in-f2fs_free_dic.patch new file mode 100644 index 0000000000..46df3bc623 --- /dev/null +++ b/queue-6.12/f2fs-compress-fix-uaf-of-f2fs_inode_info-in-f2fs_free_dic.patch @@ -0,0 +1,218 @@ +From lanbincn@139.com Fri Mar 6 03:39:28 2026 +From: Bin Lan +Date: Fri, 6 Mar 2026 10:38:46 +0800 +Subject: f2fs: compress: fix UAF of f2fs_inode_info in f2fs_free_dic +To: gregkh@linuxfoundation.org, stable@vger.kernel.org +Cc: linux-kernel@vger.kernel.org, Zhiguo Niu , Daeho Jeong , Baocong Liu , Chao Yu , Jaegeuk Kim , Bin Lan +Message-ID: <20260306023846.2147903-2-lanbincn@139.com> + +From: Zhiguo Niu + +[ Upstream commit 39868685c2a94a70762bc6d77dc81d781d05bff5 ] + +The decompress_io_ctx may be released asynchronously after +I/O completion. If this file is deleted immediately after read, +and the kworker of processing post_read_wq has not been executed yet +due to high workloads, It is possible that the inode(f2fs_inode_info) +is evicted and freed before it is used f2fs_free_dic. + + The UAF case as below: + Thread A Thread B + - f2fs_decompress_end_io + - f2fs_put_dic + - queue_work + add free_dic work to post_read_wq + - do_unlink + - iput + - evict + - call_rcu + This file is deleted after read. 
+ + Thread C kworker to process post_read_wq + - rcu_do_batch + - f2fs_free_inode + - kmem_cache_free + inode is freed by rcu + - process_scheduled_works + - f2fs_late_free_dic + - f2fs_free_dic + - f2fs_release_decomp_mem + read (dic->inode)->i_compress_algorithm + +This patch store compress_algorithm and sbi in dic to avoid inode UAF. + +In addition, the previous solution is deprecated in [1] may cause system hang. +[1] https://lore.kernel.org/all/c36ab955-c8db-4a8b-a9d0-f07b5f426c3f@kernel.org + +Cc: Daeho Jeong +Fixes: bff139b49d9f ("f2fs: handle decompress only post processing in softirq") +Signed-off-by: Zhiguo Niu +Signed-off-by: Baocong Liu +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[ Keep the original f2fs_vmalloc(workspace_size) in v6.12.y instead of +f2fs_vmalloc(dic->sbi, workspace_size) per commit +54ca9be0bc58 ("f2fs: introduce FAULT_VMALLOC"). ] +Signed-off-by: Bin Lan +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/compress.c | 38 +++++++++++++++++++------------------- + fs/f2fs/f2fs.h | 2 ++ + 2 files changed, 21 insertions(+), 19 deletions(-) + +--- a/fs/f2fs/compress.c ++++ b/fs/f2fs/compress.c +@@ -211,13 +211,13 @@ static int lzo_decompress_pages(struct d + ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, + dic->rbuf, &dic->rlen); + if (ret != LZO_E_OK) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lzo decompress failed, ret:%d", ret); + return -EIO; + } + + if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lzo invalid rlen:%zu, expected:%lu", + dic->rlen, PAGE_SIZE << dic->log_cluster_size); + return -EIO; +@@ -291,13 +291,13 @@ static int lz4_decompress_pages(struct d + ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, + dic->clen, dic->rlen); + if (ret < 0) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lz4 decompress failed, ret:%d", ret); + return 
-EIO; + } + + if (ret != PAGE_SIZE << dic->log_cluster_size) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "lz4 invalid ret:%d, expected:%lu", + ret, PAGE_SIZE << dic->log_cluster_size); + return -EIO; +@@ -425,7 +425,7 @@ static int zstd_init_decompress_ctx(stru + + stream = zstd_init_dstream(max_window_size, workspace, workspace_size); + if (!stream) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "%s zstd_init_dstream failed", __func__); + vfree(workspace); + return -EIO; +@@ -461,14 +461,14 @@ static int zstd_decompress_pages(struct + + ret = zstd_decompress_stream(stream, &outbuf, &inbuf); + if (zstd_is_error(ret)) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "%s zstd_decompress_stream failed, ret: %d", + __func__, zstd_get_error_code(ret)); + return -EIO; + } + + if (dic->rlen != outbuf.pos) { +- f2fs_err_ratelimited(F2FS_I_SB(dic->inode), ++ f2fs_err_ratelimited(dic->sbi, + "%s ZSTD invalid rlen:%zu, expected:%lu", + __func__, dic->rlen, + PAGE_SIZE << dic->log_cluster_size); +@@ -728,7 +728,7 @@ static void f2fs_release_decomp_mem(stru + + void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) + { +- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); ++ struct f2fs_sb_info *sbi = dic->sbi; + struct f2fs_inode_info *fi = F2FS_I(dic->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; +@@ -798,7 +798,7 @@ void f2fs_end_read_compressed_page(struc + { + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); +- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); ++ struct f2fs_sb_info *sbi = dic->sbi; + + dec_page_count(sbi, F2FS_RD_DATA); + +@@ -1615,14 +1615,13 @@ static inline bool allow_memalloc_for_de + static int f2fs_prepare_decomp_mem(struct decompress_io_ctx *dic, + bool pre_alloc) + { +- const struct f2fs_compress_ops *cops = +- 
f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; ++ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; + int i; + +- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) ++ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) + return 0; + +- dic->tpages = page_array_alloc(F2FS_I_SB(dic->inode), dic->cluster_size); ++ dic->tpages = page_array_alloc(dic->sbi, dic->cluster_size); + if (!dic->tpages) + return -ENOMEM; + +@@ -1652,10 +1651,9 @@ static int f2fs_prepare_decomp_mem(struc + static void f2fs_release_decomp_mem(struct decompress_io_ctx *dic, + bool bypass_destroy_callback, bool pre_alloc) + { +- const struct f2fs_compress_ops *cops = +- f2fs_cops[F2FS_I(dic->inode)->i_compress_algorithm]; ++ const struct f2fs_compress_ops *cops = f2fs_cops[dic->compress_algorithm]; + +- if (!allow_memalloc_for_decomp(F2FS_I_SB(dic->inode), pre_alloc)) ++ if (!allow_memalloc_for_decomp(dic->sbi, pre_alloc)) + return; + + if (!bypass_destroy_callback && cops->destroy_decompress_ctx) +@@ -1690,6 +1688,8 @@ struct decompress_io_ctx *f2fs_alloc_dic + + dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + dic->inode = cc->inode; ++ dic->sbi = sbi; ++ dic->compress_algorithm = F2FS_I(cc->inode)->i_compress_algorithm; + atomic_set(&dic->remaining_pages, cc->nr_cpages); + dic->cluster_idx = cc->cluster_idx; + dic->cluster_size = cc->cluster_size; +@@ -1733,7 +1733,8 @@ static void f2fs_free_dic(struct decompr + bool bypass_destroy_callback) + { + int i; +- struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); ++ /* use sbi in dic to avoid UAF of dic->inode*/ ++ struct f2fs_sb_info *sbi = dic->sbi; + + f2fs_release_decomp_mem(dic, bypass_destroy_callback, true); + +@@ -1776,8 +1777,7 @@ static void f2fs_put_dic(struct decompre + f2fs_free_dic(dic, false); + } else { + INIT_WORK(&dic->free_work, f2fs_late_free_dic); +- queue_work(F2FS_I_SB(dic->inode)->post_read_wq, +- &dic->free_work); ++ queue_work(dic->sbi->post_read_wq, &dic->free_work); + } + } + } +--- 
a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -1525,6 +1525,7 @@ struct compress_io_ctx { + struct decompress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ ++ struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ +@@ -1565,6 +1566,7 @@ struct decompress_io_ctx { + + bool failed; /* IO error occurred before decompression? */ + bool need_verity; /* need fs-verity verification after decompression? */ ++ unsigned char compress_algorithm; /* backup algorithm type */ + void *private; /* payload buffer for specified decompression algorithm */ + void *private2; /* extra payload buffer */ + struct work_struct verity_work; /* work to verify the decompressed pages */ diff --git a/queue-6.12/f2fs-fix-to-avoid-migrating-empty-section.patch b/queue-6.12/f2fs-fix-to-avoid-migrating-empty-section.patch new file mode 100644 index 0000000000..d142a395de --- /dev/null +++ b/queue-6.12/f2fs-fix-to-avoid-migrating-empty-section.patch @@ -0,0 +1,95 @@ +From stable+bounces-223179-greg=kroah.com@vger.kernel.org Thu Mar 5 09:48:57 2026 +From: Robert Garcia +Date: Thu, 5 Mar 2026 16:48:03 +0800 +Subject: f2fs: fix to avoid migrating empty section +To: stable@vger.kernel.org, Chao Yu +Cc: Jaegeuk Kim , Daeho Jeong , Robert Garcia , linux-f2fs-devel@lists.sourceforge.net, linux-kernel@vger.kernel.org +Message-ID: <20260305084803.210354-1-rob_garcia@163.com> + +From: Chao Yu + +[ Upstream commit d625a2b08c089397d3a03bff13fa8645e4ec7a01 ] + +It reports a bug from device w/ zufs: + +F2FS-fs (dm-64): Inconsistent segment (173822) type [1, 0] in SSA and SIT +F2FS-fs (dm-64): Stopped filesystem due to reason: 4 + +Thread A Thread B +- f2fs_expand_inode_data + - f2fs_allocate_pinning_section + - f2fs_gc_range + - do_garbage_collect w/ segno #x + - writepage + 
- f2fs_allocate_data_block + - new_curseg + - allocate segno #x + +The root cause is: fallocate on pinning file may race w/ block allocation +as above, result in do_garbage_collect() from fallocate() may migrate +segment which is just allocated by a log, the log will update segment type +in its in-memory structure, however GC will get segment type from on-disk +SSA block, once segment type changes by log, we can detect such +inconsistency, then shutdown filesystem. + +In this case, on-disk SSA shows type of segno #173822 is 1 (SUM_TYPE_NODE), +however segno #173822 was just allocated as data type segment, so in-memory +SIT shows type of segno #173822 is 0 (SUM_TYPE_DATA). + +Change as below to fix this issue: +- check whether current section is empty before gc +- add sanity checks on do_garbage_collect() to avoid any race case, result +in migrating segment used by log. +- btw, it fixes misc issue in printed logs: "SSA and SIT" -> "SIT and SSA". + +Fixes: 9703d69d9d15 ("f2fs: support file pinning for zoned devices") +Cc: Daeho Jeong +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +[ Use IS_CURSEC instead of is_cursec according to +commit c1cfc87e49525 ("f2fs: introduce is_cur{seg,sec}()"). 
] +Signed-off-by: Robert Garcia +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/gc.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +--- a/fs/f2fs/gc.c ++++ b/fs/f2fs/gc.c +@@ -1805,6 +1805,13 @@ static int do_garbage_collect(struct f2f + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + ++ if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) { ++ f2fs_err(sbi, "%s: segment %u is used by log", ++ __func__, segno); ++ f2fs_bug_on(sbi, 1); ++ goto skip; ++ } ++ + if (get_valid_blocks(sbi, segno, false) == 0) + goto freed; + if (gc_type == BG_GC && __is_large_section(sbi) && +@@ -1815,7 +1822,7 @@ static int do_garbage_collect(struct f2f + + sum = page_address(sum_page); + if (type != GET_SUM_TYPE((&sum->footer))) { +- f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", ++ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SIT and SSA", + segno, type, GET_SUM_TYPE((&sum->footer))); + f2fs_stop_checkpoint(sbi, false, + STOP_CP_REASON_CORRUPTED_SUMMARY); +@@ -2079,6 +2086,13 @@ int f2fs_gc_range(struct f2fs_sb_info *s + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + ++ /* ++ * avoid migrating empty section, as it can be allocated by ++ * log in parallel. 
++ */ ++ if (!get_valid_blocks(sbi, segno, true)) ++ continue; ++ + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + continue; + diff --git a/queue-6.12/io_uring-kbuf-check-if-target-buffer-list-is-still-legacy-on-recycle.patch b/queue-6.12/io_uring-kbuf-check-if-target-buffer-list-is-still-legacy-on-recycle.patch new file mode 100644 index 0000000000..4d47a5fdf7 --- /dev/null +++ b/queue-6.12/io_uring-kbuf-check-if-target-buffer-list-is-still-legacy-on-recycle.patch @@ -0,0 +1,47 @@ +From d9c9b22983ade3ab50cb36ff7d5f886473963af2 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Thu, 12 Mar 2026 08:59:25 -0600 +Subject: io_uring/kbuf: check if target buffer list is still legacy on recycle + +From: Jens Axboe + +Commit c2c185be5c85d37215397c8e8781abf0a69bec1f upstream. + +There's a gap between when the buffer was grabbed and when it +potentially gets recycled, where if the list is empty, someone could've +upgraded it to a ring provided type. This can happen if the request +is forced via io-wq. The legacy recycling is missing checking if the +buffer_list still exists, and if it's of the correct type. Add those +checks. + +Cc: stable@vger.kernel.org +Fixes: c7fb19428d67 ("io_uring: add support for ring mapped supplied buffers") +Reported-by: Keenan Dong +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/kbuf.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -62,9 +62,17 @@ bool io_kbuf_recycle_legacy(struct io_ki + + buf = req->kbuf; + bl = io_buffer_get_list(ctx, buf->bgid); +- list_add(&buf->list, &bl->buf_list); +- req->flags &= ~REQ_F_BUFFER_SELECTED; ++ /* ++ * If the buffer list was upgraded to a ring-based one, or removed, ++ * while the request was in-flight in io-wq, drop it. 
++ */ + req->buf_index = buf->bgid; ++ if (bl && !(bl->flags & IOBL_BUF_RING)) ++ list_add(&buf->list, &bl->buf_list); ++ else ++ kmem_cache_free(io_buf_cachep, buf); ++ req->flags &= ~REQ_F_BUFFER_SELECTED; ++ req->kbuf = NULL; + + io_ring_submit_unlock(ctx, issue_flags); + return true; diff --git a/queue-6.12/media-i2c-ov5647-use-our-own-mutex-for-the-ctrl-lock.patch b/queue-6.12/media-i2c-ov5647-use-our-own-mutex-for-the-ctrl-lock.patch new file mode 100644 index 0000000000..577e07e7ad --- /dev/null +++ b/queue-6.12/media-i2c-ov5647-use-our-own-mutex-for-the-ctrl-lock.patch @@ -0,0 +1,39 @@ +From stable+bounces-222393-greg=kroah.com@vger.kernel.org Sun Mar 1 03:36:28 2026 +From: Xiaolei Wang +Date: Sun, 1 Mar 2026 10:35:32 +0800 +Subject: media: i2c: ov5647: use our own mutex for the ctrl lock +To: sashal@kernel.org, stable@vger.kernel.org +Cc: sakari.ailus@linux.intel.com, hverkuil+cisco@kernel.org, Xiaolei.Wang@windriver.com +Message-ID: <20260301023535.2438766-1-xiaolei.wang@windriver.com> + +From: Xiaolei Wang + +[ Upstream commit 973e42fd5d2b397bff34f0c249014902dbf65912 ] + +__v4l2_ctrl_handler_setup() and __v4l2_ctrl_modify_range() contains an +assertion to verify that the v4l2_ctrl_handler::lock is held, as it should +only be called when the lock has already been acquired. Therefore use our +own mutex for the ctrl lock, otherwise a warning will be reported. + +Fixes: 4974c2f19fd8 ("media: ov5647: Support gain, exposure and AWB controls") +Cc: stable@vger.kernel.org +Signed-off-by: Xiaolei Wang +[Sakari Ailus: Fix a minor conflict.] 
+Signed-off-by: Sakari Ailus +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman +--- + drivers/media/i2c/ov5647.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/media/i2c/ov5647.c ++++ b/drivers/media/i2c/ov5647.c +@@ -1293,6 +1293,8 @@ static int ov5647_init_controls(struct o + + v4l2_ctrl_handler_init(&sensor->ctrls, 9); + ++ sensor->ctrls.lock = &sensor->lock; ++ + v4l2_ctrl_new_std(&sensor->ctrls, &ov5647_ctrl_ops, + V4L2_CID_AUTOGAIN, 0, 1, 1, 0); + diff --git a/queue-6.12/mm-thp-deny-thp-for-files-on-anonymous-inodes.patch b/queue-6.12/mm-thp-deny-thp-for-files-on-anonymous-inodes.patch new file mode 100644 index 0000000000..4d4bf396de --- /dev/null +++ b/queue-6.12/mm-thp-deny-thp-for-files-on-anonymous-inodes.patch @@ -0,0 +1,103 @@ +From dd085fe9a8ebfc5d10314c60452db38d2b75e609 Mon Sep 17 00:00:00 2001 +From: Deepanshu Kartikey +Date: Sat, 14 Feb 2026 05:45:35 +0530 +Subject: mm: thp: deny THP for files on anonymous inodes + +From: Deepanshu Kartikey + +commit dd085fe9a8ebfc5d10314c60452db38d2b75e609 upstream. + +file_thp_enabled() incorrectly allows THP for files on anonymous inodes +(e.g. guest_memfd and secretmem). These files are created via +alloc_file_pseudo(), which does not call get_write_access() and leaves +inode->i_writecount at 0. Combined with S_ISREG(inode->i_mode) being +true, they appear as read-only regular files when +CONFIG_READ_ONLY_THP_FOR_FS is enabled, making them eligible for THP +collapse. + +Anonymous inodes can never pass the inode_is_open_for_write() check +since their i_writecount is never incremented through the normal VFS +open path. The right thing to do is to exclude them from THP eligibility +altogether, since CONFIG_READ_ONLY_THP_FOR_FS was designed for real +filesystem files (e.g. shared libraries), not for pseudo-filesystem +inodes. 
+ +For guest_memfd, this allows khugepaged and MADV_COLLAPSE to create +large folios in the page cache via the collapse path, but the +guest_memfd fault handler does not support large folios. This triggers +WARN_ON_ONCE(folio_test_large(folio)) in kvm_gmem_fault_user_mapping(). + +For secretmem, collapse_file() tries to copy page contents through the +direct map, but secretmem pages are removed from the direct map. This +can result in a kernel crash: + + BUG: unable to handle page fault for address: ffff88810284d000 + RIP: 0010:memcpy_orig+0x16/0x130 + Call Trace: + collapse_file + hpage_collapse_scan_file + madvise_collapse + +Secretmem is not affected by the crash on upstream as the memory failure +recovery handles the failed copy gracefully, but it still triggers +confusing false memory failure reports: + + Memory failure: 0x106d96f: recovery action for clean unevictable + LRU page: Recovered + +Check IS_ANON_FILE(inode) in file_thp_enabled() to deny THP for all +anonymous inode files. + +Link: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44 +Link: https://lore.kernel.org/linux-mm/CAEvNRgHegcz3ro35ixkDw39ES8=U6rs6S7iP0gkR9enr7HoGtA@mail.gmail.com +Link: https://lkml.kernel.org/r/20260214001535.435626-1-kartikey406@gmail.com +Fixes: 7fbb5e188248 ("mm: remove VM_EXEC requirement for THP eligibility") +Signed-off-by: Deepanshu Kartikey +Reported-by: syzbot+33a04338019ac7e43a44@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=33a04338019ac7e43a44 +Tested-by: syzbot+33a04338019ac7e43a44@syzkaller.appspotmail.com +Tested-by: Lance Yang +Acked-by: David Hildenbrand (Arm) +Reviewed-by: Barry Song +Reviewed-by: Ackerley Tng +Tested-by: Ackerley Tng +Reviewed-by: Lorenzo Stoakes +Cc: Baolin Wang +Cc: Dev Jain +Cc: Fangrui Song +Cc: Liam Howlett +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Yang Shi +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +[ Ackerley: we don't have IS_ANON_FILE() yet. 
As guest_memfd does + not apply yet, simply check for secretmem explicitly. ] +Signed-off-by: Ackerley Tng +Reviewed-by: David Hildenbrand (Arm) +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/huge_mm.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -7,6 +7,7 @@ + + #include /* only for vma_is_dax() */ + #include ++#include + + vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); + int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, +@@ -262,6 +263,9 @@ static inline bool file_thp_enabled(stru + + inode = vma->vm_file->f_inode; + ++ if (secretmem_mapping(inode->i_mapping)) ++ return false; ++ + return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && + !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); + } diff --git a/queue-6.12/mptcp-pm-in-kernel-always-set-id-as-avail-when-rm-endp.patch b/queue-6.12/mptcp-pm-in-kernel-always-set-id-as-avail-when-rm-endp.patch new file mode 100644 index 0000000000..9fc0b400e1 --- /dev/null +++ b/queue-6.12/mptcp-pm-in-kernel-always-set-id-as-avail-when-rm-endp.patch @@ -0,0 +1,169 @@ +From stable+bounces-223373-greg=kroah.com@vger.kernel.org Fri Mar 6 18:49:00 2026 +From: "Matthieu Baerts (NGI0)" +Date: Fri, 6 Mar 2026 18:48:14 +0100 +Subject: mptcp: pm: in-kernel: always set ID as avail when rm endp +To: stable@vger.kernel.org, gregkh@linuxfoundation.org, sashal@kernel.org +Cc: MPTCP Upstream , "Matthieu Baerts (NGI0)" , syzbot+f56f7d56e2c6e11a01b6@syzkaller.appspotmail.com, Mat Martineau , Jakub Kicinski +Message-ID: <20260306174813.2517544-2-matttbe@kernel.org> + +From: "Matthieu Baerts (NGI0)" + +commit d191101dee25567c2af3b28565f45346c33d65f5 upstream. 
+ +Syzkaller managed to find a combination of actions that was generating +this warning: + + WARNING: net/mptcp/pm_kernel.c:1074 at __mark_subflow_endp_available net/mptcp/pm_kernel.c:1074 [inline], CPU#1: syz.7.48/2535 + WARNING: net/mptcp/pm_kernel.c:1074 at mptcp_pm_nl_fullmesh net/mptcp/pm_kernel.c:1446 [inline], CPU#1: syz.7.48/2535 + WARNING: net/mptcp/pm_kernel.c:1074 at mptcp_pm_nl_set_flags_all net/mptcp/pm_kernel.c:1474 [inline], CPU#1: syz.7.48/2535 + WARNING: net/mptcp/pm_kernel.c:1074 at mptcp_pm_nl_set_flags+0x5de/0x640 net/mptcp/pm_kernel.c:1538, CPU#1: syz.7.48/2535 + Modules linked in: + CPU: 1 UID: 0 PID: 2535 Comm: syz.7.48 Not tainted 6.18.0-03987-gea5f5e676cf5 #17 PREEMPT(voluntary) + Hardware name: QEMU Ubuntu 25.10 PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014 + RIP: 0010:__mark_subflow_endp_available net/mptcp/pm_kernel.c:1074 [inline] + RIP: 0010:mptcp_pm_nl_fullmesh net/mptcp/pm_kernel.c:1446 [inline] + RIP: 0010:mptcp_pm_nl_set_flags_all net/mptcp/pm_kernel.c:1474 [inline] + RIP: 0010:mptcp_pm_nl_set_flags+0x5de/0x640 net/mptcp/pm_kernel.c:1538 + Code: 89 c7 e8 c5 8c 73 fe e9 f7 fd ff ff 49 83 ef 80 e8 b7 8c 73 fe 4c 89 ff be 03 00 00 00 e8 4a 29 e3 fe eb ac e8 a3 8c 73 fe 90 <0f> 0b 90 e9 3d ff ff ff e8 95 8c 73 fe b8 a1 ff ff ff eb 1a e8 89 + RSP: 0018:ffffc9001535b820 EFLAGS: 00010287 + netdevsim0: tun_chr_ioctl cmd 1074025677 + RAX: ffffffff82da294d RBX: 0000000000000001 RCX: 0000000000080000 + RDX: ffffc900096d0000 RSI: 00000000000006d6 RDI: 00000000000006d7 + netdevsim0: linktype set to 823 + RBP: ffff88802cdb2240 R08: 00000000000104ae R09: ffffffffffffffff + R10: ffffffff82da27d4 R11: 0000000000000000 R12: 0000000000000000 + R13: ffff88801246d8c0 R14: ffffc9001535b8b8 R15: ffff88802cdb1800 + FS: 00007fc6ac5a76c0(0000) GS:ffff8880f90c8000(0000) knlGS:0000000000000000 + netlink: 'syz.3.50': attribute type 5 has an invalid length. 
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + netlink: 1232 bytes leftover after parsing attributes in process `syz.3.50'. + CR2: 0000200000010000 CR3: 0000000025b1a000 CR4: 0000000000350ef0 + Call Trace: + + mptcp_pm_set_flags net/mptcp/pm_netlink.c:277 [inline] + mptcp_pm_nl_set_flags_doit+0x1d7/0x210 net/mptcp/pm_netlink.c:282 + genl_family_rcv_msg_doit+0x117/0x180 net/netlink/genetlink.c:1115 + genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] + genl_rcv_msg+0x3a8/0x3f0 net/netlink/genetlink.c:1210 + netlink_rcv_skb+0x16d/0x240 net/netlink/af_netlink.c:2550 + genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 + netlink_unicast_kernel net/netlink/af_netlink.c:1318 [inline] + netlink_unicast+0x3e9/0x4c0 net/netlink/af_netlink.c:1344 + netlink_sendmsg+0x4ab/0x5b0 net/netlink/af_netlink.c:1894 + sock_sendmsg_nosec net/socket.c:718 [inline] + __sock_sendmsg+0xc9/0xf0 net/socket.c:733 + ____sys_sendmsg+0x272/0x3b0 net/socket.c:2608 + ___sys_sendmsg+0x2de/0x320 net/socket.c:2662 + __sys_sendmsg net/socket.c:2694 [inline] + __do_sys_sendmsg net/socket.c:2699 [inline] + __se_sys_sendmsg net/socket.c:2697 [inline] + __x64_sys_sendmsg+0x110/0x1a0 net/socket.c:2697 + do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] + do_syscall_64+0xed/0x360 arch/x86/entry/syscall_64.c:94 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + RIP: 0033:0x7fc6adb66f6d + Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48 + RSP: 002b:00007fc6ac5a6ff8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e + RAX: ffffffffffffffda RBX: 00007fc6addf5fa0 RCX: 00007fc6adb66f6d + RDX: 0000000000048084 RSI: 00002000000002c0 RDI: 000000000000000e + RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 + netlink: 'syz.5.51': attribute type 2 has an invalid length. 
+ R13: 00007fff25e91fe0 R14: 00007fc6ac5a7ce4 R15: 00007fff25e920d7 + + +The actions that caused that seem to be: + + - Create an MPTCP endpoint for address A without any flags + - Create a new MPTCP connection from address A + - Remove the MPTCP endpoint: the corresponding subflows will be removed + - Recreate the endpoint with the same ID, but with the subflow flag + - Change the same endpoint to add the fullmesh flag + +In this case, msk->pm.local_addr_used has been kept to 0 as expected, +but the corresponding bit in msk->pm.id_avail_bitmap was still unset +after having removed the endpoint, causing the splat later on. + +When removing an endpoint, the corresponding endpoint ID was only marked +as available for "signal" types with an announced address, plus all +"subflow" types, but not the other types like an endpoint corresponding +to the initial subflow. In these cases, re-creating an endpoint with the +same ID didn't signal/create anything. Here, adding the fullmesh flag +was creating the splat when calling __mark_subflow_endp_available() from +mptcp_pm_nl_fullmesh(), because msk->pm.local_addr_used was set to 0 +while the ID was marked as used. + +To fix this issue, the corresponding bit in msk->pm.id_avail_bitmap can +always be set as available when removing an MPTCP in-kernel endpoint. In +other words, moving the call to __set_bit() to do it in all cases, +except for "subflow" types where this bit is handled in a dedicated +helper. + +Note: instead of adding a new spin_(un)lock_bh that would be taken in +all cases, do all the actions requiring the spin lock under the same +block. + +This modification potentially fixes another issue reported by syzbot, +see [1]. But without a reproducer or more details about what exactly +happened before, it is hard to confirm. 
+ +Fixes: e255683c06df ("mptcp: pm: re-using ID of unused removed ADD_ADDR") +Cc: stable@vger.kernel.org +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/606 +Reported-by: syzbot+f56f7d56e2c6e11a01b6@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/68fcfc4a.050a0220.346f24.02fb.GAE@google.com [1] +Reviewed-by: Mat Martineau +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20260205-net-mptcp-misc-fixes-6-19-rc8-v2-1-c2720ce75c34@kernel.org +Signed-off-by: Jakub Kicinski +[ Conflict in pm_netlink.c, because commit 8617e85e04bd ("mptcp: pm: + split in-kernel PM specific code") is not in this version, and move + code from pm_netlink.c to pm_kernel.c. Also, commit 636113918508 + ("mptcp: pm: remove '_nl' from mptcp_pm_nl_rm_addr_received") renamed + mptcp_pm_nl_rm_subflow_received() to mptcp_pm_rm_subflow(). Apart from + that, the same patch can be applied in pm_netlink.c. ] +Signed-off-by: Matthieu Baerts (NGI0) +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_netlink.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -1599,10 +1599,8 @@ static bool mptcp_pm_remove_anno_addr(st + ret = remove_anno_list_by_saddr(msk, addr); + if (ret || force) { + spin_lock_bh(&msk->pm.lock); +- if (ret) { +- __set_bit(addr->id, msk->pm.id_avail_bitmap); ++ if (ret) + msk->pm.add_addr_signaled--; +- } + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } +@@ -1640,17 +1638,15 @@ static int mptcp_nl_remove_subflow_and_s + !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); + + list.ids[0] = mptcp_endp_get_local_id(msk, addr); +- if (remove_subflow) { +- spin_lock_bh(&msk->pm.lock); +- mptcp_pm_nl_rm_subflow_received(msk, &list); +- spin_unlock_bh(&msk->pm.lock); +- } + +- if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { +- spin_lock_bh(&msk->pm.lock); ++ spin_lock_bh(&msk->pm.lock); ++ if (remove_subflow) ++ 
mptcp_pm_nl_rm_subflow_received(msk, &list); ++ if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + __mark_subflow_endp_available(msk, list.ids[0]); +- spin_unlock_bh(&msk->pm.lock); +- } ++ else /* mark endp ID as available, e.g. Signal or MPC endp */ ++ __set_bit(addr->id, msk->pm.id_avail_bitmap); ++ spin_unlock_bh(&msk->pm.lock); + + if (msk->mpc_endpoint_id == entry->addr.id) + msk->mpc_endpoint_id = 0; diff --git a/queue-6.12/net-dsa-properly-keep-track-of-conduit-reference.patch b/queue-6.12/net-dsa-properly-keep-track-of-conduit-reference.patch new file mode 100644 index 0000000000..b99b4ed342 --- /dev/null +++ b/queue-6.12/net-dsa-properly-keep-track-of-conduit-reference.patch @@ -0,0 +1,292 @@ +From stable+bounces-222792-greg=kroah.com@vger.kernel.org Tue Mar 3 06:52:34 2026 +From: Robert Garcia +Date: Tue, 3 Mar 2026 13:51:20 +0800 +Subject: net: dsa: properly keep track of conduit reference +To: stable@vger.kernel.org, Vladimir Oltean +Cc: Paolo Abeni , Ma Ke , Robert Garcia , Jonas Gorski , "David S . Miller" , Eric Dumazet , Jakub Kicinski , Simon Horman , Russell King , Florian Fainelli , netdev@vger.kernel.org, linux-kernel@vger.kernel.org +Message-ID: <20260303055120.2111614-1-rob_garcia@163.com> + +From: Vladimir Oltean + +[ Upstream commit 06e219f6a706c367c93051f408ac61417643d2f9 ] + +Problem description +------------------- + +DSA has a mumbo-jumbo of reference handling of the conduit net device +and its kobject which, sadly, is just wrong and doesn't make sense. + +There are two distinct problems. + +1. The OF path, which uses of_find_net_device_by_node(), never releases + the elevated refcount on the conduit's kobject. Nominally, the OF and + non-OF paths should result in objects having identical reference + counts taken, and it is already suspicious that + dsa_dev_to_net_device() has a put_device() call which is missing in + dsa_port_parse_of(), but we can actually even verify that an issue + exists. 
With CONFIG_DEBUG_KOBJECT_RELEASE=y, if we run this command + "before" and "after" applying this patch: + +(unbind the conduit driver for net device eno2) +echo 0000:00:00.2 > /sys/bus/pci/drivers/fsl_enetc/unbind + +we see these lines in the output diff which appear only with the patch +applied: + +kobject: 'eno2' (ffff002009a3a6b8): kobject_release, parent 0000000000000000 (delayed 1000) +kobject: '109' (ffff0020099d59a0): kobject_release, parent 0000000000000000 (delayed 1000) + +2. After we find the conduit interface one way (OF) or another (non-OF), + it can get unregistered at any time, and DSA remains with a long-lived, + but in this case stale, cpu_dp->conduit pointer. Holding the net + device's underlying kobject isn't actually of much help, it just + prevents it from being freed (but we never need that kobject + directly). What helps us to prevent the net device from being + unregistered is the parallel netdev reference mechanism (dev_hold() + and dev_put()). + +Actually we actually use that netdev tracker mechanism implicitly on +user ports since commit 2f1e8ea726e9 ("net: dsa: link interfaces with +the DSA master to get rid of lockdep warnings"), via netdev_upper_dev_link(). +But time still passes at DSA switch probe time between the initial +of_find_net_device_by_node() code and the user port creation time, time +during which the conduit could unregister itself and DSA wouldn't know +about it. + +So we have to run of_find_net_device_by_node() under rtnl_lock() to +prevent that from happening, and release the lock only with the netdev +tracker having acquired the reference. + +Do we need to keep the reference until dsa_unregister_switch() / +dsa_switch_shutdown()? +1: Maybe yes. A switch device will still be registered even if all user + ports failed to probe, see commit 86f8b1c01a0a ("net: dsa: Do not + make user port errors fatal"), and the cpu_dp->conduit pointers + remain valid. 
I haven't audited all call paths to see whether they + will actually use the conduit in lack of any user port, but if they + do, it seems safer to not rely on user ports for that reference. +2. Definitely yes. We support changing the conduit which a user port is + associated to, and we can get into a situation where we've moved all + user ports away from a conduit, thus no longer hold any reference to + it via the net device tracker. But we shouldn't let it go nonetheless + - see the next change in relation to dsa_tree_find_first_conduit() + and LAG conduits which disappear. + We have to be prepared to return to the physical conduit, so the CPU + port must explicitly keep another reference to it. This is also to + say: the user ports and their CPU ports may not always keep a + reference to the same conduit net device, and both are needed. + +As for the conduit's kobject for the /sys/class/net/ entry, we don't +care about it, we can release it as soon as we hold the net device +object itself. + +History and blame attribution +----------------------------- + +The code has been refactored so many times, it is very difficult to +follow and properly attribute a blame, but I'll try to make a short +history which I hope to be correct. 
+ +We have two distinct probing paths: +- one for OF, introduced in 2016 in commit 83c0afaec7b7 ("net: dsa: Add + new binding implementation") +- one for non-OF, introduced in 2017 in commit 71e0bbde0d88 ("net: dsa: + Add support for platform data") + +These are both complete rewrites of the original probing paths (which +used struct dsa_switch_driver and other weird stuff, instead of regular +devices on their respective buses for register access, like MDIO, SPI, +I2C etc): +- one for OF, introduced in 2013 in commit 5e95329b701c ("dsa: add + device tree bindings to register DSA switches") +- one for non-OF, introduced in 2008 in commit 91da11f870f0 ("net: + Distributed Switch Architecture protocol support") + +except for tiny bits and pieces like dsa_dev_to_net_device() which were +seemingly carried over since the original commit, and used to this day. + +The point is that the original probing paths received a fix in 2015 in +the form of commit 679fb46c5785 ("net: dsa: Add missing master netdev +dev_put() calls"), but the fix never made it into the "new" (dsa2) +probing paths that can still be traced to today, and the fixed probing +path was later deleted in 2019 in commit 93e86b3bc842 ("net: dsa: Remove +legacy probing support"). + +That is to say, the new probing paths were never quite correct in this +area. + +The existence of the legacy probing support which was deleted in 2019 +explains why dsa_dev_to_net_device() returns a conduit with elevated +refcount (because it was supposed to be released during +dsa_remove_dst()). After the removal of the legacy code, the only user +of dsa_dev_to_net_device() calls dev_put(conduit) immediately after this +function returns. This pattern makes no sense today, and can only be +interpreted historically to understand why dev_hold() was there in the +first place. + +Change details +-------------- + +Today we have a better netdev tracking infrastructure which we should +use. 
Logically netdev_hold() belongs in common code +(dsa_port_parse_cpu(), where dp->conduit is assigned), but there is a +tradeoff to be made with the rtnl_lock() section which would become a +bit too long if we did that - dsa_port_parse_cpu() also calls +request_module(). So we duplicate a bit of logic in order for the +callers of dsa_port_parse_cpu() to be the ones responsible of holding +the conduit reference and releasing it on error. This shortens the +rtnl_lock() section significantly. + +In the dsa_switch_probe() error path, dsa_switch_release_ports() will be +called in a number of situations, one being where dsa_port_parse_cpu() +maybe didn't get the chance to run at all (a different port failed +earlier, etc). So we have to test for the conduit being NULL prior to +calling netdev_put(). + +There have still been so many transformations to the code since the +blamed commits (rename master -> conduit, commit 0650bf52b31f ("net: +dsa: be compatible with masters which unregister on shutdown")), that it +only makes sense to fix the code using the best methods available today +and see how it can be backported to stable later. I suspect the fix +cannot even be backported to kernels which lack dsa_switch_shutdown(), +and I suspect this is also maybe why the long-lived conduit reference +didn't make it into the new DSA probing paths at the time (problems +during shutdown). + +Because dsa_dev_to_net_device() has a single call site and has to be +changed anyway, the logic was just absorbed into the non-OF +dsa_port_parse(). + +Tested on the ocelot/felix switch and on dsa_loop, both on the NXP +LS1028A with CONFIG_DEBUG_KOBJECT_RELEASE=y. 
+ +Reported-by: Ma Ke +Closes: https://lore.kernel.org/netdev/20251214131204.4684-1-make24@iscas.ac.cn/ +Fixes: 83c0afaec7b7 ("net: dsa: Add new binding implementation") +Fixes: 71e0bbde0d88 ("net: dsa: Add support for platform data") +Reviewed-by: Jonas Gorski +Signed-off-by: Vladimir Oltean +Link: https://patch.msgid.link/20251215150236.3931670-1-vladimir.oltean@nxp.com +Signed-off-by: Paolo Abeni +Signed-off-by: Robert Garcia +Signed-off-by: Greg Kroah-Hartman +--- + include/net/dsa.h | 1 + net/dsa/dsa.c | 59 +++++++++++++++++++++++++++++++----------------------- + 2 files changed, 35 insertions(+), 25 deletions(-) + +--- a/include/net/dsa.h ++++ b/include/net/dsa.h +@@ -296,6 +296,7 @@ struct dsa_port { + struct devlink_port devlink_port; + struct phylink *pl; + struct phylink_config pl_config; ++ netdevice_tracker conduit_tracker; + struct dsa_lag *lag; + struct net_device *hsr_dev; + +--- a/net/dsa/dsa.c ++++ b/net/dsa/dsa.c +@@ -1246,14 +1246,25 @@ static int dsa_port_parse_of(struct dsa_ + if (ethernet) { + struct net_device *conduit; + const char *user_protocol; ++ int err; + ++ rtnl_lock(); + conduit = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); +- if (!conduit) ++ if (!conduit) { ++ rtnl_unlock(); + return -EPROBE_DEFER; ++ } ++ ++ netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL); ++ put_device(&conduit->dev); ++ rtnl_unlock(); + + user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); +- return dsa_port_parse_cpu(dp, conduit, user_protocol); ++ err = dsa_port_parse_cpu(dp, conduit, user_protocol); ++ if (err) ++ netdev_put(conduit, &dp->conduit_tracker); ++ return err; + } + + if (link) +@@ -1386,37 +1397,30 @@ static struct device *dev_find_class(str + return device_find_child(parent, class, dev_is_class); + } + +-static struct net_device *dsa_dev_to_net_device(struct device *dev) +-{ +- struct device *d; +- +- d = dev_find_class(dev, "net"); +- if (d != NULL) { +- struct net_device *nd; +- +- nd = to_net_dev(d); +- 
dev_hold(nd); +- put_device(d); +- +- return nd; +- } +- +- return NULL; +-} +- + static int dsa_port_parse(struct dsa_port *dp, const char *name, + struct device *dev) + { + if (!strcmp(name, "cpu")) { + struct net_device *conduit; ++ struct device *d; ++ int err; + +- conduit = dsa_dev_to_net_device(dev); +- if (!conduit) ++ rtnl_lock(); ++ d = dev_find_class(dev, "net"); ++ if (!d) { ++ rtnl_unlock(); + return -EPROBE_DEFER; ++ } + +- dev_put(conduit); ++ conduit = to_net_dev(d); ++ netdev_hold(conduit, &dp->conduit_tracker, GFP_KERNEL); ++ put_device(d); ++ rtnl_unlock(); + +- return dsa_port_parse_cpu(dp, conduit, NULL); ++ err = dsa_port_parse_cpu(dp, conduit, NULL); ++ if (err) ++ netdev_put(conduit, &dp->conduit_tracker); ++ return err; + } + + if (!strcmp(name, "dsa")) +@@ -1484,6 +1488,9 @@ static void dsa_switch_release_ports(str + struct dsa_vlan *v, *n; + + dsa_switch_for_each_port_safe(dp, next, ds) { ++ if (dsa_port_is_cpu(dp) && dp->conduit) ++ netdev_put(dp->conduit, &dp->conduit_tracker); ++ + /* These are either entries that upper layers lost track of + * (probably due to bugs), or installed through interfaces + * where one does not necessarily have to remove them, like +@@ -1636,8 +1643,10 @@ void dsa_switch_shutdown(struct dsa_swit + /* Disconnect from further netdevice notifiers on the conduit, + * since netdev_uses_dsa() will now return false. 
+ */ +- dsa_switch_for_each_cpu_port(dp, ds) ++ dsa_switch_for_each_cpu_port(dp, ds) { + dp->conduit->dsa_ptr = NULL; ++ netdev_put(dp->conduit, &dp->conduit_tracker); ++ } + + rtnl_unlock(); + out: diff --git a/queue-6.12/net-stmmac-remove-support-for-lpi_intr_o.patch b/queue-6.12/net-stmmac-remove-support-for-lpi_intr_o.patch new file mode 100644 index 0000000000..46063651f2 --- /dev/null +++ b/queue-6.12/net-stmmac-remove-support-for-lpi_intr_o.patch @@ -0,0 +1,214 @@ +From stable+bounces-223353-greg=kroah.com@vger.kernel.org Fri Mar 6 16:07:30 2026 +From: Ovidiu Panait +Date: Fri, 6 Mar 2026 15:06:20 +0000 +Subject: net: stmmac: remove support for lpi_intr_o +To: stable@vger.kernel.org +Cc: "Russell King (Oracle)" , Ovidiu Panait , Jakub Kicinski +Message-ID: <20260306150621.23751-2-ovidiu.panait.rb@renesas.com> + +From: "Russell King (Oracle)" + +commit 14eb64db8ff07b58a35b98375f446d9e20765674 upstream. + +The dwmac databook for v3.74a states that lpi_intr_o is a sideband +signal which should be used to ungate the application clock, and this +signal is synchronous to the receive clock. The receive clock can run +at 2.5, 25 or 125MHz depending on the media speed, and can stop under +the control of the link partner. This means that the time it takes to +clear is dependent on the negotiated media speed, and thus can be 8, +40, or 400ns after reading the LPI control and status register. + +It has been observed with some aggressive link partners, this clock +can stop while lpi_intr_o is still asserted, meaning that the signal +remains asserted for an indefinite period that the local system has +no direct control over. + +The LPI interrupts will still be signalled through the main interrupt +path in any case, and this path is not dependent on the receive clock. + +This, since we do not gate the application clock, and the chances of +adding clock gating in the future are slim due to the clocks being +ill-defined, lpi_intr_o serves no useful purpose. 
Remove the code which +requests the interrupt, and all associated code. + +Reported-by: Ovidiu Panait +Tested-by: Ovidiu Panait # Renesas RZ/V2H board +Signed-off-by: Russell King (Oracle) +Link: https://patch.msgid.link/E1vnJbt-00000007YYN-28nm@rmk-PC.armlinux.org.uk +Signed-off-by: Jakub Kicinski +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/common.h | 1 + drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 -- + drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 7 --- + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 - + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 36 ------------------ + drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 ---- + include/linux/stmmac.h | 1 + 7 files changed, 59 deletions(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/common.h ++++ b/drivers/net/ethernet/stmicro/stmmac/common.h +@@ -374,7 +374,6 @@ enum request_irq_err { + REQ_IRQ_ERR_SFTY, + REQ_IRQ_ERR_SFTY_UE, + REQ_IRQ_ERR_SFTY_CE, +- REQ_IRQ_ERR_LPI, + REQ_IRQ_ERR_WOL, + REQ_IRQ_ERR_MAC, + REQ_IRQ_ERR_NO, +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +@@ -618,7 +618,6 @@ static int intel_mgbe_common_data(struct + + /* Setup MSI vector offset specific to Intel mGbE controller */ + plat->msi_mac_vec = 29; +- plat->msi_lpi_vec = 28; + plat->msi_sfty_ce_vec = 27; + plat->msi_sfty_ue_vec = 26; + plat->msi_rx_base_vec = 0; +@@ -1004,8 +1003,6 @@ static int stmmac_config_multi_msi(struc + res->irq = pci_irq_vector(pdev, plat->msi_mac_vec); + if (plat->msi_wol_vec < STMMAC_MSI_VEC_MAX) + res->wol_irq = pci_irq_vector(pdev, plat->msi_wol_vec); +- if (plat->msi_lpi_vec < STMMAC_MSI_VEC_MAX) +- res->lpi_irq = pci_irq_vector(pdev, plat->msi_lpi_vec); + if (plat->msi_sfty_ce_vec < STMMAC_MSI_VEC_MAX) + res->sfty_ce_irq = pci_irq_vector(pdev, plat->msi_sfty_ce_vec); + if (plat->msi_sfty_ue_vec < STMMAC_MSI_VEC_MAX) +@@ -1087,7 +1084,6 @@ 
static int intel_eth_pci_probe(struct pc + */ + plat->msi_mac_vec = STMMAC_MSI_VEC_MAX; + plat->msi_wol_vec = STMMAC_MSI_VEC_MAX; +- plat->msi_lpi_vec = STMMAC_MSI_VEC_MAX; + plat->msi_sfty_ce_vec = STMMAC_MSI_VEC_MAX; + plat->msi_sfty_ue_vec = STMMAC_MSI_VEC_MAX; + plat->msi_rx_base_vec = STMMAC_MSI_VEC_MAX; +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +@@ -476,13 +476,6 @@ static int loongson_dwmac_dt_config(stru + res->wol_irq = res->irq; + } + +- res->lpi_irq = of_irq_get_byname(np, "eth_lpi"); +- if (res->lpi_irq < 0) { +- dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); +- ret = -ENODEV; +- goto err_put_node; +- } +- + ret = device_get_phy_mode(&pdev->dev); + if (ret < 0) { + dev_err(&pdev->dev, "phy_mode not found\n"); +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h +@@ -29,7 +29,6 @@ struct stmmac_resources { + void __iomem *addr; + u8 mac[ETH_ALEN]; + int wol_irq; +- int lpi_irq; + int irq; + int sfty_irq; + int sfty_ce_irq; +@@ -314,7 +313,6 @@ struct stmmac_priv { + bool wol_irq_disabled; + int clk_csr; + struct timer_list eee_ctrl_timer; +- int lpi_irq; + int eee_enabled; + int eee_active; + int tx_lpi_timer; +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -3580,10 +3580,6 @@ static void stmmac_free_irq(struct net_d + free_irq(priv->sfty_ce_irq, dev); + fallthrough; + case REQ_IRQ_ERR_SFTY_CE: +- if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) +- free_irq(priv->lpi_irq, dev); +- fallthrough; +- case REQ_IRQ_ERR_LPI: + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) + free_irq(priv->wol_irq, dev); + fallthrough; +@@ -3642,24 +3638,6 @@ static int stmmac_request_irq_multi_msi( + } + } + +- /* Request the LPI IRQ in case of another line +- * is used for LPI +- */ +- if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) { +- int_name = priv->int_name_lpi; 
+- sprintf(int_name, "%s:%s", dev->name, "lpi"); +- ret = request_irq(priv->lpi_irq, +- stmmac_mac_interrupt, +- 0, int_name, dev); +- if (unlikely(ret < 0)) { +- netdev_err(priv->dev, +- "%s: alloc lpi MSI %d (error: %d)\n", +- __func__, priv->lpi_irq, ret); +- irq_err = REQ_IRQ_ERR_LPI; +- goto irq_error; +- } +- } +- + /* Request the common Safety Feature Correctible/Uncorrectible + * Error line in case of another line is used + */ +@@ -3800,19 +3778,6 @@ static int stmmac_request_irq_single(str + } + } + +- /* Request the IRQ lines */ +- if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) { +- ret = request_irq(priv->lpi_irq, stmmac_interrupt, +- IRQF_SHARED, dev->name, dev); +- if (unlikely(ret < 0)) { +- netdev_err(priv->dev, +- "%s: ERROR: allocating the LPI IRQ %d (%d)\n", +- __func__, priv->lpi_irq, ret); +- irq_err = REQ_IRQ_ERR_LPI; +- goto irq_error; +- } +- } +- + /* Request the common Safety Feature Correctible/Uncorrectible + * Error line in case of another line is used + */ +@@ -7576,7 +7541,6 @@ int stmmac_dvr_probe(struct device *devi + + priv->dev->irq = res->irq; + priv->wol_irq = res->wol_irq; +- priv->lpi_irq = res->lpi_irq; + priv->sfty_irq = res->sfty_irq; + priv->sfty_ce_irq = res->sfty_ce_irq; + priv->sfty_ue_irq = res->sfty_ue_irq; +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +@@ -733,14 +733,6 @@ int stmmac_get_platform_resources(struct + stmmac_res->wol_irq = stmmac_res->irq; + } + +- stmmac_res->lpi_irq = +- platform_get_irq_byname_optional(pdev, "eth_lpi"); +- if (stmmac_res->lpi_irq < 0) { +- if (stmmac_res->lpi_irq == -EPROBE_DEFER) +- return -EPROBE_DEFER; +- dev_info(&pdev->dev, "IRQ eth_lpi not found\n"); +- } +- + stmmac_res->sfty_irq = + platform_get_irq_byname_optional(pdev, "sfty"); + if (stmmac_res->sfty_irq < 0) { +--- a/include/linux/stmmac.h ++++ b/include/linux/stmmac.h +@@ -268,7 +268,6 @@ struct plat_stmmacenet_data { + int int_snapshot_num; + 
int msi_mac_vec; + int msi_wol_vec; +- int msi_lpi_vec; + int msi_sfty_ce_vec; + int msi_sfty_ue_vec; + int msi_rx_base_vec; diff --git a/queue-6.12/s390-stackleak-fix-__stackleak_poison-inline-assembly-constraint.patch b/queue-6.12/s390-stackleak-fix-__stackleak_poison-inline-assembly-constraint.patch new file mode 100644 index 0000000000..77641a7e9f --- /dev/null +++ b/queue-6.12/s390-stackleak-fix-__stackleak_poison-inline-assembly-constraint.patch @@ -0,0 +1,42 @@ +From 674c5ff0f440a051ebf299d29a4c013133d81a65 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Mon, 2 Mar 2026 14:35:00 +0100 +Subject: s390/stackleak: Fix __stackleak_poison() inline assembly constraint + +From: Heiko Carstens + +commit 674c5ff0f440a051ebf299d29a4c013133d81a65 upstream. + +The __stackleak_poison() inline assembly comes with a "count" operand where +the "d" constraint is used. "count" is used with the exrl instruction and +"d" means that the compiler may allocate any register from 0 to 15. + +If the compiler would allocate register 0 then the exrl instruction would +not or the value of "count" into the executed instruction - resulting in a +stackframe which is only partially poisoned. + +Use the correct "a" constraint, which excludes register 0 from register +allocation. 
+ +Fixes: 2a405f6bb3a5 ("s390/stackleak: provide fast __stackleak_poison() implementation") +Cc: stable@vger.kernel.org +Signed-off-by: Heiko Carstens +Reviewed-by: Vasily Gorbik +Link: https://lore.kernel.org/r/20260302133500.1560531-4-hca@linux.ibm.com +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/processor.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/s390/include/asm/processor.h ++++ b/arch/s390/include/asm/processor.h +@@ -168,7 +168,7 @@ static __always_inline void __stackleak_ + " j 4f\n" + "3: mvc 8(1,%[addr]),0(%[addr])\n" + "4:\n" +- : [addr] "+&a" (erase_low), [count] "+&d" (count), [tmp] "=&a" (tmp) ++ : [addr] "+&a" (erase_low), [count] "+&a" (count), [tmp] "=&a" (tmp) + : [poison] "d" (poison) + : "memory", "cc" + ); diff --git a/queue-6.12/s390-xor-fix-xor_xc_2-inline-assembly-constraints.patch b/queue-6.12/s390-xor-fix-xor_xc_2-inline-assembly-constraints.patch new file mode 100644 index 0000000000..41dcf5e7da --- /dev/null +++ b/queue-6.12/s390-xor-fix-xor_xc_2-inline-assembly-constraints.patch @@ -0,0 +1,39 @@ +From f775276edc0c505dc0f782773796c189f31a1123 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Mon, 2 Mar 2026 14:34:58 +0100 +Subject: s390/xor: Fix xor_xc_2() inline assembly constraints + +From: Heiko Carstens + +commit f775276edc0c505dc0f782773796c189f31a1123 upstream. + +The inline assembly constraints for xor_xc_2() are incorrect. "bytes", +"p1", and "p2" are input operands, while all three of them are modified +within the inline assembly. Given that the function consists only of this +inline assembly it seems unlikely that this may cause any problems, however +fix this in any case. 
+ +Fixes: 2cfc5f9ce7f5 ("s390/xor: optimized xor routing using the XC instruction") +Cc: stable@vger.kernel.org +Signed-off-by: Heiko Carstens +Reviewed-by: Vasily Gorbik +Link: https://lore.kernel.org/r/20260302133500.1560531-2-hca@linux.ibm.com +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/lib/xor.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/s390/lib/xor.c ++++ b/arch/s390/lib/xor.c +@@ -29,8 +29,8 @@ static void xor_xc_2(unsigned long bytes + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + "3:\n" +- : : "d" (bytes), "a" (p1), "a" (p2) +- : "0", "1", "cc", "memory"); ++ : "+d" (bytes), "+a" (p1), "+a" (p2) ++ : : "0", "1", "cc", "memory"); + } + + static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, diff --git a/queue-6.12/sched-fair-fix-zero_vruntime-tracking.patch b/queue-6.12/sched-fair-fix-zero_vruntime-tracking.patch new file mode 100644 index 0000000000..26d4683de2 --- /dev/null +++ b/queue-6.12/sched-fair-fix-zero_vruntime-tracking.patch @@ -0,0 +1,215 @@ +From b3d99f43c72b56cf7a104a364e7fb34b0702828b Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Mon, 9 Feb 2026 15:28:16 +0100 +Subject: sched/fair: Fix zero_vruntime tracking + +From: Peter Zijlstra + +commit b3d99f43c72b56cf7a104a364e7fb34b0702828b upstream. + +It turns out that zero_vruntime tracking is broken when there is but a single +task running. Current update paths are through __{en,de}queue_entity(), and +when there is but a single task, pick_next_task() will always return that one +task, and put_prev_set_next_task() will end up in neither function. + +This can cause entity_key() to grow indefinitely large and cause overflows, +leading to much pain and suffering. + +Furtermore, doing update_zero_vruntime() from __{de,en}queue_entity(), which +are called from {set_next,put_prev}_entity() has problems because: + + - set_next_entity() calls __dequeue_entity() before it does cfs_rq->curr = se. 
+ This means the avg_vruntime() will see the removal but not current, missing + the entity for accounting. + + - put_prev_entity() calls __enqueue_entity() before it does cfs_rq->curr = + NULL. This means the avg_vruntime() will see the addition *and* current, + leading to double accounting. + +Both cases are incorrect/inconsistent. + +Noting that avg_vruntime is already called on each {en,de}queue, remove the +explicit avg_vruntime() calls (which removes an extra 64bit division for each +{en,de}queue) and have avg_vruntime() update zero_vruntime itself. + +Additionally, have the tick call avg_vruntime() -- discarding the result, but +for the side-effect of updating zero_vruntime. + +While there, optimize avg_vruntime() by noting that the average of one value is +rather trivial to compute. + +Test case: + # taskset -c -p 1 $$ + # taskset -c 2 bash -c 'while :; do :; done&' + # cat /sys/kernel/debug/sched/debug | awk '/^cpu#/ {P=0} /^cpu#2,/ {P=1} {if (P) print $0}' | grep -e zero_vruntime -e "^>" + +PRE: + .zero_vruntime : 31316.407903 + >R bash 487 50787.345112 E 50789.145972 2.800000 50780.298364 16 120 0.000000 0.000000 0.000000 / + .zero_vruntime : 382548.253179 + >R bash 487 427275.204288 E 427276.003584 2.800000 427268.157540 23 120 0.000000 0.000000 0.000000 / + +POST: + .zero_vruntime : 17259.709467 + >R bash 526 17259.709467 E 17262.509467 2.800000 16915.031624 9 120 0.000000 0.000000 0.000000 / + .zero_vruntime : 18702.723356 + >R bash 526 18702.723356 E 18705.523356 2.800000 18358.045513 9 120 0.000000 0.000000 0.000000 / + +Fixes: 79f3f9bedd14 ("sched/eevdf: Fix min_vruntime vs avg_vruntime") +Reported-by: K Prateek Nayak +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: K Prateek Nayak +Tested-by: Shubhang Kaushik +Link: https://patch.msgid.link/20260219080624.438854780%40infradead.org +Tested-by: Eric Hagberg +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/fair.c | 84 +++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 57 
insertions(+), 27 deletions(-) + +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -551,6 +551,21 @@ static inline bool entity_before(const s + return (s64)(a->deadline - b->deadline) < 0; + } + ++/* ++ * Per avg_vruntime() below, cfs_rq::zero_vruntime is only slightly stale ++ * and this value should be no more than two lag bounds. Which puts it in the ++ * general order of: ++ * ++ * (slice + TICK_NSEC) << NICE_0_LOAD_SHIFT ++ * ++ * which is around 44 bits in size (on 64bit); that is 20 for ++ * NICE_0_LOAD_SHIFT, another 20 for NSEC_PER_MSEC and then a handful for ++ * however many msec the actual slice+tick ends up begin. ++ * ++ * (disregarding the actual divide-by-weight part makes for the worst case ++ * weight of 2, which nicely cancels vs the fuzz in zero_vruntime not actually ++ * being the zero-lag point). ++ */ + static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) + { + return (s64)(se->vruntime - cfs_rq->zero_vruntime); +@@ -638,39 +653,61 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, + } + + static inline +-void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta) ++void update_zero_vruntime(struct cfs_rq *cfs_rq, s64 delta) + { + /* +- * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load ++ * v' = v + d ==> avg_vruntime' = avg_vruntime - d*avg_load + */ + cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta; ++ cfs_rq->zero_vruntime += delta; + } + + /* +- * Specifically: avg_runtime() + 0 must result in entity_eligible() := true ++ * Specifically: avg_vruntime() + 0 must result in entity_eligible() := true + * For this to be so, the result of this function must have a left bias. ++ * ++ * Called in: ++ * - place_entity() -- before enqueue ++ * - update_entity_lag() -- before dequeue ++ * - entity_tick() ++ * ++ * This means it is one entry 'behind' but that puts it close enough to where ++ * the bound on entity_key() is at most two lag bounds. 
+ */ + u64 avg_vruntime(struct cfs_rq *cfs_rq) + { + struct sched_entity *curr = cfs_rq->curr; +- s64 avg = cfs_rq->avg_vruntime; +- long load = cfs_rq->avg_load; ++ long weight = cfs_rq->avg_load; ++ s64 delta = 0; + +- if (curr && curr->on_rq) { +- unsigned long weight = scale_load_down(curr->load.weight); ++ if (curr && !curr->on_rq) ++ curr = NULL; + +- avg += entity_key(cfs_rq, curr) * weight; +- load += weight; +- } ++ if (weight) { ++ s64 runtime = cfs_rq->avg_vruntime; ++ ++ if (curr) { ++ unsigned long w = scale_load_down(curr->load.weight); ++ ++ runtime += entity_key(cfs_rq, curr) * w; ++ weight += w; ++ } + +- if (load) { + /* sign flips effective floor / ceiling */ +- if (avg < 0) +- avg -= (load - 1); +- avg = div_s64(avg, load); ++ if (runtime < 0) ++ runtime -= (weight - 1); ++ ++ delta = div_s64(runtime, weight); ++ } else if (curr) { ++ /* ++ * When there is but one element, it is the average. ++ */ ++ delta = curr->vruntime - cfs_rq->zero_vruntime; + } + +- return cfs_rq->zero_vruntime + avg; ++ update_zero_vruntime(cfs_rq, delta); ++ ++ return cfs_rq->zero_vruntime; + } + + /* +@@ -744,16 +781,6 @@ int entity_eligible(struct cfs_rq *cfs_r + return vruntime_eligible(cfs_rq, se->vruntime); + } + +-static void update_zero_vruntime(struct cfs_rq *cfs_rq) +-{ +- u64 vruntime = avg_vruntime(cfs_rq); +- s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime); +- +- avg_vruntime_update(cfs_rq, delta); +- +- cfs_rq->zero_vruntime = vruntime; +-} +- + static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq) + { + struct sched_entity *root = __pick_root_entity(cfs_rq); +@@ -824,7 +851,6 @@ RB_DECLARE_CALLBACKS(static, min_vruntim + static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) + { + avg_vruntime_add(cfs_rq, se); +- update_zero_vruntime(cfs_rq); + se->min_vruntime = se->vruntime; + se->min_slice = se->slice; + rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, +@@ -836,7 +862,6 @@ static void __dequeue_entity(struct 
cfs_ + rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, + &min_vruntime_cb); + avg_vruntime_sub(cfs_rq, se); +- update_zero_vruntime(cfs_rq); + } + + struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq) +@@ -5700,6 +5725,11 @@ entity_tick(struct cfs_rq *cfs_rq, struc + update_load_avg(cfs_rq, curr, UPDATE_TG); + update_cfs_group(curr); + ++ /* ++ * Pulls along cfs_rq::zero_vruntime. ++ */ ++ avg_vruntime(cfs_rq); ++ + #ifdef CONFIG_SCHED_HRTICK + /* + * queued ticks are scheduled to match the slice, so don't bother diff --git a/queue-6.12/sched_ext-remove-redundant-css_put-in-scx_cgroup_init.patch b/queue-6.12/sched_ext-remove-redundant-css_put-in-scx_cgroup_init.patch new file mode 100644 index 0000000000..734d52d19d --- /dev/null +++ b/queue-6.12/sched_ext-remove-redundant-css_put-in-scx_cgroup_init.patch @@ -0,0 +1,42 @@ +From 1336b579f6079fb8520be03624fcd9ba443c930b Mon Sep 17 00:00:00 2001 +From: Cheng-Yang Chou +Date: Tue, 3 Mar 2026 22:35:30 +0800 +Subject: sched_ext: Remove redundant css_put() in scx_cgroup_init() + +From: Cheng-Yang Chou + +commit 1336b579f6079fb8520be03624fcd9ba443c930b upstream. + +The iterator css_for_each_descendant_pre() walks the cgroup hierarchy +under cgroup_lock(). It does not increment the reference counts on +yielded css structs. + +According to the cgroup documentation, css_put() should only be used +to release a reference obtained via css_get() or css_tryget_online(). +Since the iterator does not use either of these to acquire a reference, +calling css_put() in the error path of scx_cgroup_init() causes a +refcount underflow. + +Remove the unbalanced css_put() to prevent a potential Use-After-Free +(UAF) vulnerability. 
+ +Fixes: 819513666966 ("sched_ext: Add cgroup support") +Cc: stable@vger.kernel.org # v6.12+ +Signed-off-by: Cheng-Yang Chou +Reviewed-by: Andrea Righi +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/sched/ext.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -4319,7 +4319,6 @@ static int scx_cgroup_init(void) + ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, cgroup_init, + css->cgroup, &args); + if (ret) { +- css_put(css); + scx_ops_error("ops.cgroup_init() failed (%d)", ret); + return ret; + } diff --git a/queue-6.12/series b/queue-6.12/series index 65b80b411c..8c78f94185 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -281,3 +281,23 @@ octeontx2-af-add-proper-checks-for-fwdata.patch io_uring-uring_cmd-fix-too-strict-requirement-on-ioctl.patch x86-uprobes-fix-xol-allocation-failure-for-32-bit-tasks.patch platform-x86-amd-pmc-add-support-for-van-gogh-soc.patch +mptcp-pm-in-kernel-always-set-id-as-avail-when-rm-endp.patch +net-stmmac-remove-support-for-lpi_intr_o.patch +f2fs-compress-change-the-first-parameter-of-page_array_-alloc-free-to-sbi.patch +f2fs-compress-fix-uaf-of-f2fs_inode_info-in-f2fs_free_dic.patch +f2fs-fix-to-avoid-migrating-empty-section.patch +blk-throttle-fix-access-race-during-throttle-policy-activation.patch +dmaengine-mmp_pdma-fix-race-condition-in-mmp_pdma_residue.patch +media-i2c-ov5647-use-our-own-mutex-for-the-ctrl-lock.patch +net-dsa-properly-keep-track-of-conduit-reference.patch +binfmt_misc-restore-write-access-before-closing-files-opened-by-open_exec.patch +xfs-get-rid-of-the-xchk_xfile_-_descr-calls.patch +erofs-fix-inline-data-read-failure-for-ztailpacking-pclusters.patch +mm-thp-deny-thp-for-files-on-anonymous-inodes.patch +sched_ext-remove-redundant-css_put-in-scx_cgroup_init.patch +io_uring-kbuf-check-if-target-buffer-list-is-still-legacy-on-recycle.patch +sched-fair-fix-zero_vruntime-tracking.patch 
+s390-stackleak-fix-__stackleak_poison-inline-assembly-constraint.patch +s390-xor-fix-xor_xc_2-inline-assembly-constraints.patch +drm-i915-alpm-alpm-disable-fixes.patch +drm-i915-psr-repeat-selective-update-area-alignment.patch diff --git a/queue-6.12/xfs-get-rid-of-the-xchk_xfile_-_descr-calls.patch b/queue-6.12/xfs-get-rid-of-the-xchk_xfile_-_descr-calls.patch new file mode 100644 index 0000000000..b7643c197d --- /dev/null +++ b/queue-6.12/xfs-get-rid-of-the-xchk_xfile_-_descr-calls.patch @@ -0,0 +1,536 @@ +From stable+bounces-224587-greg=kroah.com@vger.kernel.org Tue Mar 10 22:48:01 2026 +From: "Darrick J. Wong" +Date: Tue, 10 Mar 2026 14:47:50 -0700 +Subject: xfs: get rid of the xchk_xfile_*_descr calls +To: Sasha Levin +Cc: stable@vger.kernel.org, r772577952@gmail.com, Christoph Hellwig , linux-xfs@vger.kernel.org +Message-ID: <20260310214750.GB6023@frogsfrogsfrogs> +Content-Disposition: inline + +From: Darrick J. Wong + +[ Upstream commit 60382993a2e18041f88c7969f567f168cd3b4de3 ] + +The xchk_xfile_*_descr macros call kasprintf, which can fail to allocate +memory if the formatted string is larger than 16 bytes (or whatever the +nofail guarantees are nowadays). Some of them could easily exceed that, +and Jiaming Zhang found a few places where that can happen with syzbot. + +The descriptions are debugging aids and aren't required to be unique, so +let's just pass in static strings and eliminate this path to failure. +Note this patch touches a number of commits, most of which were merged +between 6.6 and 6.14. + +Cc: r772577952@gmail.com +Cc: # v6.12 +Fixes: ab97f4b1c03075 ("xfs: repair AGI unlinked inode bucket lists") +Signed-off-by: "Darrick J. 
Wong" +Reviewed-by: Christoph Hellwig +Tested-by: Jiaming Zhang +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/scrub/agheader_repair.c | 13 ++++--------- + fs/xfs/scrub/alloc_repair.c | 5 +---- + fs/xfs/scrub/attr_repair.c | 20 +++++--------------- + fs/xfs/scrub/bmap_repair.c | 6 +----- + fs/xfs/scrub/common.h | 18 ------------------ + fs/xfs/scrub/dir.c | 13 ++++--------- + fs/xfs/scrub/dir_repair.c | 11 +++-------- + fs/xfs/scrub/dirtree.c | 11 +++-------- + fs/xfs/scrub/ialloc_repair.c | 5 +---- + fs/xfs/scrub/nlinks.c | 6 ++---- + fs/xfs/scrub/parent.c | 11 +++-------- + fs/xfs/scrub/parent_repair.c | 23 ++++++----------------- + fs/xfs/scrub/quotacheck.c | 13 +++---------- + fs/xfs/scrub/refcount_repair.c | 13 ++----------- + fs/xfs/scrub/rmap_repair.c | 5 +---- + fs/xfs/scrub/rtsummary.c | 7 ++----- + 16 files changed, 41 insertions(+), 139 deletions(-) + +--- a/fs/xfs/scrub/agheader_repair.c ++++ b/fs/xfs/scrub/agheader_repair.c +@@ -1720,7 +1720,6 @@ xrep_agi( + { + struct xrep_agi *ragi; + struct xfs_mount *mp = sc->mp; +- char *descr; + unsigned int i; + int error; + +@@ -1754,17 +1753,13 @@ xrep_agi( + xagino_bitmap_init(&ragi->iunlink_bmp); + sc->buf_cleanup = xrep_agi_buf_cleanup; + +- descr = xchk_xfile_ag_descr(sc, "iunlinked next pointers"); +- error = xfarray_create(descr, 0, sizeof(xfs_agino_t), +- &ragi->iunlink_next); +- kfree(descr); ++ error = xfarray_create("iunlinked next pointers", 0, ++ sizeof(xfs_agino_t), &ragi->iunlink_next); + if (error) + return error; + +- descr = xchk_xfile_ag_descr(sc, "iunlinked prev pointers"); +- error = xfarray_create(descr, 0, sizeof(xfs_agino_t), +- &ragi->iunlink_prev); +- kfree(descr); ++ error = xfarray_create("iunlinked prev pointers", 0, ++ sizeof(xfs_agino_t), &ragi->iunlink_prev); + if (error) + return error; + +--- a/fs/xfs/scrub/alloc_repair.c ++++ b/fs/xfs/scrub/alloc_repair.c +@@ -849,7 +849,6 @@ xrep_allocbt( + { + struct xrep_abt *ra; + struct xfs_mount *mp = sc->mp; +- char *descr; + int 
error; + + /* We require the rmapbt to rebuild anything. */ +@@ -875,11 +874,9 @@ xrep_allocbt( + } + + /* Set up enough storage to handle maximally fragmented free space. */ +- descr = xchk_xfile_ag_descr(sc, "free space records"); +- error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2, ++ error = xfarray_create("free space records", mp->m_sb.sb_agblocks / 2, + sizeof(struct xfs_alloc_rec_incore), + &ra->free_records); +- kfree(descr); + if (error) + goto out_ra; + +--- a/fs/xfs/scrub/attr_repair.c ++++ b/fs/xfs/scrub/attr_repair.c +@@ -1531,7 +1531,6 @@ xrep_xattr_setup_scan( + struct xrep_xattr **rxp) + { + struct xrep_xattr *rx; +- char *descr; + int max_len; + int error; + +@@ -1557,35 +1556,26 @@ xrep_xattr_setup_scan( + goto out_rx; + + /* Set up some staging for salvaged attribute keys and values */ +- descr = xchk_xfile_ino_descr(sc, "xattr keys"); +- error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), ++ error = xfarray_create("xattr keys", 0, sizeof(struct xrep_xattr_key), + &rx->xattr_records); +- kfree(descr); + if (error) + goto out_rx; + +- descr = xchk_xfile_ino_descr(sc, "xattr names"); +- error = xfblob_create(descr, &rx->xattr_blobs); +- kfree(descr); ++ error = xfblob_create("xattr names", &rx->xattr_blobs); + if (error) + goto out_keys; + + if (xfs_has_parent(sc->mp)) { + ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); + +- descr = xchk_xfile_ino_descr(sc, +- "xattr retained parent pointer entries"); +- error = xfarray_create(descr, 0, ++ error = xfarray_create("xattr parent pointer entries", 0, + sizeof(struct xrep_xattr_pptr), + &rx->pptr_recs); +- kfree(descr); + if (error) + goto out_values; + +- descr = xchk_xfile_ino_descr(sc, +- "xattr retained parent pointer names"); +- error = xfblob_create(descr, &rx->pptr_names); +- kfree(descr); ++ error = xfblob_create("xattr parent pointer names", ++ &rx->pptr_names); + if (error) + goto out_pprecs; + +--- a/fs/xfs/scrub/bmap_repair.c ++++ b/fs/xfs/scrub/bmap_repair.c +@@ -800,7 +800,6 @@ 
xrep_bmap( + bool allow_unwritten) + { + struct xrep_bmap *rb; +- char *descr; + xfs_extnum_t max_bmbt_recs; + bool large_extcount; + int error = 0; +@@ -822,11 +821,8 @@ xrep_bmap( + /* Set up enough storage to handle the max records for this fork. */ + large_extcount = xfs_has_large_extent_counts(sc->mp); + max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork); +- descr = xchk_xfile_ino_descr(sc, "%s fork mapping records", +- whichfork == XFS_DATA_FORK ? "data" : "attr"); +- error = xfarray_create(descr, max_bmbt_recs, ++ error = xfarray_create("fork mapping records", max_bmbt_recs, + sizeof(struct xfs_bmbt_rec), &rb->bmap_records); +- kfree(descr); + if (error) + goto out_rb; + +--- a/fs/xfs/scrub/common.h ++++ b/fs/xfs/scrub/common.h +@@ -202,24 +202,6 @@ static inline bool xchk_could_repair(con + int xchk_metadata_inode_forks(struct xfs_scrub *sc); + + /* +- * Helper macros to allocate and format xfile description strings. +- * Callers must kfree the pointer returned. +- */ +-#define xchk_xfile_descr(sc, fmt, ...) \ +- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \ +- (sc)->mp->m_super->s_id, ##__VA_ARGS__) +-#define xchk_xfile_ag_descr(sc, fmt, ...) \ +- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \ +- (sc)->mp->m_super->s_id, \ +- (sc)->sa.pag ? (sc)->sa.pag->pag_agno : (sc)->sm->sm_agno, \ +- ##__VA_ARGS__) +-#define xchk_xfile_ino_descr(sc, fmt, ...) \ +- kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \ +- (sc)->mp->m_super->s_id, \ +- (sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \ +- ##__VA_ARGS__) +- +-/* + * Setting up a hook to wait for intents to drain is costly -- we have to take + * the CPU hotplug lock and force an i-cache flush on all CPUs once to set it + * up, and again to tear it down. 
These costs add up quickly, so we only want +--- a/fs/xfs/scrub/dir.c ++++ b/fs/xfs/scrub/dir.c +@@ -1094,22 +1094,17 @@ xchk_directory( + sd->xname.name = sd->namebuf; + + if (xfs_has_parent(sc->mp)) { +- char *descr; +- + /* + * Set up some staging memory for dirents that we can't check + * due to locking contention. + */ +- descr = xchk_xfile_ino_descr(sc, "slow directory entries"); +- error = xfarray_create(descr, 0, sizeof(struct xchk_dirent), +- &sd->dir_entries); +- kfree(descr); ++ error = xfarray_create("slow directory entries", 0, ++ sizeof(struct xchk_dirent), &sd->dir_entries); + if (error) + goto out_sd; + +- descr = xchk_xfile_ino_descr(sc, "slow directory entry names"); +- error = xfblob_create(descr, &sd->dir_names); +- kfree(descr); ++ error = xfblob_create("slow directory entry names", ++ &sd->dir_names); + if (error) + goto out_entries; + } +--- a/fs/xfs/scrub/dir_repair.c ++++ b/fs/xfs/scrub/dir_repair.c +@@ -1782,20 +1782,15 @@ xrep_dir_setup_scan( + struct xrep_dir *rd) + { + struct xfs_scrub *sc = rd->sc; +- char *descr; + int error; + + /* Set up some staging memory for salvaging dirents. 
*/ +- descr = xchk_xfile_ino_descr(sc, "directory entries"); +- error = xfarray_create(descr, 0, sizeof(struct xrep_dirent), +- &rd->dir_entries); +- kfree(descr); ++ error = xfarray_create("directory entries", 0, ++ sizeof(struct xrep_dirent), &rd->dir_entries); + if (error) + return error; + +- descr = xchk_xfile_ino_descr(sc, "directory entry names"); +- error = xfblob_create(descr, &rd->dir_names); +- kfree(descr); ++ error = xfblob_create("directory entry names", &rd->dir_names); + if (error) + goto out_xfarray; + +--- a/fs/xfs/scrub/dirtree.c ++++ b/fs/xfs/scrub/dirtree.c +@@ -96,7 +96,6 @@ xchk_setup_dirtree( + struct xfs_scrub *sc) + { + struct xchk_dirtree *dl; +- char *descr; + int error; + + xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); +@@ -120,16 +119,12 @@ xchk_setup_dirtree( + + mutex_init(&dl->lock); + +- descr = xchk_xfile_ino_descr(sc, "dirtree path steps"); +- error = xfarray_create(descr, 0, sizeof(struct xchk_dirpath_step), +- &dl->path_steps); +- kfree(descr); ++ error = xfarray_create("dirtree path steps", 0, ++ sizeof(struct xchk_dirpath_step), &dl->path_steps); + if (error) + goto out_dl; + +- descr = xchk_xfile_ino_descr(sc, "dirtree path names"); +- error = xfblob_create(descr, &dl->path_names); +- kfree(descr); ++ error = xfblob_create("dirtree path names", &dl->path_names); + if (error) + goto out_steps; + +--- a/fs/xfs/scrub/ialloc_repair.c ++++ b/fs/xfs/scrub/ialloc_repair.c +@@ -804,7 +804,6 @@ xrep_iallocbt( + { + struct xrep_ibt *ri; + struct xfs_mount *mp = sc->mp; +- char *descr; + xfs_agino_t first_agino, last_agino; + int error = 0; + +@@ -823,11 +822,9 @@ xrep_iallocbt( + /* Set up enough storage to handle an AG with nothing but inodes. 
*/ + xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino); + last_agino /= XFS_INODES_PER_CHUNK; +- descr = xchk_xfile_ag_descr(sc, "inode index records"); +- error = xfarray_create(descr, last_agino, ++ error = xfarray_create("inode index records", last_agino, + sizeof(struct xfs_inobt_rec_incore), + &ri->inode_records); +- kfree(descr); + if (error) + goto out_ri; + +--- a/fs/xfs/scrub/nlinks.c ++++ b/fs/xfs/scrub/nlinks.c +@@ -995,7 +995,6 @@ xchk_nlinks_setup_scan( + struct xchk_nlink_ctrs *xnc) + { + struct xfs_mount *mp = sc->mp; +- char *descr; + unsigned long long max_inos; + xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; + xfs_agino_t first_agino, last_agino; +@@ -1012,10 +1011,9 @@ xchk_nlinks_setup_scan( + */ + xfs_agino_range(mp, last_agno, &first_agino, &last_agino); + max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; +- descr = xchk_xfile_descr(sc, "file link counts"); +- error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), ++ error = xfarray_create("file link counts", ++ min(XFS_MAXINUMBER + 1, max_inos), + sizeof(struct xchk_nlink), &xnc->nlinks); +- kfree(descr); + if (error) + goto out_teardown; + +--- a/fs/xfs/scrub/parent.c ++++ b/fs/xfs/scrub/parent.c +@@ -733,7 +733,6 @@ xchk_parent_pptr( + struct xfs_scrub *sc) + { + struct xchk_pptrs *pp; +- char *descr; + int error; + + pp = kvzalloc(sizeof(struct xchk_pptrs), XCHK_GFP_FLAGS); +@@ -746,16 +745,12 @@ xchk_parent_pptr( + * Set up some staging memory for parent pointers that we can't check + * due to locking contention. 
+ */ +- descr = xchk_xfile_ino_descr(sc, "slow parent pointer entries"); +- error = xfarray_create(descr, 0, sizeof(struct xchk_pptr), +- &pp->pptr_entries); +- kfree(descr); ++ error = xfarray_create("slow parent pointer entries", 0, ++ sizeof(struct xchk_pptr), &pp->pptr_entries); + if (error) + goto out_pp; + +- descr = xchk_xfile_ino_descr(sc, "slow parent pointer names"); +- error = xfblob_create(descr, &pp->pptr_names); +- kfree(descr); ++ error = xfblob_create("slow parent pointer names", &pp->pptr_names); + if (error) + goto out_entries; + +--- a/fs/xfs/scrub/parent_repair.c ++++ b/fs/xfs/scrub/parent_repair.c +@@ -1476,7 +1476,6 @@ xrep_parent_setup_scan( + struct xrep_parent *rp) + { + struct xfs_scrub *sc = rp->sc; +- char *descr; + struct xfs_da_geometry *geo = sc->mp->m_attr_geo; + int max_len; + int error; +@@ -1504,32 +1503,22 @@ xrep_parent_setup_scan( + goto out_xattr_name; + + /* Set up some staging memory for logging parent pointer updates. */ +- descr = xchk_xfile_ino_descr(sc, "parent pointer entries"); +- error = xfarray_create(descr, 0, sizeof(struct xrep_pptr), +- &rp->pptr_recs); +- kfree(descr); ++ error = xfarray_create("parent pointer entries", 0, ++ sizeof(struct xrep_pptr), &rp->pptr_recs); + if (error) + goto out_xattr_value; + +- descr = xchk_xfile_ino_descr(sc, "parent pointer names"); +- error = xfblob_create(descr, &rp->pptr_names); +- kfree(descr); ++ error = xfblob_create("parent pointer names", &rp->pptr_names); + if (error) + goto out_recs; + + /* Set up some storage for copying attrs before the mapping exchange */ +- descr = xchk_xfile_ino_descr(sc, +- "parent pointer retained xattr entries"); +- error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr), +- &rp->xattr_records); +- kfree(descr); ++ error = xfarray_create("parent pointer xattr entries", 0, ++ sizeof(struct xrep_parent_xattr), &rp->xattr_records); + if (error) + goto out_names; + +- descr = xchk_xfile_ino_descr(sc, +- "parent pointer retained xattr 
values"); +- error = xfblob_create(descr, &rp->xattr_blobs); +- kfree(descr); ++ error = xfblob_create("parent pointer xattr values", &rp->xattr_blobs); + if (error) + goto out_attr_keys; + +--- a/fs/xfs/scrub/quotacheck.c ++++ b/fs/xfs/scrub/quotacheck.c +@@ -741,7 +741,6 @@ xqcheck_setup_scan( + struct xfs_scrub *sc, + struct xqcheck *xqc) + { +- char *descr; + struct xfs_quotainfo *qi = sc->mp->m_quotainfo; + unsigned long long max_dquots = XFS_DQ_ID_MAX + 1ULL; + int error; +@@ -756,28 +755,22 @@ xqcheck_setup_scan( + + error = -ENOMEM; + if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) { +- descr = xchk_xfile_descr(sc, "user dquot records"); +- error = xfarray_create(descr, max_dquots, ++ error = xfarray_create("user dquot records", max_dquots, + sizeof(struct xqcheck_dquot), &xqc->ucounts); +- kfree(descr); + if (error) + goto out_teardown; + } + + if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) { +- descr = xchk_xfile_descr(sc, "group dquot records"); +- error = xfarray_create(descr, max_dquots, ++ error = xfarray_create("group dquot records", max_dquots, + sizeof(struct xqcheck_dquot), &xqc->gcounts); +- kfree(descr); + if (error) + goto out_teardown; + } + + if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) { +- descr = xchk_xfile_descr(sc, "project dquot records"); +- error = xfarray_create(descr, max_dquots, ++ error = xfarray_create("project dquot records", max_dquots, + sizeof(struct xqcheck_dquot), &xqc->pcounts); +- kfree(descr); + if (error) + goto out_teardown; + } +--- a/fs/xfs/scrub/refcount_repair.c ++++ b/fs/xfs/scrub/refcount_repair.c +@@ -123,13 +123,7 @@ int + xrep_setup_ag_refcountbt( + struct xfs_scrub *sc) + { +- char *descr; +- int error; +- +- descr = xchk_xfile_ag_descr(sc, "rmap record bag"); +- error = xrep_setup_xfbtree(sc, descr); +- kfree(descr); +- return error; ++ return xrep_setup_xfbtree(sc, "rmap record bag"); + } + + /* Check for any obvious conflicts with this shared/CoW staging extent. 
*/ +@@ -705,7 +699,6 @@ xrep_refcountbt( + { + struct xrep_refc *rr; + struct xfs_mount *mp = sc->mp; +- char *descr; + int error; + + /* We require the rmapbt to rebuild anything. */ +@@ -718,11 +711,9 @@ xrep_refcountbt( + rr->sc = sc; + + /* Set up enough storage to handle one refcount record per block. */ +- descr = xchk_xfile_ag_descr(sc, "reference count records"); +- error = xfarray_create(descr, mp->m_sb.sb_agblocks, ++ error = xfarray_create("reference count records", mp->m_sb.sb_agblocks, + sizeof(struct xfs_refcount_irec), + &rr->refcount_records); +- kfree(descr); + if (error) + goto out_rr; + +--- a/fs/xfs/scrub/rmap_repair.c ++++ b/fs/xfs/scrub/rmap_repair.c +@@ -161,14 +161,11 @@ xrep_setup_ag_rmapbt( + struct xfs_scrub *sc) + { + struct xrep_rmap *rr; +- char *descr; + int error; + + xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP); + +- descr = xchk_xfile_ag_descr(sc, "reverse mapping records"); +- error = xrep_setup_xfbtree(sc, descr); +- kfree(descr); ++ error = xrep_setup_xfbtree(sc, "reverse mapping records"); + if (error) + return error; + +--- a/fs/xfs/scrub/rtsummary.c ++++ b/fs/xfs/scrub/rtsummary.c +@@ -42,7 +42,6 @@ xchk_setup_rtsummary( + struct xfs_scrub *sc) + { + struct xfs_mount *mp = sc->mp; +- char *descr; + struct xchk_rtsummary *rts; + int error; + +@@ -62,10 +61,8 @@ xchk_setup_rtsummary( + * Create an xfile to construct a new rtsummary file. The xfile allows + * us to avoid pinning kernel memory for this purpose. + */ +- descr = xchk_xfile_descr(sc, "realtime summary file"); +- error = xfile_create(descr, XFS_FSB_TO_B(mp, mp->m_rsumblocks), +- &sc->xfile); +- kfree(descr); ++ error = xfile_create("realtime summary file", ++ XFS_FSB_TO_B(mp, mp->m_rsumblocks), &sc->xfile); + if (error) + return error; +