From: Greg Kroah-Hartman Date: Mon, 29 Dec 2025 13:07:43 +0000 (+0100) Subject: 6.6-stable patches X-Git-Tag: v6.18.3~36 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f38e3e988fd16e8af5785684c383c6c2b0775847;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch ext4-clear-i_state_flags-when-alloc-inode.patch ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch jbd2-use-a-weaker-annotation-in-journal-handling.patch media-v4l2-mem2mem-fix-outdated-documentation.patch mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch mptcp-schedule-rtx-timer-only-after-pushing-data.patch --- diff --git a/queue-6.6/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch b/queue-6.6/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch new file mode 100644 index 0000000000..28cf41b8a8 --- /dev/null +++ b/queue-6.6/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch @@ -0,0 +1,59 @@ +From 7c11c56eb32eae96893eebafdbe3decadefe88ad Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Thu, 20 Nov 2025 21:42:33 +0800 +Subject: ext4: align max orphan file size with e2fsprogs limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Baokun Li + +commit 7c11c56eb32eae96893eebafdbe3decadefe88ad upstream. + +Kernel commit 0a6ce20c1564 ("ext4: verify orphan file size is not too big") +limits the maximum supported orphan file size to 8 << 20. + +However, in e2fsprogs, the orphan file size is set to 32–512 filesystem +blocks when creating a filesystem. 
+ +With 64k block size, formatting an ext4 fs >32G gives an orphan file bigger +than the kernel allows, so mount prints an error and fails: + + EXT4-fs (vdb): orphan file too big: 8650752 + EXT4-fs (vdb): mount failed + +To prevent this issue and allow previously created 64KB filesystems to +mount, we update the maximum allowed orphan file size in the kernel to +512 filesystem blocks. + +Fixes: 0a6ce20c1564 ("ext4: verify orphan file size is not too big") +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Message-ID: <20251120134233.2994147-1-libaokun@huaweicloud.com> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/orphan.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/ext4/orphan.c ++++ b/fs/ext4/orphan.c +@@ -8,6 +8,8 @@ + #include "ext4.h" + #include "ext4_jbd2.h" + ++#define EXT4_MAX_ORPHAN_FILE_BLOCKS 512 ++ + static int ext4_orphan_file_add(handle_t *handle, struct inode *inode) + { + int i, j, start; +@@ -589,7 +591,7 @@ int ext4_init_orphan_info(struct super_b + * consuming absurd amounts of memory when pinning blocks of orphan + * file in memory. + */ +- if (inode->i_size > 8 << 20) { ++ if (inode->i_size > (EXT4_MAX_ORPHAN_FILE_BLOCKS << inode->i_blkbits)) { + ext4_msg(sb, KERN_ERR, "orphan file too big: %llu", + (unsigned long long)inode->i_size); + ret = -EFSCORRUPTED; diff --git a/queue-6.6/ext4-clear-i_state_flags-when-alloc-inode.patch b/queue-6.6/ext4-clear-i_state_flags-when-alloc-inode.patch new file mode 100644 index 0000000000..350965fb33 --- /dev/null +++ b/queue-6.6/ext4-clear-i_state_flags-when-alloc-inode.patch @@ -0,0 +1,59 @@ +From 4091c8206cfd2e3bb529ef260887296b90d9b6a2 Mon Sep 17 00:00:00 2001 +From: Haibo Chen +Date: Tue, 4 Nov 2025 16:12:24 +0800 +Subject: ext4: clear i_state_flags when alloc inode + +From: Haibo Chen + +commit 4091c8206cfd2e3bb529ef260887296b90d9b6a2 upstream. 
+ +i_state_flags used on 32-bit archs, need to clear this flag when +alloc inode. +Find this issue when umount ext4, sometimes track the inode as orphan +accidentally, cause ext4 mesg dump. + +Fixes: acf943e9768e ("ext4: fix checks for orphan inodes") +Signed-off-by: Haibo Chen +Reviewed-by: Baokun Li +Reviewed-by: Zhang Yi +Reviewed-by: Jan Kara +Message-ID: <20251104-ext4-v1-1-73691a0800f9@nxp.com> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ialloc.c | 1 - + fs/ext4/inode.c | 1 - + fs/ext4/super.c | 1 + + 3 files changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -1299,7 +1299,6 @@ got: + sizeof(gen)); + } + +- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ + ext4_set_inode_state(inode, EXT4_STATE_NEW); + + ei->i_extra_isize = sbi->s_want_extra_isize; +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4913,7 +4913,6 @@ struct inode *__ext4_iget(struct super_b + ei->i_projid = make_kprojid(&init_user_ns, i_projid); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); + +- ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ + ei->i_inline_off = 0; + ei->i_dir_start_lookup = 0; + ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1412,6 +1412,7 @@ static struct inode *ext4_alloc_inode(st + + inode_set_iversion(&ei->vfs_inode, 1); + ei->i_flags = 0; ++ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ + spin_lock_init(&ei->i_raw_lock); + ei->i_prealloc_node = RB_ROOT; + atomic_set(&ei->i_prealloc_active, 0); diff --git a/queue-6.6/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch b/queue-6.6/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch new file mode 100644 index 0000000000..f340ffea08 --- /dev/null +++ b/queue-6.6/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch @@ -0,0 +1,63 @@ +From 
3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 Mon Sep 17 00:00:00 2001 +From: Yongjian Sun +Date: Thu, 6 Nov 2025 14:06:13 +0800 +Subject: ext4: fix incorrect group number assertion in mb_check_buddy + +From: Yongjian Sun + +commit 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 upstream. + +When the MB_CHECK_ASSERT macro is enabled, an assertion failure can +occur in __mb_check_buddy when checking preallocated blocks (pa) in +a block group: + +Assertion failure in mb_free_blocks() : "groupnr == e4b->bd_group" + +This happens when a pa at the very end of a block group (e.g., +pa_pstart=32765, pa_len=3 in a group of 32768 blocks) becomes +exhausted - its pa_pstart is advanced by pa_len to 32768, which +lies in the next block group. If this exhausted pa (with pa_len == 0) +is still in the bb_prealloc_list during the buddy check, the assertion +incorrectly flags it as belonging to the wrong group. A possible +sequence is as follows: + +ext4_mb_new_blocks + ext4_mb_release_context + pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len) + pa->pa_len -= ac->ac_b_ex.fe_len + + __mb_check_buddy + for each pa in group + ext4_get_group_no_and_offset + MB_CHECK_ASSERT(groupnr == e4b->bd_group) + +To fix this, we modify the check to skip block group validation for +exhausted preallocations (where pa_len == 0). Such entries are in a +transitional state and will be removed from the list soon, so they +should not trigger an assertion. This change prevents the false +positive while maintaining the integrity of the checks for active +allocations. 
+ +Fixes: c9de560ded61f ("ext4: Add multi block allocator for ext4") +Signed-off-by: Yongjian Sun +Reviewed-by: Baokun Li +Reviewed-by: Jan Kara +Message-ID: <20251106060614.631382-2-sunyongjian@huaweicloud.com> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -777,6 +777,8 @@ static void __mb_check_buddy(struct ext4 + ext4_group_t groupnr; + struct ext4_prealloc_space *pa; + pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); ++ if (!pa->pa_len) ++ continue; + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k); + MB_CHECK_ASSERT(groupnr == e4b->bd_group); + for (i = 0; i < pa->pa_len; i++) diff --git a/queue-6.6/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch b/queue-6.6/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch new file mode 100644 index 0000000000..2323a5bdcb --- /dev/null +++ b/queue-6.6/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch @@ -0,0 +1,43 @@ +From b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 Mon Sep 17 00:00:00 2001 +From: Karina Yankevich +Date: Wed, 22 Oct 2025 12:32:53 +0300 +Subject: ext4: xattr: fix null pointer deref in ext4_raw_inode() + +From: Karina Yankevich + +commit b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 upstream. + +If ext4_get_inode_loc() fails (e.g. if it returns -EFSCORRUPTED), +iloc.bh will remain set to NULL. Since ext4_xattr_inode_dec_ref_all() +lacks error checking, this will lead to a null pointer dereference +in ext4_raw_inode(), called right after ext4_get_inode_loc(). + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Fixes: c8e008b60492 ("ext4: ignore xattrs past end") +Cc: stable@kernel.org +Signed-off-by: Karina Yankevich +Reviewed-by: Sergey Shtylyov +Reviewed-by: Baokun Li +Message-ID: <20251022093253.3546296-1-k.yankevich@omp.ru> +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/xattr.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1191,7 +1191,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *h + if (block_csum) + end = (void *)bh->b_data + bh->b_size; + else { +- ext4_get_inode_loc(parent, &iloc); ++ err = ext4_get_inode_loc(parent, &iloc); ++ if (err) { ++ EXT4_ERROR_INODE(parent, "parent inode loc (error %d)", err); ++ return; ++ } + end = (void *)ext4_raw_inode(&iloc) + EXT4_SB(parent->i_sb)->s_inode_size; + } + diff --git a/queue-6.6/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch b/queue-6.6/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch new file mode 100644 index 0000000000..7c4cb05c97 --- /dev/null +++ b/queue-6.6/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch @@ -0,0 +1,85 @@ +From 524c3853831cf4f7e1db579e487c757c3065165c Mon Sep 17 00:00:00 2001 +From: Tetsuo Handa +Date: Wed, 22 Oct 2025 20:11:37 +0900 +Subject: jbd2: use a per-journal lock_class_key for jbd2_trans_commit_key + +From: Tetsuo Handa + +commit 524c3853831cf4f7e1db579e487c757c3065165c upstream. + +syzbot is reporting possibility of deadlock due to sharing lock_class_key +for jbd2_handle across ext4 and ocfs2. But this is a false positive, for +one disk partition can't have two filesystems at the same time. 
+ +Reported-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=6e493c165d26d6fcbf72 +Signed-off-by: Tetsuo Handa +Tested-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com +Reviewed-by: Jan Kara +Message-ID: <987110fc-5470-457a-a218-d286a09dd82f@I-love.SAKURA.ne.jp> +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/journal.c | 6 ++++-- + include/linux/jbd2.h | 6 ++++++ + 2 files changed, 10 insertions(+), 2 deletions(-) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1563,7 +1563,6 @@ static journal_t *journal_init_common(st + struct block_device *fs_dev, + unsigned long long start, int len, int blocksize) + { +- static struct lock_class_key jbd2_trans_commit_key; + journal_t *journal; + int err; + int n; +@@ -1572,6 +1571,7 @@ static journal_t *journal_init_common(st + if (!journal) + return ERR_PTR(-ENOMEM); + ++ lockdep_register_key(&journal->jbd2_trans_commit_key); + journal->j_blocksize = blocksize; + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; +@@ -1601,7 +1601,7 @@ static journal_t *journal_init_common(st + journal->j_max_batch_time = 15000; /* 15ms */ + atomic_set(&journal->j_reserved_credits, 0); + lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", +- &jbd2_trans_commit_key, 0); ++ &journal->jbd2_trans_commit_key, 0); + + /* The journal is marked for error until we succeed with recovery! 
*/ + journal->j_flags = JBD2_ABORT; +@@ -1648,6 +1648,7 @@ err_cleanup: + kfree(journal->j_wbuf); + jbd2_journal_destroy_revoke(journal); + journal_fail_superblock(journal); ++ lockdep_unregister_key(&journal->jbd2_trans_commit_key); + kfree(journal); + return ERR_PTR(err); + } +@@ -2229,6 +2230,7 @@ int jbd2_journal_destroy(journal_t *jour + crypto_free_shash(journal->j_chksum_driver); + kfree(journal->j_fc_wbuf); + kfree(journal->j_wbuf); ++ lockdep_unregister_key(&journal->jbd2_trans_commit_key); + kfree(journal); + + return err; +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1266,6 +1266,12 @@ struct journal_s + */ + struct lockdep_map j_trans_commit_map; + #endif ++ /** ++ * @jbd2_trans_commit_key: ++ * ++ * "struct lock_class_key" for @j_trans_commit_map ++ */ ++ struct lock_class_key jbd2_trans_commit_key; + + /** + * @j_fc_cleanup_callback: diff --git a/queue-6.6/jbd2-use-a-weaker-annotation-in-journal-handling.patch b/queue-6.6/jbd2-use-a-weaker-annotation-in-journal-handling.patch new file mode 100644 index 0000000000..3cf2a07c64 --- /dev/null +++ b/queue-6.6/jbd2-use-a-weaker-annotation-in-journal-handling.patch @@ -0,0 +1,49 @@ +From 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 Mon Sep 17 00:00:00 2001 +From: Byungchul Park +Date: Fri, 24 Oct 2025 16:39:40 +0900 +Subject: jbd2: use a weaker annotation in journal handling + +From: Byungchul Park + +commit 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 upstream. + +jbd2 journal handling code doesn't want jbd2_might_wait_for_commit() +to be placed between start_this_handle() and stop_this_handle(). So it +marks the region with rwsem_acquire_read() and rwsem_release(). + +However, the annotation is too strong for that purpose. We don't have +to use more than try lock annotation for that. + +rwsem_acquire_read() implies: + + 1. might be a waiter on contention of the lock. + 2. enter to the critical section of the lock. + +All we need in here is to act 2, not 1. 
So trylock version of +annotation is sufficient for that purpose. Now that dept partially +relies on lockdep annotations, dept interprets rwsem_acquire_read() as a +potential wait and might report a deadlock by the wait. + +Replace it with trylock version of annotation. + +Signed-off-by: Byungchul Park +Reviewed-by: Jan Kara +Cc: stable@kernel.org +Message-ID: <20251024073940.1063-1-byungchul@sk.com> +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/transaction.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -445,7 +445,7 @@ repeat: + read_unlock(&journal->j_state_lock); + current->journal_info = handle; + +- rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_); ++ rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_); + jbd2_journal_free_transaction(new_transaction); + /* + * Ensure that no allocations done while the transaction is open are diff --git a/queue-6.6/media-v4l2-mem2mem-fix-outdated-documentation.patch b/queue-6.6/media-v4l2-mem2mem-fix-outdated-documentation.patch new file mode 100644 index 0000000000..3ccd1e021a --- /dev/null +++ b/queue-6.6/media-v4l2-mem2mem-fix-outdated-documentation.patch @@ -0,0 +1,37 @@ +From 082b86919b7a94de01d849021b4da820a6cb89dc Mon Sep 17 00:00:00 2001 +From: Laurent Pinchart +Date: Wed, 8 Oct 2025 12:55:18 +0300 +Subject: media: v4l2-mem2mem: Fix outdated documentation + +From: Laurent Pinchart + +commit 082b86919b7a94de01d849021b4da820a6cb89dc upstream. + +Commit cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in +v4l2_m2m_job_finish") deferred calls to .device_run() to a work queue to +avoid recursive calls when a job is finished right away from +.device_run(). It failed to update the v4l2_m2m_job_finish() +documentation that still states the function must not be called from +.device_run(). Fix it. 
+ +Fixes: cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in v4l2_m2m_job_finish") +Cc: stable@vger.kernel.org +Signed-off-by: Laurent Pinchart +Signed-off-by: Hans Verkuil +Signed-off-by: Greg Kroah-Hartman +--- + include/media/v4l2-mem2mem.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/include/media/v4l2-mem2mem.h ++++ b/include/media/v4l2-mem2mem.h +@@ -185,8 +185,7 @@ void v4l2_m2m_try_schedule(struct v4l2_m + * other instances to take control of the device. + * + * This function has to be called only after &v4l2_m2m_ops->device_run +- * callback has been called on the driver. To prevent recursion, it should +- * not be called directly from the &v4l2_m2m_ops->device_run callback though. ++ * callback has been called on the driver. + */ + void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev, + struct v4l2_m2m_ctx *m2m_ctx); diff --git a/queue-6.6/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch b/queue-6.6/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch new file mode 100644 index 0000000000..350f1d4292 --- /dev/null +++ b/queue-6.6/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch @@ -0,0 +1,110 @@ +From ffb8c27b0539dd90262d1021488e7817fae57c42 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Fri, 5 Dec 2025 19:55:17 +0100 +Subject: mptcp: avoid deadlock on fallback while reinjecting + +From: Paolo Abeni + +commit ffb8c27b0539dd90262d1021488e7817fae57c42 upstream. 
+ +Jakub reported an MPTCP deadlock at fallback time: + + WARNING: possible recursive locking detected + 6.18.0-rc7-virtme #1 Not tainted + -------------------------------------------- + mptcp_connect/20858 is trying to acquire lock: + ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_try_fallback+0xd8/0x280 + + but task is already holding lock: + ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0 + + other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(&msk->fallback_lock); + lock(&msk->fallback_lock); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + + 3 locks held by mptcp_connect/20858: + #0: ff1100001da18290 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x114/0x1bc0 + #1: ff1100001db40fd0 (k-sk_lock-AF_INET#2){+.+.}-{0:0}, at: __mptcp_retrans+0x2cb/0xaa0 + #2: ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0 + + stack backtrace: + CPU: 0 UID: 0 PID: 20858 Comm: mptcp_connect Not tainted 6.18.0-rc7-virtme #1 PREEMPT(full) + Hardware name: Bochs, BIOS Bochs 01/01/2011 + Call Trace: + + dump_stack_lvl+0x6f/0xa0 + print_deadlock_bug.cold+0xc0/0xcd + validate_chain+0x2ff/0x5f0 + __lock_acquire+0x34c/0x740 + lock_acquire.part.0+0xbc/0x260 + _raw_spin_lock_bh+0x38/0x50 + __mptcp_try_fallback+0xd8/0x280 + mptcp_sendmsg_frag+0x16c2/0x3050 + __mptcp_retrans+0x421/0xaa0 + mptcp_release_cb+0x5aa/0xa70 + release_sock+0xab/0x1d0 + mptcp_sendmsg+0xd5b/0x1bc0 + sock_write_iter+0x281/0x4d0 + new_sync_write+0x3c5/0x6f0 + vfs_write+0x65e/0xbb0 + ksys_write+0x17e/0x200 + do_syscall_64+0xbb/0xfd0 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + RIP: 0033:0x7fa5627cbc5e + Code: 4d 89 d8 e8 14 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa + RSP: 002b:00007fff1fe14700 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 
+ RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fa5627cbc5e + RDX: 0000000000001f9c RSI: 00007fff1fe16984 RDI: 0000000000000005 + RBP: 00007fff1fe14710 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff1fe16920 + R13: 0000000000002000 R14: 0000000000001f9c R15: 0000000000001f9c + +The packet scheduler could attempt a reinjection after receiving an +MP_FAIL and before the infinite map has been transmitted, causing a +deadlock since MPTCP needs to do the reinjection atomically from WRT +fallback. + +Address the issue explicitly avoiding the reinjection in the critical +scenario. Note that this is the only fallback critical section that +could potentially send packets and hit the double-lock. + +Reported-by: Jakub Kicinski +Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/mptcp-dbg/results/412720/1-mptcp-join-sh/stderr +Fixes: f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-4-9e4781a6c1b8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2712,10 +2712,13 @@ static void __mptcp_retrans(struct sock + + /* + * make the whole retrans decision, xmit, disallow +- * fallback atomic ++ * fallback atomic, note that we can't retrans even ++ * when an infinite fallback is in progress, i.e. new ++ * subflows are disallowed. 
+ */ + spin_lock_bh(&msk->fallback_lock); +- if (__mptcp_check_fallback(msk)) { ++ if (__mptcp_check_fallback(msk) || ++ !msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + release_sock(ssk); + goto clear_scheduled; diff --git a/queue-6.6/mptcp-schedule-rtx-timer-only-after-pushing-data.patch b/queue-6.6/mptcp-schedule-rtx-timer-only-after-pushing-data.patch new file mode 100644 index 0000000000..7c0b0538b0 --- /dev/null +++ b/queue-6.6/mptcp-schedule-rtx-timer-only-after-pushing-data.patch @@ -0,0 +1,71 @@ +From 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Fri, 5 Dec 2025 19:55:16 +0100 +Subject: mptcp: schedule rtx timer only after pushing data + +From: Paolo Abeni + +commit 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb upstream. + +The MPTCP protocol usually schedule the retransmission timer only +when there is some chances for such retransmissions to happen. + +With a notable exception: __mptcp_push_pending() currently schedule +such timer unconditionally, potentially leading to unnecessary rtx +timer expiration. 
+ +The issue is present since the blamed commit below but became easily +reproducible after commit 27b0e701d387 ("mptcp: drop bogus optimization +in __mptcp_check_push()") + +Fixes: 33d41c9cd74c ("mptcp: more accurate timeout") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts (NGI0) +Signed-off-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-3-9e4781a6c1b8@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1630,7 +1630,7 @@ void __mptcp_push_pending(struct sock *s + struct mptcp_sendmsg_info info = { + .flags = flags, + }; +- bool do_check_data_fin = false; ++ bool copied = false; + int push_count = 1; + + while (mptcp_send_head(sk) && (push_count > 0)) { +@@ -1672,7 +1672,7 @@ void __mptcp_push_pending(struct sock *s + push_count--; + continue; + } +- do_check_data_fin = true; ++ copied = true; + } + } + } +@@ -1681,11 +1681,14 @@ void __mptcp_push_pending(struct sock *s + if (ssk) + mptcp_push_release(ssk, &info); + +- /* ensure the rtx timer is running */ +- if (!mptcp_rtx_timer_pending(sk)) +- mptcp_reset_rtx_timer(sk); +- if (do_check_data_fin) ++ /* Avoid scheduling the rtx timer if no data has been pushed; the timer ++ * will be updated on positive acks by __mptcp_cleanup_una(). 
++ */ ++ if (copied) { ++ if (!mptcp_rtx_timer_pending(sk)) ++ mptcp_reset_rtx_timer(sk); + mptcp_check_send_data_fin(sk); ++ } + } + + static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) diff --git a/queue-6.6/series b/queue-6.6/series index 69d083df3c..803e4162e7 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -419,3 +419,12 @@ floppy-fix-for-page_size-4kb.patch kallsyms-fix-wrong-big-kernel-symbol-type-read-from-procfs.patch fs-ntfs3-fix-mount-failure-for-sparse-runs-in-run_unpack.patch ktest.pl-fix-uninitialized-var-in-config-bisect.pl.patch +ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch +ext4-clear-i_state_flags-when-alloc-inode.patch +ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch +ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch +jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch +jbd2-use-a-weaker-annotation-in-journal-handling.patch +media-v4l2-mem2mem-fix-outdated-documentation.patch +mptcp-schedule-rtx-timer-only-after-pushing-data.patch +mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch