git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Dec 2025 13:07:43 +0000 (14:07 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 29 Dec 2025 13:07:43 +0000 (14:07 +0100)
added patches:
ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch
ext4-clear-i_state_flags-when-alloc-inode.patch
ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch
ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch
jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch
jbd2-use-a-weaker-annotation-in-journal-handling.patch
media-v4l2-mem2mem-fix-outdated-documentation.patch
mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch
mptcp-schedule-rtx-timer-only-after-pushing-data.patch

queue-6.6/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch [new file with mode: 0644]
queue-6.6/ext4-clear-i_state_flags-when-alloc-inode.patch [new file with mode: 0644]
queue-6.6/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch [new file with mode: 0644]
queue-6.6/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch [new file with mode: 0644]
queue-6.6/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch [new file with mode: 0644]
queue-6.6/jbd2-use-a-weaker-annotation-in-journal-handling.patch [new file with mode: 0644]
queue-6.6/media-v4l2-mem2mem-fix-outdated-documentation.patch [new file with mode: 0644]
queue-6.6/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch [new file with mode: 0644]
queue-6.6/mptcp-schedule-rtx-timer-only-after-pushing-data.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch b/queue-6.6/ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch
new file mode 100644 (file)
index 0000000..28cf41b
--- /dev/null
@@ -0,0 +1,59 @@
+From 7c11c56eb32eae96893eebafdbe3decadefe88ad Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Thu, 20 Nov 2025 21:42:33 +0800
+Subject: ext4: align max orphan file size with e2fsprogs limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 7c11c56eb32eae96893eebafdbe3decadefe88ad upstream.
+
+Kernel commit 0a6ce20c1564 ("ext4: verify orphan file size is not too big")
+limits the maximum supported orphan file size to 8 << 20.
+
+However, in e2fsprogs, the orphan file size is set to 32–512 filesystem
+blocks when creating a filesystem.
+
+With 64k block size, formatting an ext4 fs >32G gives an orphan file bigger
+than the kernel allows, so mount prints an error and fails:
+
+    EXT4-fs (vdb): orphan file too big: 8650752
+    EXT4-fs (vdb): mount failed
+
+To prevent this issue and allow previously created 64KB filesystems to
+mount, we update the maximum allowed orphan file size in the kernel to
+512 filesystem blocks.
+
+Fixes: 0a6ce20c1564 ("ext4: verify orphan file size is not too big")
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251120134233.2994147-1-libaokun@huaweicloud.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/orphan.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/orphan.c
++++ b/fs/ext4/orphan.c
+@@ -8,6 +8,8 @@
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
++#define EXT4_MAX_ORPHAN_FILE_BLOCKS 512
++
+ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
+ {
+       int i, j, start;
+@@ -589,7 +591,7 @@ int ext4_init_orphan_info(struct super_b
+        * consuming absurd amounts of memory when pinning blocks of orphan
+        * file in memory.
+        */
+-      if (inode->i_size > 8 << 20) {
++      if (inode->i_size > (EXT4_MAX_ORPHAN_FILE_BLOCKS << inode->i_blkbits)) {
+               ext4_msg(sb, KERN_ERR, "orphan file too big: %llu",
+                        (unsigned long long)inode->i_size);
+               ret = -EFSCORRUPTED;
diff --git a/queue-6.6/ext4-clear-i_state_flags-when-alloc-inode.patch b/queue-6.6/ext4-clear-i_state_flags-when-alloc-inode.patch
new file mode 100644 (file)
index 0000000..350965f
--- /dev/null
@@ -0,0 +1,59 @@
+From 4091c8206cfd2e3bb529ef260887296b90d9b6a2 Mon Sep 17 00:00:00 2001
+From: Haibo Chen <haibo.chen@nxp.com>
+Date: Tue, 4 Nov 2025 16:12:24 +0800
+Subject: ext4: clear i_state_flags when alloc inode
+
+From: Haibo Chen <haibo.chen@nxp.com>
+
+commit 4091c8206cfd2e3bb529ef260887296b90d9b6a2 upstream.
+
+i_state_flags is used on 32-bit archs, so this field needs to be cleared
+when allocating an inode.
+This issue was found when unmounting ext4: an inode was sometimes tracked
+as an orphan accidentally, causing an ext4 message dump.
+
+Fixes: acf943e9768e ("ext4: fix checks for orphan inodes")
+Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251104-ext4-v1-1-73691a0800f9@nxp.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/ialloc.c |    1 -
+ fs/ext4/inode.c  |    1 -
+ fs/ext4/super.c  |    1 +
+ 3 files changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -1299,7 +1299,6 @@ got:
+                                             sizeof(gen));
+       }
+-      ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
+       ext4_set_inode_state(inode, EXT4_STATE_NEW);
+       ei->i_extra_isize = sbi->s_want_extra_isize;
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4913,7 +4913,6 @@ struct inode *__ext4_iget(struct super_b
+       ei->i_projid = make_kprojid(&init_user_ns, i_projid);
+       set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
+-      ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
+       ei->i_inline_off = 0;
+       ei->i_dir_start_lookup = 0;
+       ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1412,6 +1412,7 @@ static struct inode *ext4_alloc_inode(st
+       inode_set_iversion(&ei->vfs_inode, 1);
+       ei->i_flags = 0;
++      ext4_clear_state_flags(ei);     /* Only relevant on 32-bit archs */
+       spin_lock_init(&ei->i_raw_lock);
+       ei->i_prealloc_node = RB_ROOT;
+       atomic_set(&ei->i_prealloc_active, 0);
diff --git a/queue-6.6/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch b/queue-6.6/ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch
new file mode 100644 (file)
index 0000000..f340ffe
--- /dev/null
@@ -0,0 +1,63 @@
+From 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 Mon Sep 17 00:00:00 2001
+From: Yongjian Sun <sunyongjian1@huawei.com>
+Date: Thu, 6 Nov 2025 14:06:13 +0800
+Subject: ext4: fix incorrect group number assertion in mb_check_buddy
+
+From: Yongjian Sun <sunyongjian1@huawei.com>
+
+commit 3f7a79d05c692c7cfec70bf104b1b3c3d0ce6247 upstream.
+
+When the MB_CHECK_ASSERT macro is enabled, an assertion failure can
+occur in __mb_check_buddy when checking preallocated blocks (pa) in
+a block group:
+
+Assertion failure in mb_free_blocks() : "groupnr == e4b->bd_group"
+
+This happens when a pa at the very end of a block group (e.g.,
+pa_pstart=32765, pa_len=3 in a group of 32768 blocks) becomes
+exhausted - its pa_pstart is advanced by pa_len to 32768, which
+lies in the next block group. If this exhausted pa (with pa_len == 0)
+is still in the bb_prealloc_list during the buddy check, the assertion
+incorrectly flags it as belonging to the wrong group. A possible
+sequence is as follows:
+
+ext4_mb_new_blocks
+  ext4_mb_release_context
+    pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len)
+    pa->pa_len -= ac->ac_b_ex.fe_len
+
+                        __mb_check_buddy
+                           for each pa in group
+                             ext4_get_group_no_and_offset
+                             MB_CHECK_ASSERT(groupnr == e4b->bd_group)
+
+To fix this, we modify the check to skip block group validation for
+exhausted preallocations (where pa_len == 0). Such entries are in a
+transitional state and will be removed from the list soon, so they
+should not trigger an assertion. This change prevents the false
+positive while maintaining the integrity of the checks for active
+allocations.
+
+Fixes: c9de560ded61f ("ext4: Add multi block allocator for ext4")
+Signed-off-by: Yongjian Sun <sunyongjian1@huawei.com>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <20251106060614.631382-2-sunyongjian@huaweicloud.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -777,6 +777,8 @@ static void __mb_check_buddy(struct ext4
+               ext4_group_t groupnr;
+               struct ext4_prealloc_space *pa;
+               pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
++              if (!pa->pa_len)
++                      continue;
+               ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
+               MB_CHECK_ASSERT(groupnr == e4b->bd_group);
+               for (i = 0; i < pa->pa_len; i++)
diff --git a/queue-6.6/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch b/queue-6.6/ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch
new file mode 100644 (file)
index 0000000..2323a5b
--- /dev/null
@@ -0,0 +1,43 @@
+From b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 Mon Sep 17 00:00:00 2001
+From: Karina Yankevich <k.yankevich@omp.ru>
+Date: Wed, 22 Oct 2025 12:32:53 +0300
+Subject: ext4: xattr: fix null pointer deref in ext4_raw_inode()
+
+From: Karina Yankevich <k.yankevich@omp.ru>
+
+commit b97cb7d6a051aa6ebd57906df0e26e9e36c26d14 upstream.
+
+If ext4_get_inode_loc() fails (e.g. if it returns -EFSCORRUPTED),
+iloc.bh will remain set to NULL. Since ext4_xattr_inode_dec_ref_all()
+lacks error checking, this will lead to a null pointer dereference
+in ext4_raw_inode(), called right after ext4_get_inode_loc().
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: c8e008b60492 ("ext4: ignore xattrs past end")
+Cc: stable@kernel.org
+Signed-off-by: Karina Yankevich <k.yankevich@omp.ru>
+Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Message-ID: <20251022093253.3546296-1-k.yankevich@omp.ru>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/xattr.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1191,7 +1191,11 @@ ext4_xattr_inode_dec_ref_all(handle_t *h
+       if (block_csum)
+               end = (void *)bh->b_data + bh->b_size;
+       else {
+-              ext4_get_inode_loc(parent, &iloc);
++              err = ext4_get_inode_loc(parent, &iloc);
++              if (err) {
++                      EXT4_ERROR_INODE(parent, "parent inode loc (error %d)", err);
++                      return;
++              }
+               end = (void *)ext4_raw_inode(&iloc) + EXT4_SB(parent->i_sb)->s_inode_size;
+       }
diff --git a/queue-6.6/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch b/queue-6.6/jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch
new file mode 100644 (file)
index 0000000..7c4cb05
--- /dev/null
@@ -0,0 +1,85 @@
+From 524c3853831cf4f7e1db579e487c757c3065165c Mon Sep 17 00:00:00 2001
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Date: Wed, 22 Oct 2025 20:11:37 +0900
+Subject: jbd2: use a per-journal lock_class_key for jbd2_trans_commit_key
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+commit 524c3853831cf4f7e1db579e487c757c3065165c upstream.
+
+syzbot is reporting possibility of deadlock due to sharing lock_class_key
+for jbd2_handle across ext4 and ocfs2. But this is a false positive, for
+one disk partition can't have two filesystems at the same time.
+
+Reported-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=6e493c165d26d6fcbf72
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Tested-by: syzbot+6e493c165d26d6fcbf72@syzkaller.appspotmail.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Message-ID: <987110fc-5470-457a-a218-d286a09dd82f@I-love.SAKURA.ne.jp>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/journal.c    |    6 ++++--
+ include/linux/jbd2.h |    6 ++++++
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -1563,7 +1563,6 @@ static journal_t *journal_init_common(st
+                       struct block_device *fs_dev,
+                       unsigned long long start, int len, int blocksize)
+ {
+-      static struct lock_class_key jbd2_trans_commit_key;
+       journal_t *journal;
+       int err;
+       int n;
+@@ -1572,6 +1571,7 @@ static journal_t *journal_init_common(st
+       if (!journal)
+               return ERR_PTR(-ENOMEM);
++      lockdep_register_key(&journal->jbd2_trans_commit_key);
+       journal->j_blocksize = blocksize;
+       journal->j_dev = bdev;
+       journal->j_fs_dev = fs_dev;
+@@ -1601,7 +1601,7 @@ static journal_t *journal_init_common(st
+       journal->j_max_batch_time = 15000; /* 15ms */
+       atomic_set(&journal->j_reserved_credits, 0);
+       lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
+-                       &jbd2_trans_commit_key, 0);
++                       &journal->jbd2_trans_commit_key, 0);
+       /* The journal is marked for error until we succeed with recovery! */
+       journal->j_flags = JBD2_ABORT;
+@@ -1648,6 +1648,7 @@ err_cleanup:
+       kfree(journal->j_wbuf);
+       jbd2_journal_destroy_revoke(journal);
+       journal_fail_superblock(journal);
++      lockdep_unregister_key(&journal->jbd2_trans_commit_key);
+       kfree(journal);
+       return ERR_PTR(err);
+ }
+@@ -2229,6 +2230,7 @@ int jbd2_journal_destroy(journal_t *jour
+               crypto_free_shash(journal->j_chksum_driver);
+       kfree(journal->j_fc_wbuf);
+       kfree(journal->j_wbuf);
++      lockdep_unregister_key(&journal->jbd2_trans_commit_key);
+       kfree(journal);
+       return err;
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -1266,6 +1266,12 @@ struct journal_s
+        */
+       struct lockdep_map      j_trans_commit_map;
+ #endif
++      /**
++       * @jbd2_trans_commit_key:
++       *
++       * "struct lock_class_key" for @j_trans_commit_map
++       */
++      struct lock_class_key   jbd2_trans_commit_key;
+       /**
+        * @j_fc_cleanup_callback:
diff --git a/queue-6.6/jbd2-use-a-weaker-annotation-in-journal-handling.patch b/queue-6.6/jbd2-use-a-weaker-annotation-in-journal-handling.patch
new file mode 100644 (file)
index 0000000..3cf2a07
--- /dev/null
@@ -0,0 +1,49 @@
+From 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 Mon Sep 17 00:00:00 2001
+From: Byungchul Park <byungchul@sk.com>
+Date: Fri, 24 Oct 2025 16:39:40 +0900
+Subject: jbd2: use a weaker annotation in journal handling
+
+From: Byungchul Park <byungchul@sk.com>
+
+commit 40a71b53d5a6d4ea17e4d54b99b2ac03a7f5e783 upstream.
+
+jbd2 journal handling code doesn't want jbd2_might_wait_for_commit()
+to be placed between start_this_handle() and stop_this_handle().  So it
+marks the region with rwsem_acquire_read() and rwsem_release().
+
+However, the annotation is too strong for that purpose.  We don't have
+to use more than try lock annotation for that.
+
+rwsem_acquire_read() implies:
+
+   1. might be a waiter on contention of the lock.
+   2. enter to the critical section of the lock.
+
+All we need in here is to act 2, not 1.  So trylock version of
+annotation is sufficient for that purpose.  Now that dept partially
+relies on lockdep annotations, dept interprets rwsem_acquire_read() as a
+potential wait and might report a deadlock by the wait.
+
+Replace it with trylock version of annotation.
+
+Signed-off-by: Byungchul Park <byungchul@sk.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: stable@kernel.org
+Message-ID: <20251024073940.1063-1-byungchul@sk.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/transaction.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -445,7 +445,7 @@ repeat:
+       read_unlock(&journal->j_state_lock);
+       current->journal_info = handle;
+-      rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
++      rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_);
+       jbd2_journal_free_transaction(new_transaction);
+       /*
+        * Ensure that no allocations done while the transaction is open are
diff --git a/queue-6.6/media-v4l2-mem2mem-fix-outdated-documentation.patch b/queue-6.6/media-v4l2-mem2mem-fix-outdated-documentation.patch
new file mode 100644 (file)
index 0000000..3ccd1e0
--- /dev/null
@@ -0,0 +1,37 @@
+From 082b86919b7a94de01d849021b4da820a6cb89dc Mon Sep 17 00:00:00 2001
+From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Date: Wed, 8 Oct 2025 12:55:18 +0300
+Subject: media: v4l2-mem2mem: Fix outdated documentation
+
+From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+
+commit 082b86919b7a94de01d849021b4da820a6cb89dc upstream.
+
+Commit cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in
+v4l2_m2m_job_finish") deferred calls to .device_run() to a work queue to
+avoid recursive calls when a job is finished right away from
+.device_run(). It failed to update the v4l2_m2m_job_finish()
+documentation that still states the function must not be called from
+.device_run(). Fix it.
+
+Fixes: cbd9463da1b1 ("media: v4l2-mem2mem: Avoid calling .device_run in v4l2_m2m_job_finish")
+Cc: stable@vger.kernel.org
+Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Signed-off-by: Hans Verkuil <hverkuil+cisco@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/media/v4l2-mem2mem.h |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/media/v4l2-mem2mem.h
++++ b/include/media/v4l2-mem2mem.h
+@@ -185,8 +185,7 @@ void v4l2_m2m_try_schedule(struct v4l2_m
+  * other instances to take control of the device.
+  *
+  * This function has to be called only after &v4l2_m2m_ops->device_run
+- * callback has been called on the driver. To prevent recursion, it should
+- * not be called directly from the &v4l2_m2m_ops->device_run callback though.
++ * callback has been called on the driver.
+  */
+ void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev,
+                        struct v4l2_m2m_ctx *m2m_ctx);
diff --git a/queue-6.6/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch b/queue-6.6/mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch
new file mode 100644 (file)
index 0000000..350f1d4
--- /dev/null
@@ -0,0 +1,110 @@
+From ffb8c27b0539dd90262d1021488e7817fae57c42 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 5 Dec 2025 19:55:17 +0100
+Subject: mptcp: avoid deadlock on fallback while reinjecting
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit ffb8c27b0539dd90262d1021488e7817fae57c42 upstream.
+
+Jakub reported an MPTCP deadlock at fallback time:
+
+ WARNING: possible recursive locking detected
+ 6.18.0-rc7-virtme #1 Not tainted
+ --------------------------------------------
+ mptcp_connect/20858 is trying to acquire lock:
+ ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_try_fallback+0xd8/0x280
+
+ but task is already holding lock:
+ ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0
+
+ other info that might help us debug this:
+  Possible unsafe locking scenario:
+
+        CPU0
+        ----
+   lock(&msk->fallback_lock);
+   lock(&msk->fallback_lock);
+
+  *** DEADLOCK ***
+
+  May be due to missing lock nesting notation
+
+ 3 locks held by mptcp_connect/20858:
+  #0: ff1100001da18290 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_sendmsg+0x114/0x1bc0
+  #1: ff1100001db40fd0 (k-sk_lock-AF_INET#2){+.+.}-{0:0}, at: __mptcp_retrans+0x2cb/0xaa0
+  #2: ff1100001da18b60 (&msk->fallback_lock){+.-.}-{3:3}, at: __mptcp_retrans+0x352/0xaa0
+
+ stack backtrace:
+ CPU: 0 UID: 0 PID: 20858 Comm: mptcp_connect Not tainted 6.18.0-rc7-virtme #1 PREEMPT(full)
+ Hardware name: Bochs, BIOS Bochs 01/01/2011
+ Call Trace:
+  <TASK>
+  dump_stack_lvl+0x6f/0xa0
+  print_deadlock_bug.cold+0xc0/0xcd
+  validate_chain+0x2ff/0x5f0
+  __lock_acquire+0x34c/0x740
+  lock_acquire.part.0+0xbc/0x260
+  _raw_spin_lock_bh+0x38/0x50
+  __mptcp_try_fallback+0xd8/0x280
+  mptcp_sendmsg_frag+0x16c2/0x3050
+  __mptcp_retrans+0x421/0xaa0
+  mptcp_release_cb+0x5aa/0xa70
+  release_sock+0xab/0x1d0
+  mptcp_sendmsg+0xd5b/0x1bc0
+  sock_write_iter+0x281/0x4d0
+  new_sync_write+0x3c5/0x6f0
+  vfs_write+0x65e/0xbb0
+  ksys_write+0x17e/0x200
+  do_syscall_64+0xbb/0xfd0
+  entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ RIP: 0033:0x7fa5627cbc5e
+ Code: 4d 89 d8 e8 14 bd 00 00 4c 8b 5d f8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 11 c9 c3 0f 1f 80 00 00 00 00 48 8b 45 10 0f 05 <c9> c3 83 e2 39 83 fa 08 75 e7 e8 13 ff ff ff 0f 1f 00 f3 0f 1e fa
+ RSP: 002b:00007fff1fe14700 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
+ RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fa5627cbc5e
+ RDX: 0000000000001f9c RSI: 00007fff1fe16984 RDI: 0000000000000005
+ RBP: 00007fff1fe14710 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000000 R11: 0000000000000202 R12: 00007fff1fe16920
+ R13: 0000000000002000 R14: 0000000000001f9c R15: 0000000000001f9c
+
+The packet scheduler could attempt a reinjection after receiving an
+MP_FAIL and before the infinite map has been transmitted, causing a
+deadlock since MPTCP needs to do the reinjection atomically from WRT
+fallback.
+
+Address the issue explicitly avoiding the reinjection in the critical
+scenario. Note that this is the only fallback critical section that
+could potentially send packets and hit the double-lock.
+
+Reported-by: Jakub Kicinski <kuba@kernel.org>
+Closes: https://netdev-ctrl.bots.linux.dev/logs/vmksft/mptcp-dbg/results/412720/1-mptcp-join-sh/stderr
+Fixes: f8a1d9b18c5e ("mptcp: make fallback action and fallback decision atomic")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-4-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2712,10 +2712,13 @@ static void __mptcp_retrans(struct sock
+                       /*
+                        * make the whole retrans decision, xmit, disallow
+-                       * fallback atomic
++                       * fallback atomic, note that we can't retrans even
++                       * when an infinite fallback is in progress, i.e. new
++                       * subflows are disallowed.
+                        */
+                       spin_lock_bh(&msk->fallback_lock);
+-                      if (__mptcp_check_fallback(msk)) {
++                      if (__mptcp_check_fallback(msk) ||
++                          !msk->allow_subflows) {
+                               spin_unlock_bh(&msk->fallback_lock);
+                               release_sock(ssk);
+                               goto clear_scheduled;
diff --git a/queue-6.6/mptcp-schedule-rtx-timer-only-after-pushing-data.patch b/queue-6.6/mptcp-schedule-rtx-timer-only-after-pushing-data.patch
new file mode 100644 (file)
index 0000000..7c0b053
--- /dev/null
@@ -0,0 +1,71 @@
+From 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Fri, 5 Dec 2025 19:55:16 +0100
+Subject: mptcp: schedule rtx timer only after pushing data
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 2ea6190f42d0416a4310e60a7fcb0b49fcbbd4fb upstream.
+
+The MPTCP protocol usually schedules the retransmission timer only
+when there is some chance for such retransmissions to happen.
+
+With a notable exception: __mptcp_push_pending() currently schedules
+such a timer unconditionally, potentially leading to unnecessary rtx
+timer expiration.
+
+The issue is present since the blamed commit below but became easily
+reproducible after commit 27b0e701d387 ("mptcp: drop bogus optimization
+in __mptcp_check_push()")
+
+Fixes: 33d41c9cd74c ("mptcp: more accurate timeout")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Link: https://patch.msgid.link/20251205-net-mptcp-misc-fixes-6-19-rc1-v1-3-9e4781a6c1b8@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mptcp/protocol.c |   15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1630,7 +1630,7 @@ void __mptcp_push_pending(struct sock *s
+       struct mptcp_sendmsg_info info = {
+                               .flags = flags,
+       };
+-      bool do_check_data_fin = false;
++      bool copied = false;
+       int push_count = 1;
+       while (mptcp_send_head(sk) && (push_count > 0)) {
+@@ -1672,7 +1672,7 @@ void __mptcp_push_pending(struct sock *s
+                                               push_count--;
+                                       continue;
+                               }
+-                              do_check_data_fin = true;
++                              copied = true;
+                       }
+               }
+       }
+@@ -1681,11 +1681,14 @@ void __mptcp_push_pending(struct sock *s
+       if (ssk)
+               mptcp_push_release(ssk, &info);
+-      /* ensure the rtx timer is running */
+-      if (!mptcp_rtx_timer_pending(sk))
+-              mptcp_reset_rtx_timer(sk);
+-      if (do_check_data_fin)
++      /* Avoid scheduling the rtx timer if no data has been pushed; the timer
++       * will be updated on positive acks by __mptcp_cleanup_una().
++       */
++      if (copied) {
++              if (!mptcp_rtx_timer_pending(sk))
++                      mptcp_reset_rtx_timer(sk);
+               mptcp_check_send_data_fin(sk);
++      }
+ }
+ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
index 69d083df3c533fe5f0c02062e1288764bde60e56..803e4162e711afec8972ba8ef58fd2a9093f1df2 100644 (file)
@@ -419,3 +419,12 @@ floppy-fix-for-page_size-4kb.patch
 kallsyms-fix-wrong-big-kernel-symbol-type-read-from-procfs.patch
 fs-ntfs3-fix-mount-failure-for-sparse-runs-in-run_unpack.patch
 ktest.pl-fix-uninitialized-var-in-config-bisect.pl.patch
+ext4-xattr-fix-null-pointer-deref-in-ext4_raw_inode.patch
+ext4-clear-i_state_flags-when-alloc-inode.patch
+ext4-fix-incorrect-group-number-assertion-in-mb_check_buddy.patch
+ext4-align-max-orphan-file-size-with-e2fsprogs-limit.patch
+jbd2-use-a-per-journal-lock_class_key-for-jbd2_trans_commit_key.patch
+jbd2-use-a-weaker-annotation-in-journal-handling.patch
+media-v4l2-mem2mem-fix-outdated-documentation.patch
+mptcp-schedule-rtx-timer-only-after-pushing-data.patch
+mptcp-avoid-deadlock-on-fallback-while-reinjecting.patch