6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 7 Feb 2026 15:43:29 +0000 (16:43 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 7 Feb 2026 15:43:29 +0000 (16:43 +0100)
added patches:
hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_uni2asc.patch
netfilter-nft_set_pipapo-clamp-maximum-map-bucket-size-to-int_max.patch
sched-rt-fix-race-in-push_rt_task.patch
ublk-fix-deadlock-when-reading-partition-table.patch

queue-6.6/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_uni2asc.patch [new file with mode: 0644]
queue-6.6/netfilter-nft_set_pipapo-clamp-maximum-map-bucket-size-to-int_max.patch [new file with mode: 0644]
queue-6.6/sched-rt-fix-race-in-push_rt_task.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/ublk-fix-deadlock-when-reading-partition-table.patch [new file with mode: 0644]

diff --git a/queue-6.6/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_uni2asc.patch b/queue-6.6/hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_uni2asc.patch
new file mode 100644 (file)
index 0000000..e84dc21
--- /dev/null
@@ -0,0 +1,176 @@
+From bea3e1d4467bcf292c8e54f080353d556d355e26 Mon Sep 17 00:00:00 2001
+From: Kang Chen <k.chen@smail.nju.edu.cn>
+Date: Tue, 9 Sep 2025 11:13:16 +0800
+Subject: hfsplus: fix slab-out-of-bounds read in hfsplus_uni2asc()
+
+From: Kang Chen <k.chen@smail.nju.edu.cn>
+
+commit bea3e1d4467bcf292c8e54f080353d556d355e26 upstream.
+
+BUG: KASAN: slab-out-of-bounds in hfsplus_uni2asc+0xa71/0xb90 fs/hfsplus/unicode.c:186
+Read of size 2 at addr ffff8880289ef218 by task syz.6.248/14290
+
+CPU: 0 UID: 0 PID: 14290 Comm: syz.6.248 Not tainted 6.16.4 #1 PREEMPT(full)
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:94 [inline]
+ dump_stack_lvl+0x116/0x1b0 lib/dump_stack.c:120
+ print_address_description mm/kasan/report.c:378 [inline]
+ print_report+0xca/0x5f0 mm/kasan/report.c:482
+ kasan_report+0xca/0x100 mm/kasan/report.c:595
+ hfsplus_uni2asc+0xa71/0xb90 fs/hfsplus/unicode.c:186
+ hfsplus_listxattr+0x5b6/0xbd0 fs/hfsplus/xattr.c:738
+ vfs_listxattr+0xbe/0x140 fs/xattr.c:493
+ listxattr+0xee/0x190 fs/xattr.c:924
+ filename_listxattr fs/xattr.c:958 [inline]
+ path_listxattrat+0x143/0x360 fs/xattr.c:988
+ do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+ do_syscall_64+0xcb/0x4c0 arch/x86/entry/syscall_64.c:94
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7fe0e9fae16d
+Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007fe0eae67f98 EFLAGS: 00000246 ORIG_RAX: 00000000000000c3
+RAX: ffffffffffffffda RBX: 00007fe0ea205fa0 RCX: 00007fe0e9fae16d
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000200000000000
+RBP: 00007fe0ea0480f0 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007fe0ea206038 R14: 00007fe0ea205fa0 R15: 00007fe0eae48000
+ </TASK>
+
+Allocated by task 14290:
+ kasan_save_stack+0x24/0x50 mm/kasan/common.c:47
+ kasan_save_track+0x14/0x30 mm/kasan/common.c:68
+ poison_kmalloc_redzone mm/kasan/common.c:377 [inline]
+ __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:394
+ kasan_kmalloc include/linux/kasan.h:260 [inline]
+ __do_kmalloc_node mm/slub.c:4333 [inline]
+ __kmalloc_noprof+0x219/0x540 mm/slub.c:4345
+ kmalloc_noprof include/linux/slab.h:909 [inline]
+ hfsplus_find_init+0x95/0x1f0 fs/hfsplus/bfind.c:21
+ hfsplus_listxattr+0x331/0xbd0 fs/hfsplus/xattr.c:697
+ vfs_listxattr+0xbe/0x140 fs/xattr.c:493
+ listxattr+0xee/0x190 fs/xattr.c:924
+ filename_listxattr fs/xattr.c:958 [inline]
+ path_listxattrat+0x143/0x360 fs/xattr.c:988
+ do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+ do_syscall_64+0xcb/0x4c0 arch/x86/entry/syscall_64.c:94
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+When hfsplus_uni2asc() is called from hfsplus_listxattr(), it is
+actually passed a struct hfsplus_attr_unistr *. The size of that
+structure differs from struct hfsplus_unistr, so the previous fix
+(94458781aee6) is insufficient: the pointer into the unicode buffer
+can still run past the allocated memory.
+
+This patch introduces two wrapper functions, hfsplus_uni2asc_str() and
+hfsplus_uni2asc_xattr_str(), to process the two unicode buffer types,
+struct hfsplus_unistr * and struct hfsplus_attr_unistr * respectively.
+When the ustrlen value is bigger than the allocated memory size, it is
+clamped to a safe size.
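+
+Illustrative sketch of why the cast alone is unsafe (layouts are
+simplified here; see hfsplus_raw.h for the exact definitions):
+
+  struct hfsplus_unistr      { __be16 length; __be16 unicode[255]; };
+  struct hfsplus_attr_unistr { __be16 length; __be16 unicode[127]; };
+
+  /* casting an attr_unistr to unistr and clamping its length to
+   * HFSPLUS_MAX_STRLEN (255) still permits reads up to index 254
+   * of a 127-element array, hence the out-of-bounds read */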
+
+Fixes: 94458781aee6 ("hfsplus: fix slab-out-of-bounds read in hfsplus_uni2asc()")
+Signed-off-by: Kang Chen <k.chen@smail.nju.edu.cn>
+Reviewed-by: Viacheslav Dubeyko <slava@dubeyko.com>
+Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
+Link: https://lore.kernel.org/r/20250909031316.1647094-1-k.chen@smail.nju.edu.cn
+Signed-off-by: Viacheslav Dubeyko <slava@dubeyko.com>
+Signed-off-by: Jianqiang kang <jianqkang@sina.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hfsplus/dir.c        |    2 +-
+ fs/hfsplus/hfsplus_fs.h |    8 ++++++--
+ fs/hfsplus/unicode.c    |   24 +++++++++++++++++++-----
+ fs/hfsplus/xattr.c      |    6 +++---
+ 4 files changed, 29 insertions(+), 11 deletions(-)
+
+--- a/fs/hfsplus/dir.c
++++ b/fs/hfsplus/dir.c
+@@ -204,7 +204,7 @@ static int hfsplus_readdir(struct file *
+                       fd.entrylength);
+               type = be16_to_cpu(entry.type);
+               len = NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN;
+-              err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len);
++              err = hfsplus_uni2asc_str(sb, &fd.key->cat.name, strbuf, &len);
+               if (err)
+                       goto out;
+               if (type == HFSPLUS_FOLDER) {
+--- a/fs/hfsplus/hfsplus_fs.h
++++ b/fs/hfsplus/hfsplus_fs.h
+@@ -518,8 +518,12 @@ int hfsplus_strcasecmp(const struct hfsp
+                      const struct hfsplus_unistr *s2);
+ int hfsplus_strcmp(const struct hfsplus_unistr *s1,
+                  const struct hfsplus_unistr *s2);
+-int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr,
+-                  char *astr, int *len_p);
++int hfsplus_uni2asc_str(struct super_block *sb,
++                      const struct hfsplus_unistr *ustr, char *astr,
++                      int *len_p);
++int hfsplus_uni2asc_xattr_str(struct super_block *sb,
++                            const struct hfsplus_attr_unistr *ustr,
++                            char *astr, int *len_p);
+ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
+                   int max_unistr_len, const char *astr, int len);
+ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str);
+--- a/fs/hfsplus/unicode.c
++++ b/fs/hfsplus/unicode.c
+@@ -143,9 +143,8 @@ static u16 *hfsplus_compose_lookup(u16 *
+       return NULL;
+ }
+-int hfsplus_uni2asc(struct super_block *sb,
+-              const struct hfsplus_unistr *ustr,
+-              char *astr, int *len_p)
++static int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr,
++                  int max_len, char *astr, int *len_p)
+ {
+       const hfsplus_unichr *ip;
+       struct nls_table *nls = HFSPLUS_SB(sb)->nls;
+@@ -158,8 +157,8 @@ int hfsplus_uni2asc(struct super_block *
+       ip = ustr->unicode;
+       ustrlen = be16_to_cpu(ustr->length);
+-      if (ustrlen > HFSPLUS_MAX_STRLEN) {
+-              ustrlen = HFSPLUS_MAX_STRLEN;
++      if (ustrlen > max_len) {
++              ustrlen = max_len;
+               pr_err("invalid length %u has been corrected to %d\n",
+                       be16_to_cpu(ustr->length), ustrlen);
+       }
+@@ -280,6 +279,21 @@ out:
+       return res;
+ }
++inline int hfsplus_uni2asc_str(struct super_block *sb,
++                             const struct hfsplus_unistr *ustr, char *astr,
++                             int *len_p)
++{
++      return hfsplus_uni2asc(sb, ustr, HFSPLUS_MAX_STRLEN, astr, len_p);
++}
++
++inline int hfsplus_uni2asc_xattr_str(struct super_block *sb,
++                                   const struct hfsplus_attr_unistr *ustr,
++                                   char *astr, int *len_p)
++{
++      return hfsplus_uni2asc(sb, (const struct hfsplus_unistr *)ustr,
++                             HFSPLUS_ATTR_MAX_STRLEN, astr, len_p);
++}
++
+ /*
+  * Convert one or more ASCII characters into a single unicode character.
+  * Returns the number of ASCII characters corresponding to the unicode char.
+--- a/fs/hfsplus/xattr.c
++++ b/fs/hfsplus/xattr.c
+@@ -737,9 +737,9 @@ ssize_t hfsplus_listxattr(struct dentry
+                       goto end_listxattr;
+               xattr_name_len = NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN;
+-              if (hfsplus_uni2asc(inode->i_sb,
+-                      (const struct hfsplus_unistr *)&fd.key->attr.key_name,
+-                                      strbuf, &xattr_name_len)) {
++              if (hfsplus_uni2asc_xattr_str(inode->i_sb,
++                                            &fd.key->attr.key_name, strbuf,
++                                            &xattr_name_len)) {
+                       pr_err("unicode conversion failed\n");
+                       res = -EIO;
+                       goto end_listxattr;
diff --git a/queue-6.6/netfilter-nft_set_pipapo-clamp-maximum-map-bucket-size-to-int_max.patch b/queue-6.6/netfilter-nft_set_pipapo-clamp-maximum-map-bucket-size-to-int_max.patch
new file mode 100644 (file)
index 0000000..332e07b
--- /dev/null
@@ -0,0 +1,49 @@
+From b85e3367a5716ed3662a4fe266525190d2af76df Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 22 Apr 2025 21:52:44 +0200
+Subject: netfilter: nft_set_pipapo: clamp maximum map bucket size to INT_MAX
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit b85e3367a5716ed3662a4fe266525190d2af76df upstream.
+
+Otherwise, it is possible to hit the WARN_ON_ONCE in
+__kvmalloc_node_noprof() when resizing the hashtable, because
+__GFP_NOWARN is unset.
+
+Similar to:
+
+  b541ba7d1f5a ("netfilter: conntrack: clamp maximum hashtable size to INT_MAX")
+
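+The guard follows the usual overflow-check idiom before kvmalloc();
+roughly (a sketch of the check added below, in pipapo_resize()):
+
+  /* refuse bucket maps whose byte count would exceed INT_MAX, since
+   * kvmalloc() without __GFP_NOWARN warns on such requests */
+  if (rules > INT_MAX / sizeof(*new_mt)) {
+          kvfree(new_lt);         /* unwind the earlier allocation */
+          return -ENOMEM;
+  }
+  new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL);
+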
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+[ Keerthana: Handle freeing new_lt ]
+Signed-off-by: Keerthana K <keerthana.kalyanasundaram@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_pipapo.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -667,6 +667,11 @@ static int pipapo_resize(struct nft_pipa
+       }
+ mt:
++      if (rules > (INT_MAX / sizeof(*new_mt))) {
++              kvfree(new_lt);
++              return -ENOMEM;
++      }
++
+       new_mt = kvmalloc(rules * sizeof(*new_mt), GFP_KERNEL);
+       if (!new_mt) {
+               kvfree(new_lt);
+@@ -1359,6 +1364,9 @@ static struct nft_pipapo_match *pipapo_c
+                      src->bsize * sizeof(*dst->lt) *
+                      src->groups * NFT_PIPAPO_BUCKETS(src->bb));
++              if (src->rules > (INT_MAX / sizeof(*src->mt)))
++                      goto out_mt;
++
+               dst->mt = kvmalloc(src->rules * sizeof(*src->mt), GFP_KERNEL);
+               if (!dst->mt)
+                       goto out_mt;
diff --git a/queue-6.6/sched-rt-fix-race-in-push_rt_task.patch b/queue-6.6/sched-rt-fix-race-in-push_rt_task.patch
new file mode 100644 (file)
index 0000000..7b22273
--- /dev/null
@@ -0,0 +1,292 @@
+From 690e47d1403e90b7f2366f03b52ed3304194c793 Mon Sep 17 00:00:00 2001
+From: Harshit Agarwal <harshit@nutanix.com>
+Date: Tue, 25 Feb 2025 18:05:53 +0000
+Subject: sched/rt: Fix race in push_rt_task
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Harshit Agarwal <harshit@nutanix.com>
+
+commit 690e47d1403e90b7f2366f03b52ed3304194c793 upstream.
+
+Overview
+========
+When a CPU calls push_rt_task and picks a task to push to another
+CPU's runqueue, it calls find_lock_lowest_rq(), which takes a double
+lock on both CPUs' runqueues. If one of the locks isn't readily
+available, this may lead to dropping the current runqueue lock and
+reacquiring both locks at once. During this window it is possible that
+the task has already migrated and is running on some other CPU. Those
+cases are already handled. However, if the task has migrated, has
+already finished running, and another CPU is now trying to wake it up
+(ttwu) so that it is queued again on a runqueue (on_rq is 1), and the
+task also happens to have last run on the same CPU, then the current
+checks will pass even though the task was migrated out and is no
+longer on the pushable tasks list.
+
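+For context, when the second lock is contended, double_lock_balance()
+may drop the first runqueue lock and re-take both in order; a
+simplified sketch (the real helper has more fast paths):
+
+  if (!raw_spin_rq_trylock(busiest)) {
+          raw_spin_rq_unlock(this_rq);        /* race window opens here */
+          double_rq_lock(this_rq, busiest);   /* re-taken in CPU order;
+                                                 the task may be gone */
+  }
+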
+Crashes
+=======
+This bug resulted in quite a few flavors of crashes triggering kernel
+panics with various crash signatures such as assert failures, page
+faults, null pointer dereferences, and queue corruption errors, all
+coming from the scheduler itself.
+
+Some of the crashes:
+-> kernel BUG at kernel/sched/rt.c:1616! BUG_ON(idx >= MAX_RT_PRIO)
+   Call Trace:
+   ? __die_body+0x1a/0x60
+   ? die+0x2a/0x50
+   ? do_trap+0x85/0x100
+   ? pick_next_task_rt+0x6e/0x1d0
+   ? do_error_trap+0x64/0xa0
+   ? pick_next_task_rt+0x6e/0x1d0
+   ? exc_invalid_op+0x4c/0x60
+   ? pick_next_task_rt+0x6e/0x1d0
+   ? asm_exc_invalid_op+0x12/0x20
+   ? pick_next_task_rt+0x6e/0x1d0
+   __schedule+0x5cb/0x790
+   ? update_ts_time_stats+0x55/0x70
+   schedule_idle+0x1e/0x40
+   do_idle+0x15e/0x200
+   cpu_startup_entry+0x19/0x20
+   start_secondary+0x117/0x160
+   secondary_startup_64_no_verify+0xb0/0xbb
+
+-> BUG: kernel NULL pointer dereference, address: 00000000000000c0
+   Call Trace:
+   ? __die_body+0x1a/0x60
+   ? no_context+0x183/0x350
+   ? __warn+0x8a/0xe0
+   ? exc_page_fault+0x3d6/0x520
+   ? asm_exc_page_fault+0x1e/0x30
+   ? pick_next_task_rt+0xb5/0x1d0
+   ? pick_next_task_rt+0x8c/0x1d0
+   __schedule+0x583/0x7e0
+   ? update_ts_time_stats+0x55/0x70
+   schedule_idle+0x1e/0x40
+   do_idle+0x15e/0x200
+   cpu_startup_entry+0x19/0x20
+   start_secondary+0x117/0x160
+   secondary_startup_64_no_verify+0xb0/0xbb
+
+-> BUG: unable to handle page fault for address: ffff9464daea5900
+   kernel BUG at kernel/sched/rt.c:1861! BUG_ON(rq->cpu != task_cpu(p))
+
+-> kernel BUG at kernel/sched/rt.c:1055! BUG_ON(!rq->nr_running)
+   Call Trace:
+   ? __die_body+0x1a/0x60
+   ? die+0x2a/0x50
+   ? do_trap+0x85/0x100
+   ? dequeue_top_rt_rq+0xa2/0xb0
+   ? do_error_trap+0x64/0xa0
+   ? dequeue_top_rt_rq+0xa2/0xb0
+   ? exc_invalid_op+0x4c/0x60
+   ? dequeue_top_rt_rq+0xa2/0xb0
+   ? asm_exc_invalid_op+0x12/0x20
+   ? dequeue_top_rt_rq+0xa2/0xb0
+   dequeue_rt_entity+0x1f/0x70
+   dequeue_task_rt+0x2d/0x70
+   __schedule+0x1a8/0x7e0
+   ? blk_finish_plug+0x25/0x40
+   schedule+0x3c/0xb0
+   futex_wait_queue_me+0xb6/0x120
+   futex_wait+0xd9/0x240
+   do_futex+0x344/0xa90
+   ? get_mm_exe_file+0x30/0x60
+   ? audit_exe_compare+0x58/0x70
+   ? audit_filter_rules.constprop.26+0x65e/0x1220
+   __x64_sys_futex+0x148/0x1f0
+   do_syscall_64+0x30/0x80
+   entry_SYSCALL_64_after_hwframe+0x62/0xc7
+
+-> BUG: unable to handle page fault for address: ffff8cf3608bc2c0
+   Call Trace:
+   ? __die_body+0x1a/0x60
+   ? no_context+0x183/0x350
+   ? spurious_kernel_fault+0x171/0x1c0
+   ? exc_page_fault+0x3b6/0x520
+   ? plist_check_list+0x15/0x40
+   ? plist_check_list+0x2e/0x40
+   ? asm_exc_page_fault+0x1e/0x30
+   ? _cond_resched+0x15/0x30
+   ? futex_wait_queue_me+0xc8/0x120
+   ? futex_wait+0xd9/0x240
+   ? try_to_wake_up+0x1b8/0x490
+   ? futex_wake+0x78/0x160
+   ? do_futex+0xcd/0xa90
+   ? plist_check_list+0x15/0x40
+   ? plist_check_list+0x2e/0x40
+   ? plist_del+0x6a/0xd0
+   ? plist_check_list+0x15/0x40
+   ? plist_check_list+0x2e/0x40
+   ? dequeue_pushable_task+0x20/0x70
+   ? __schedule+0x382/0x7e0
+   ? asm_sysvec_reschedule_ipi+0xa/0x20
+   ? schedule+0x3c/0xb0
+   ? exit_to_user_mode_prepare+0x9e/0x150
+   ? irqentry_exit_to_user_mode+0x5/0x30
+   ? asm_sysvec_reschedule_ipi+0x12/0x20
+
+Above are some of the common examples of the crashes that were observed
+due to this issue.
+
+Details
+=======
+Let's look at the following scenario to understand this race.
+
+1) CPU A enters push_rt_task
+  a) CPU A has chosen next_task = task p.
+  b) CPU A calls find_lock_lowest_rq(Task p, CPU Z’s rq).
+  c) CPU A identifies CPU X as a destination CPU (X < Z).
+  d) CPU A enters double_lock_balance(CPU Z’s rq, CPU X’s rq).
+  e) Since X is lower than Z, CPU A unlocks CPU Z’s rq. Someone else has
+     locked CPU X’s rq, and thus, CPU A must wait.
+
+2) At CPU Z
+  a) Previous task has completed execution and thus, CPU Z enters
+     schedule, locks its own rq after CPU A releases it.
+  b) CPU Z dequeues previous task and begins executing task p.
+  c) CPU Z unlocks its rq.
+  d) Task p yields the CPU (ex. by doing IO or waiting to acquire a
+     lock) which triggers the schedule function on CPU Z.
+  e) CPU Z enters schedule again, locks its own rq, and dequeues task p.
+  f) As part of dequeue, it sets p.on_rq = 0 and unlocks its rq.
+
+3) At CPU B
+  a) CPU B enters try_to_wake_up with input task p.
+  b) Since CPU Z dequeued task p, p.on_rq = 0, and CPU B updates
+     p.state = WAKING.
+  c) CPU B via select_task_rq determines CPU Y as the target CPU.
+
+4) The race
+  a) CPU A acquires CPU X’s lock and relocks CPU Z.
+  b) CPU A reads task p.cpu = Z and incorrectly concludes task p is
+     still on CPU Z.
+  c) CPU A failed to notice task p had been dequeued from CPU Z while
+     CPU A was waiting for locks in double_lock_balance. If CPU A knew
+     that task p had been dequeued, it would return NULL forcing
+     push_rt_task to give up the task p's migration.
+  d) CPU B updates task p.cpu = Y and calls ttwu_queue.
+  e) CPU B locks Y's rq. CPU B enqueues task p onto Y and sets task
+     p.on_rq = 1.
+  f) CPU B unlocks CPU Y, triggering memory synchronization.
+  g) CPU A reads task p.on_rq = 1, cementing its assumption that task p
+     has not migrated.
+  h) CPU A decides to migrate p to CPU X.
+
+This leads to A dequeuing p from Y's queue and various crashes down the
+line.
+
+Solution
+========
+The solution here is fairly simple. After obtaining the lock (at 4a),
+the check is enhanced to make sure that the task is still at the head
+of the pushable tasks list. If not, then it is no longer suitable for
+being pushed out.
+
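+In code terms, the re-validation after re-taking the locks reduces to
+this pattern (sketch of the check in the diff below):
+
+  /* the task must still be the first entry on this rq's pushable
+   * list; otherwise some other CPU moved or woke it meanwhile */
+  if (task != pick_next_pushable_task(rq)) {
+          double_unlock_balance(rq, lowest_rq);
+          lowest_rq = NULL;       /* give up this push attempt */
+  }
+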
+Testing
+=======
+The fix was tested on a cluster of 3 nodes, where the panics due to
+this issue were hit every couple of days. A fix similar to this one
+was deployed on such a cluster and was stable for more than 30 days.
+
+Co-developed-by: Jon Kohler <jon@nutanix.com>
+Signed-off-by: Jon Kohler <jon@nutanix.com>
+Co-developed-by: Gauri Patwardhan <gauri.patwardhan@nutanix.com>
+Signed-off-by: Gauri Patwardhan <gauri.patwardhan@nutanix.com>
+Co-developed-by: Rahul Chunduru <rahul.chunduru@nutanix.com>
+Signed-off-by: Rahul Chunduru <rahul.chunduru@nutanix.com>
+Signed-off-by: Harshit Agarwal <harshit@nutanix.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Tested-by: Will Ton <william.ton@nutanix.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20250225180553.167995-1-harshit@nutanix.com
+Signed-off-by: Rajani Kantha <681739313@139.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/sched/rt.c |   52 +++++++++++++++++++++++++---------------------------
+ 1 file changed, 25 insertions(+), 27 deletions(-)
+
+--- a/kernel/sched/rt.c
++++ b/kernel/sched/rt.c
+@@ -1963,6 +1963,26 @@ static int find_lowest_rq(struct task_st
+       return -1;
+ }
++static struct task_struct *pick_next_pushable_task(struct rq *rq)
++{
++      struct task_struct *p;
++
++      if (!has_pushable_tasks(rq))
++              return NULL;
++
++      p = plist_first_entry(&rq->rt.pushable_tasks,
++                            struct task_struct, pushable_tasks);
++
++      BUG_ON(rq->cpu != task_cpu(p));
++      BUG_ON(task_current(rq, p));
++      BUG_ON(p->nr_cpus_allowed <= 1);
++
++      BUG_ON(!task_on_rq_queued(p));
++      BUG_ON(!rt_task(p));
++
++      return p;
++}
++
+ /* Will lock the rq it finds */
+ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
+ {
+@@ -1993,18 +2013,16 @@ static struct rq *find_lock_lowest_rq(st
+                       /*
+                        * We had to unlock the run queue. In
+                        * the mean time, task could have
+-                       * migrated already or had its affinity changed.
+-                       * Also make sure that it wasn't scheduled on its rq.
++                       * migrated already or had its affinity changed,
++                       * therefore check if the task is still at the
++                       * head of the pushable tasks list.
+                        * It is possible the task was scheduled, set
+                        * "migrate_disabled" and then got preempted, so we must
+                        * check the task migration disable flag here too.
+                        */
+-                      if (unlikely(task_rq(task) != rq ||
++                      if (unlikely(is_migration_disabled(task) ||
+                                    !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
+-                                   task_on_cpu(rq, task) ||
+-                                   !rt_task(task) ||
+-                                   is_migration_disabled(task) ||
+-                                   !task_on_rq_queued(task))) {
++                                   task != pick_next_pushable_task(rq))) {
+                               double_unlock_balance(rq, lowest_rq);
+                               lowest_rq = NULL;
+@@ -2024,26 +2042,6 @@ static struct rq *find_lock_lowest_rq(st
+       return lowest_rq;
+ }
+-static struct task_struct *pick_next_pushable_task(struct rq *rq)
+-{
+-      struct task_struct *p;
+-
+-      if (!has_pushable_tasks(rq))
+-              return NULL;
+-
+-      p = plist_first_entry(&rq->rt.pushable_tasks,
+-                            struct task_struct, pushable_tasks);
+-
+-      BUG_ON(rq->cpu != task_cpu(p));
+-      BUG_ON(task_current(rq, p));
+-      BUG_ON(p->nr_cpus_allowed <= 1);
+-
+-      BUG_ON(!task_on_rq_queued(p));
+-      BUG_ON(!rt_task(p));
+-
+-      return p;
+-}
+-
+ /*
+  * If the current CPU has more than one RT task, see if the non
+  * running task can migrate over to a CPU that is running a task
diff --git a/queue-6.6/series b/queue-6.6/series
index a5128ece6edcd6eee716b9fc27fa3b941b56fb2a..c6185d1a89ee9cd9715bd9a946291b6344e75db7 100644 (file)
@@ -10,3 +10,7 @@ rbd-check-for-eod-after-exclusive-lock-is-ensured-to-be-held.patch
 arm-9468-1-fix-memset64-on-big-endian.patch
 revert-drm-amd-check-if-aspm-is-enabled-from-pcie-subsystem.patch
 kvm-don-t-clobber-irqfd-routing-type-when-deassigning-irqfd.patch
+netfilter-nft_set_pipapo-clamp-maximum-map-bucket-size-to-int_max.patch
+hfsplus-fix-slab-out-of-bounds-read-in-hfsplus_uni2asc.patch
+ublk-fix-deadlock-when-reading-partition-table.patch
+sched-rt-fix-race-in-push_rt_task.patch
diff --git a/queue-6.6/ublk-fix-deadlock-when-reading-partition-table.patch b/queue-6.6/ublk-fix-deadlock-when-reading-partition-table.patch
new file mode 100644 (file)
index 0000000..b716b95
--- /dev/null
@@ -0,0 +1,107 @@
+From c258f5c4502c9667bccf5d76fa731ab9c96687c1 Mon Sep 17 00:00:00 2001
+From: Ming Lei <ming.lei@redhat.com>
+Date: Fri, 12 Dec 2025 22:34:15 +0800
+Subject: ublk: fix deadlock when reading partition table
+
+From: Ming Lei <ming.lei@redhat.com>
+
+commit c258f5c4502c9667bccf5d76fa731ab9c96687c1 upstream.
+
+When a process (such as udev) opens a ublk block device (e.g., to read
+the partition table via bdev_open()), a deadlock [1] can occur:
+
+1. bdev_open() grabs disk->open_mutex
+2. The process issues read I/O to the ublk backend to read the partition table
+3. In __ublk_complete_rq(), blk_update_request() or blk_mq_end_request()
+   runs bio->bi_end_io() callbacks
+4. If this triggers fput() on a file descriptor of the ublk block device, the
+   work may be deferred to the current task's task work (see the fput()
+   implementation)
+5. This eventually calls blkdev_release() from the same context
+6. blkdev_release() tries to grab disk->open_mutex again
+7. Deadlock: the same task waits for a mutex it already holds
+
+The fix is to run blk_update_request() and blk_mq_end_request() with
+bottom halves disabled. This forces blkdev_release() to run in kernel
+workqueue context instead of the current task's task-work context,
+which allows the ublk server to make forward progress and avoids the
+deadlock.
+
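+Why disabling bottom halves helps: fput() only queues the final release
+on the caller's task work from plain task context; with softirqs
+disabled, in_interrupt() is true and it falls back to the shared
+workqueue. Roughly (a simplified sketch of the dispatch in
+fs/file_table.c; 'work' is illustrative):
+
+  if (!in_interrupt() && !(current->flags & PF_KTHREAD))
+          task_work_add(current, &work, TWA_RESUME);  /* same task ctx */
+  else
+          schedule_delayed_work(&delayed_fput_work, 1);  /* workqueue */
+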
+Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver")
+Link: https://github.com/ublk-org/ublksrv/issues/170 [1]
+Signed-off-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
+[axboe: rewrite comment in ublk]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[ The fix omits the change in __ublk_do_auto_buf_reg() since this function
+  doesn't exist in Linux 6.6. ]
+Signed-off-by: Alva Lan <alvalan9@foxmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/ublk_drv.c |   30 +++++++++++++++++++++++++++---
+ 1 file changed, 27 insertions(+), 3 deletions(-)
+
+--- a/drivers/block/ublk_drv.c
++++ b/drivers/block/ublk_drv.c
+@@ -1050,6 +1050,13 @@ static inline bool ubq_daemon_is_dying(s
+       return ubq->ubq_daemon->flags & PF_EXITING;
+ }
++static void ublk_end_request(struct request *req, blk_status_t error)
++{
++      local_bh_disable();
++      blk_mq_end_request(req, error);
++      local_bh_enable();
++}
++
+ /* todo: handle partial completion */
+ static inline void __ublk_complete_rq(struct request *req)
+ {
+@@ -1057,6 +1064,7 @@ static inline void __ublk_complete_rq(st
+       struct ublk_io *io = &ubq->ios[req->tag];
+       unsigned int unmapped_bytes;
+       blk_status_t res = BLK_STS_OK;
++      bool requeue;
+       /* called from ublk_abort_queue() code path */
+       if (io->flags & UBLK_IO_FLAG_ABORTED) {
+@@ -1094,14 +1102,30 @@ static inline void __ublk_complete_rq(st
+       if (unlikely(unmapped_bytes < io->res))
+               io->res = unmapped_bytes;
+-      if (blk_update_request(req, BLK_STS_OK, io->res))
++      /*
++       * Run bio->bi_end_io() with softirqs disabled. If the final fput
++       * happens off this path, then that will prevent ublk's blkdev_release()
++       * from being called on current's task work, see fput() implementation.
++       *
++       * Otherwise, ublk server may not provide forward progress in case of
++       * reading the partition table from bdev_open() with disk->open_mutex
++       * held, and causes dead lock as we could already be holding
++       * disk->open_mutex here.
++       *
++       * Preferably we would not be doing IO with a mutex held that is also
++       * used for release, but this work-around will suffice for now.
++       */
++      local_bh_disable();
++      requeue = blk_update_request(req, BLK_STS_OK, io->res);
++      local_bh_enable();
++      if (requeue)
+               blk_mq_requeue_request(req, true);
+       else
+               __blk_mq_end_request(req, BLK_STS_OK);
+       return;
+ exit:
+-      blk_mq_end_request(req, res);
++      ublk_end_request(req, res);
+ }
+ static void ublk_complete_rq(struct kref *ref)
+@@ -1160,7 +1184,7 @@ static inline void __ublk_abort_rq(struc
+       if (ublk_queue_can_use_recovery(ubq))
+               blk_mq_requeue_request(rq, false);
+       else
+-              blk_mq_end_request(rq, BLK_STS_IOERR);
++              ublk_end_request(rq, BLK_STS_IOERR);
+       mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
+ }