From: Greg Kroah-Hartman Date: Fri, 23 Jun 2023 09:50:47 +0000 (+0200) Subject: 4.14-stable patches X-Git-Tag: v4.14.320~48 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=366b9b47ce06ff2a9cdc255dea958e415f10258f;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: cgroup-do-not-corrupt-task-iteration-when-rebinding-subsystem.patch drivers-hv-vmbus-fix-vmbus_wait_for_unload-to-scan-present-cpus.patch nilfs2-fix-buffer-corruption-due-to-concurrent-device-reads.patch --- diff --git a/queue-4.14/cgroup-do-not-corrupt-task-iteration-when-rebinding-subsystem.patch b/queue-4.14/cgroup-do-not-corrupt-task-iteration-when-rebinding-subsystem.patch new file mode 100644 index 00000000000..1ab06069903 --- /dev/null +++ b/queue-4.14/cgroup-do-not-corrupt-task-iteration-when-rebinding-subsystem.patch @@ -0,0 +1,125 @@ +From 6f363f5aa845561f7ea496d8b1175e3204470486 Mon Sep 17 00:00:00 2001 +From: Xiu Jianfeng +Date: Sat, 10 Jun 2023 17:26:43 +0800 +Subject: cgroup: Do not corrupt task iteration when rebinding subsystem +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Xiu Jianfeng + +commit 6f363f5aa845561f7ea496d8b1175e3204470486 upstream. + +We found a refcount UAF bug as follows: + +refcount_t: addition on 0; use-after-free. 
+WARNING: CPU: 1 PID: 342 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x148 +Workqueue: events cpuset_hotplug_workfn +Call trace: + refcount_warn_saturate+0xa0/0x148 + __refcount_add.constprop.0+0x5c/0x80 + css_task_iter_advance_css_set+0xd8/0x210 + css_task_iter_advance+0xa8/0x120 + css_task_iter_next+0x94/0x158 + update_tasks_root_domain+0x58/0x98 + rebuild_root_domains+0xa0/0x1b0 + rebuild_sched_domains_locked+0x144/0x188 + cpuset_hotplug_workfn+0x138/0x5a0 + process_one_work+0x1e8/0x448 + worker_thread+0x228/0x3e0 + kthread+0xe0/0xf0 + ret_from_fork+0x10/0x20 + +then a kernel panic will be triggered as below: + +Unable to handle kernel paging request at virtual address 00000000c0000010 +Call trace: + cgroup_apply_control_disable+0xa4/0x16c + rebind_subsystems+0x224/0x590 + cgroup_destroy_root+0x64/0x2e0 + css_free_rwork_fn+0x198/0x2a0 + process_one_work+0x1d4/0x4bc + worker_thread+0x158/0x410 + kthread+0x108/0x13c + ret_from_fork+0x10/0x18 + +The race that causes this bug can be shown as below: + +(hotplug cpu) | (umount cpuset) +mutex_lock(&cpuset_mutex) | mutex_lock(&cgroup_mutex) +cpuset_hotplug_workfn | + rebuild_root_domains | rebind_subsystems + update_tasks_root_domain | spin_lock_irq(&css_set_lock) + css_task_iter_start | list_move_tail(&cset->e_cset_node[ss->id] + while(css_task_iter_next) | &dcgrp->e_csets[ss->id]); + css_task_iter_end | spin_unlock_irq(&css_set_lock) +mutex_unlock(&cpuset_mutex) | mutex_unlock(&cgroup_mutex) + +Inside css_task_iter_start/next/end, css_set_lock is held and then +released, so when iterating tasks (left side), the css_set may be moved to +another list (right side), then it->cset_head points to the old list head +and it->cset_pos->next points to the head node of new list, which can't +be used as struct css_set. + +To fix this issue, switch from all css_sets to only scgrp's css_sets to +patch in-flight iterators to preserve correct iteration, and then +update it->cset_head as well. 
+ +Reported-by: Gaosheng Cui +Link: https://www.spinics.net/lists/cgroups/msg37935.html +Suggested-by: Michal Koutný +Link: https://lore.kernel.org/all/20230526114139.70274-1-xiujianfeng@huaweicloud.com/ +Signed-off-by: Xiu Jianfeng +Fixes: 2d8f243a5e6e ("cgroup: implement cgroup->e_csets[]") +Cc: stable@vger.kernel.org # v3.16+ +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/cgroup/cgroup.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -1600,7 +1600,7 @@ int rebind_subsystems(struct cgroup_root + { + struct cgroup *dcgrp = &dst_root->cgrp; + struct cgroup_subsys *ss; +- int ssid, i, ret; ++ int ssid, ret; + u16 dfl_disable_ss_mask = 0; + + lockdep_assert_held(&cgroup_mutex); +@@ -1644,7 +1644,8 @@ int rebind_subsystems(struct cgroup_root + struct cgroup_root *src_root = ss->root; + struct cgroup *scgrp = &src_root->cgrp; + struct cgroup_subsys_state *css = cgroup_css(scgrp, ss); +- struct css_set *cset; ++ struct css_set *cset, *cset_pos; ++ struct css_task_iter *it; + + WARN_ON(!css || cgroup_css(dcgrp, ss)); + +@@ -1662,9 +1663,22 @@ int rebind_subsystems(struct cgroup_root + css->cgroup = dcgrp; + + spin_lock_irq(&css_set_lock); +- hash_for_each(css_set_table, i, cset, hlist) ++ WARN_ON(!list_empty(&dcgrp->e_csets[ss->id])); ++ list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id], ++ e_cset_node[ss->id]) { + list_move_tail(&cset->e_cset_node[ss->id], + &dcgrp->e_csets[ss->id]); ++ /* ++ * all css_sets of scgrp together in same order to dcgrp, ++ * patch in-flight iterators to preserve correct iteration. ++ * since the iterator is always advanced right away and ++ * finished when it->cset_pos meets it->cset_head, so only ++ * update it->cset_head is enough here. 
++ */ ++ list_for_each_entry(it, &cset->task_iters, iters_node) ++ if (it->cset_head == &scgrp->e_csets[ss->id]) ++ it->cset_head = &dcgrp->e_csets[ss->id]; ++ } + spin_unlock_irq(&css_set_lock); + + /* default hierarchy doesn't enable controllers by default */ diff --git a/queue-4.14/drivers-hv-vmbus-fix-vmbus_wait_for_unload-to-scan-present-cpus.patch b/queue-4.14/drivers-hv-vmbus-fix-vmbus_wait_for_unload-to-scan-present-cpus.patch new file mode 100644 index 00000000000..d09755fe30c --- /dev/null +++ b/queue-4.14/drivers-hv-vmbus-fix-vmbus_wait_for_unload-to-scan-present-cpus.patch @@ -0,0 +1,84 @@ +From 320805ab61e5f1e2a5729ae266e16bec2904050c Mon Sep 17 00:00:00 2001 +From: Michael Kelley +Date: Thu, 18 May 2023 08:13:52 -0700 +Subject: Drivers: hv: vmbus: Fix vmbus_wait_for_unload() to scan present CPUs + +From: Michael Kelley + +commit 320805ab61e5f1e2a5729ae266e16bec2904050c upstream. + +vmbus_wait_for_unload() may be called in the panic path after other +CPUs are stopped. vmbus_wait_for_unload() currently loops through +online CPUs looking for the UNLOAD response message. But the values of +CONFIG_KEXEC_CORE and crash_kexec_post_notifiers affect the path used +to stop the other CPUs, and in one of the paths the stopped CPUs +are removed from cpu_online_mask. This removal happens in both +x86/x64 and arm64 architectures. In such a case, vmbus_wait_for_unload() +only checks the panic'ing CPU, and misses the UNLOAD response message +except when the panic'ing CPU is CPU 0. vmbus_wait_for_unload() +eventually times out, but only after waiting 100 seconds. + +Fix this by looping through *present* CPUs in vmbus_wait_for_unload(). +The cpu_present_mask is not modified by stopping the other CPUs in the +panic path, nor should it be. + +Also, in a CoCo VM the synic_message_page is not allocated in +hv_synic_alloc(), but is set and cleared in hv_synic_enable_regs() +and hv_synic_disable_regs() such that it is set only when the CPU is +online. 
If not all present CPUs are online when vmbus_wait_for_unload() +is called, the synic_message_page might be NULL. Add a check for this. + +Fixes: cd95aad55793 ("Drivers: hv: vmbus: handle various crash scenarios") +Cc: stable@vger.kernel.org +Reported-by: John Starks +Signed-off-by: Michael Kelley +Reviewed-by: Vitaly Kuznetsov +Link: https://lore.kernel.org/r/1684422832-38476-1-git-send-email-mikelley@microsoft.com +Signed-off-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman +--- + drivers/hv/channel_mgmt.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/drivers/hv/channel_mgmt.c ++++ b/drivers/hv/channel_mgmt.c +@@ -803,11 +803,22 @@ static void vmbus_wait_for_unload(void) + if (completion_done(&vmbus_connection.unload_event)) + goto completed; + +- for_each_online_cpu(cpu) { ++ for_each_present_cpu(cpu) { + struct hv_per_cpu_context *hv_cpu + = per_cpu_ptr(hv_context.cpu_context, cpu); + ++ /* ++ * In a CoCo VM the synic_message_page is not allocated ++ * in hv_synic_alloc(). Instead it is set/cleared in ++ * hv_synic_enable_regs() and hv_synic_disable_regs() ++ * such that it is set only when the CPU is online. If ++ * not all present CPUs are online, the message page ++ * might be NULL, so skip such CPUs. ++ */ + page_addr = hv_cpu->synic_message_page; ++ if (!page_addr) ++ continue; ++ + msg = (struct hv_message *)page_addr + + VMBUS_MESSAGE_SINT; + +@@ -841,11 +852,14 @@ completed: + * maybe-pending messages on all CPUs to be able to receive new + * messages after we reconnect. 
+ */ +- for_each_online_cpu(cpu) { ++ for_each_present_cpu(cpu) { + struct hv_per_cpu_context *hv_cpu + = per_cpu_ptr(hv_context.cpu_context, cpu); + + page_addr = hv_cpu->synic_message_page; ++ if (!page_addr) ++ continue; ++ + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; + msg->header.message_type = HVMSG_NONE; + } diff --git a/queue-4.14/nilfs2-fix-buffer-corruption-due-to-concurrent-device-reads.patch b/queue-4.14/nilfs2-fix-buffer-corruption-due-to-concurrent-device-reads.patch new file mode 100644 index 00000000000..c0cdec31e0a --- /dev/null +++ b/queue-4.14/nilfs2-fix-buffer-corruption-due-to-concurrent-device-reads.patch @@ -0,0 +1,147 @@ +From 679bd7ebdd315bf457a4740b306ae99f1d0a403d Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Fri, 9 Jun 2023 12:57:32 +0900 +Subject: nilfs2: fix buffer corruption due to concurrent device reads + +From: Ryusuke Konishi + +commit 679bd7ebdd315bf457a4740b306ae99f1d0a403d upstream. + +As a result of analysis of a syzbot report, it turned out that in three +cases where nilfs2 allocates block device buffers directly via sb_getblk, +concurrent reads to the device can corrupt the allocated buffers. + +Nilfs2 uses sb_getblk for segment summary blocks, that make up a log +header, and the super root block, that is the trailer, and when moving and +writing the second super block after fs resize. + +In any of these, since the uptodate flag is not set when storing metadata +to be written in the allocated buffers, the stored metadata will be +overwritten if a device read of the same block occurs concurrently before +the write. This causes metadata corruption and misbehavior in the log +write itself, causing warnings in nilfs_btree_assign() as reported. + +Fix these issues by setting an uptodate flag on the buffer head on the +first or before modifying each buffer obtained with sb_getblk, and +clearing the flag on failure. 
+ +When setting the uptodate flag, the lock_buffer/unlock_buffer pair is used +to perform necessary exclusive control, and the buffer is filled to ensure +that uninitialized bytes are not mixed into the data read from others. As +for buffers for segment summary blocks, they are filled incrementally, so +if the uptodate flag was unset on their allocation, set the flag and zero +fill the buffer once at that point. + +Also, regarding the superblock move routine, the starting point of the +memset call to zerofill the block is incorrectly specified, which can +cause a buffer overflow on file systems with block sizes greater than +4KiB. In addition, if the superblock is moved within a large block, it is +necessary to assume the possibility that the data in the superblock will +be destroyed by zero-filling before copying. So fix these potential +issues as well. + +Link: https://lkml.kernel.org/r/20230609035732.20426-1-konishi.ryusuke@gmail.com +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+31837fe952932efc8fb9@syzkaller.appspotmail.com +Closes: https://lkml.kernel.org/r/00000000000030000a05e981f475@google.com +Tested-by: Ryusuke Konishi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/segbuf.c | 6 ++++++ + fs/nilfs2/segment.c | 7 +++++++ + fs/nilfs2/super.c | 23 ++++++++++++++++++++++- + 3 files changed, 35 insertions(+), 1 deletion(-) + +--- a/fs/nilfs2/segbuf.c ++++ b/fs/nilfs2/segbuf.c +@@ -110,6 +110,12 @@ int nilfs_segbuf_extend_segsum(struct ni + if (unlikely(!bh)) + return -ENOMEM; + ++ lock_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ memset(bh->b_data, 0, bh->b_size); ++ set_buffer_uptodate(bh); ++ } ++ unlock_buffer(bh); + nilfs_segbuf_add_segsum_buffer(segbuf, bh); + return 0; + } +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -997,10 +997,13 @@ static void nilfs_segctor_fill_in_super_ + unsigned int isz, srsz; + + bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; ++ ++ lock_buffer(bh_sr); + 
raw_sr = (struct nilfs_super_root *)bh_sr->b_data; + isz = nilfs->ns_inode_size; + srsz = NILFS_SR_BYTES(isz); + ++ raw_sr->sr_sum = 0; /* Ensure initialization within this update */ + raw_sr->sr_bytes = cpu_to_le16(srsz); + raw_sr->sr_nongc_ctime + = cpu_to_le64(nilfs_doing_gc() ? +@@ -1014,6 +1017,8 @@ static void nilfs_segctor_fill_in_super_ + nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz), 1); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); ++ set_buffer_uptodate(bh_sr); ++ unlock_buffer(bh_sr); + } + + static void nilfs_redirty_inodes(struct list_head *head) +@@ -1791,6 +1796,7 @@ static void nilfs_abort_logs(struct list + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + b_assoc_buffers) { ++ clear_buffer_uptodate(bh); + if (bh->b_page != bd_page) { + if (bd_page) + end_page_writeback(bd_page); +@@ -1802,6 +1808,7 @@ static void nilfs_abort_logs(struct list + b_assoc_buffers) { + clear_buffer_async_write(bh); + if (bh == segbuf->sb_super_root) { ++ clear_buffer_uptodate(bh); + if (bh->b_page != bd_page) { + end_page_writeback(bd_page); + bd_page = bh->b_page; +--- a/fs/nilfs2/super.c ++++ b/fs/nilfs2/super.c +@@ -384,10 +384,31 @@ static int nilfs_move_2nd_super(struct s + goto out; + } + nsbp = (void *)nsbh->b_data + offset; +- memset(nsbp, 0, nilfs->ns_blocksize); + ++ lock_buffer(nsbh); + if (sb2i >= 0) { ++ /* ++ * The position of the second superblock only changes by 4KiB, ++ * which is larger than the maximum superblock data size ++ * (= 1KiB), so there is no need to use memmove() to allow ++ * overlap between source and destination. ++ */ + memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize); ++ ++ /* ++ * Zero fill after copy to avoid overwriting in case of move ++ * within the same block. 
++ */ ++ memset(nsbh->b_data, 0, offset); ++ memset((void *)nsbp + nilfs->ns_sbsize, 0, ++ nsbh->b_size - offset - nilfs->ns_sbsize); ++ } else { ++ memset(nsbh->b_data, 0, nsbh->b_size); ++ } ++ set_buffer_uptodate(nsbh); ++ unlock_buffer(nsbh); ++ ++ if (sb2i >= 0) { + brelse(nilfs->ns_sbh[sb2i]); + nilfs->ns_sbh[sb2i] = nsbh; + nilfs->ns_sbp[sb2i] = nsbp; diff --git a/queue-4.14/series b/queue-4.14/series index be077cc7c6c..a78de12d8c3 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -1,2 +1,5 @@ serial-lantiq-add-missing-interrupt-ack.patch nilfs2-reject-devices-with-insufficient-block-count.patch +nilfs2-fix-buffer-corruption-due-to-concurrent-device-reads.patch +drivers-hv-vmbus-fix-vmbus_wait_for_unload-to-scan-present-cpus.patch +cgroup-do-not-corrupt-task-iteration-when-rebinding-subsystem.patch