--- /dev/null
+From 6f363f5aa845561f7ea496d8b1175e3204470486 Mon Sep 17 00:00:00 2001
+From: Xiu Jianfeng <xiujianfeng@huawei.com>
+Date: Sat, 10 Jun 2023 17:26:43 +0800
+Subject: cgroup: Do not corrupt task iteration when rebinding subsystem
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Xiu Jianfeng <xiujianfeng@huawei.com>
+
+commit 6f363f5aa845561f7ea496d8b1175e3204470486 upstream.
+
+We found a refcount UAF bug as follows:
+
+refcount_t: addition on 0; use-after-free.
+WARNING: CPU: 1 PID: 342 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x148
+Workqueue: events cpuset_hotplug_workfn
+Call trace:
+ refcount_warn_saturate+0xa0/0x148
+ __refcount_add.constprop.0+0x5c/0x80
+ css_task_iter_advance_css_set+0xd8/0x210
+ css_task_iter_advance+0xa8/0x120
+ css_task_iter_next+0x94/0x158
+ update_tasks_root_domain+0x58/0x98
+ rebuild_root_domains+0xa0/0x1b0
+ rebuild_sched_domains_locked+0x144/0x188
+ cpuset_hotplug_workfn+0x138/0x5a0
+ process_one_work+0x1e8/0x448
+ worker_thread+0x228/0x3e0
+ kthread+0xe0/0xf0
+ ret_from_fork+0x10/0x20
+
+then a kernel panic will be triggered as below:
+
+Unable to handle kernel paging request at virtual address 00000000c0000010
+Call trace:
+ cgroup_apply_control_disable+0xa4/0x16c
+ rebind_subsystems+0x224/0x590
+ cgroup_destroy_root+0x64/0x2e0
+ css_free_rwork_fn+0x198/0x2a0
+ process_one_work+0x1d4/0x4bc
+ worker_thread+0x158/0x410
+ kthread+0x108/0x13c
+ ret_from_fork+0x10/0x18
+
+The race that causes this bug is shown below:
+
+(hotplug cpu) | (umount cpuset)
+mutex_lock(&cpuset_mutex) | mutex_lock(&cgroup_mutex)
+cpuset_hotplug_workfn |
+ rebuild_root_domains | rebind_subsystems
+ update_tasks_root_domain | spin_lock_irq(&css_set_lock)
+ css_task_iter_start | list_move_tail(&cset->e_cset_node[ss->id]
+ while(css_task_iter_next) | &dcgrp->e_csets[ss->id]);
+ css_task_iter_end | spin_unlock_irq(&css_set_lock)
+mutex_unlock(&cpuset_mutex) | mutex_unlock(&cgroup_mutex)
+
+Inside css_task_iter_start/next/end, css_set_lock is held and then
+released, so while tasks are being iterated (left side) the css_set may
+be moved to another list (right side).  it->cset_head then still points
+to the old list head while it->cset_pos->next points to the head node of
+the new list, which cannot be used as a struct css_set.
+
+To fix this issue, iterate only scgrp's css_sets instead of every
+css_set in the hash table, so that in-flight iterators can be patched to
+keep their iteration correct; updating it->cset_head to the destination
+list is enough for that.
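+
+As a standalone illustration of why updating it->cset_head is enough
+(this is a made-up userspace sketch with invented list/iterator types,
+not kernel code), consider an iterator that stops when its cursor gets
+back to the list head it recorded when the walk started:
+
+  #include <stdio.h>
+
+  struct node { struct node *prev, *next; int id; };
+  struct iter { struct node *pos, *head; };
+
+  static void list_init(struct node *h) { h->prev = h->next = h; }
+
+  /* Unlink @n and append it to the tail of the list headed by @h. */
+  static void list_move_tail(struct node *n, struct node *h)
+  {
+      n->prev->next = n->next;
+      n->next->prev = n->prev;
+      n->prev = h->prev;
+      n->next = h;
+      h->prev->next = n;
+      h->prev = n;
+  }
+
+  int main(void)
+  {
+      struct node old_head, new_head, a, b, c;
+      struct node *nodes[] = { &a, &b, &c };
+      struct iter it;
+      int i;
+
+      list_init(&old_head);
+      list_init(&new_head);
+      for (i = 0; i < 3; i++) {
+          list_init(nodes[i]);
+          nodes[i]->id = i + 1;
+          list_move_tail(nodes[i], &old_head);
+      }
+
+      /* An in-flight iterator that has already visited node 1. */
+      it.head = &old_head;
+      it.pos = &a;
+
+      /* "Rebind": move every node, in order, to the new list. */
+      while (old_head.next != &old_head)
+          list_move_tail(old_head.next, &new_head);
+
+      /*
+       * The fix: repoint the iterator's recorded head at the new list.
+       * Without this, the walk below never meets &old_head again; it
+       * would treat &new_head as a real node and then loop forever,
+       * much like the corrupted css_set iteration above.
+       */
+      it.head = &new_head;
+
+      for (it.pos = it.pos->next; it.pos != it.head; it.pos = it.pos->next)
+          printf("visiting node %d\n", it.pos->id);
+      return 0;
+  }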
+
+Reported-by: Gaosheng Cui <cuigaosheng1@huawei.com>
+Link: https://www.spinics.net/lists/cgroups/msg37935.html
+Suggested-by: Michal Koutný <mkoutny@suse.com>
+Link: https://lore.kernel.org/all/20230526114139.70274-1-xiujianfeng@huaweicloud.com/
+Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
+Fixes: 2d8f243a5e6e ("cgroup: implement cgroup->e_csets[]")
+Cc: stable@vger.kernel.org # v3.16+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -1600,7 +1600,7 @@ int rebind_subsystems(struct cgroup_root
+ {
+ struct cgroup *dcgrp = &dst_root->cgrp;
+ struct cgroup_subsys *ss;
+- int ssid, i, ret;
++ int ssid, ret;
+ u16 dfl_disable_ss_mask = 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+@@ -1644,7 +1644,8 @@ int rebind_subsystems(struct cgroup_root
+ struct cgroup_root *src_root = ss->root;
+ struct cgroup *scgrp = &src_root->cgrp;
+ struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
+- struct css_set *cset;
++ struct css_set *cset, *cset_pos;
++ struct css_task_iter *it;
+
+ WARN_ON(!css || cgroup_css(dcgrp, ss));
+
+@@ -1662,9 +1663,22 @@ int rebind_subsystems(struct cgroup_root
+ css->cgroup = dcgrp;
+
+ spin_lock_irq(&css_set_lock);
+- hash_for_each(css_set_table, i, cset, hlist)
++ WARN_ON(!list_empty(&dcgrp->e_csets[ss->id]));
++ list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id],
++ e_cset_node[ss->id]) {
+ list_move_tail(&cset->e_cset_node[ss->id],
+ &dcgrp->e_csets[ss->id]);
++ /*
++ * all css_sets of scgrp together in same order to dcgrp,
++ * patch in-flight iterators to preserve correct iteration.
++ * since the iterator is always advanced right away and
++ * finished when it->cset_pos meets it->cset_head, so only
++ * update it->cset_head is enough here.
++ */
++ list_for_each_entry(it, &cset->task_iters, iters_node)
++ if (it->cset_head == &scgrp->e_csets[ss->id])
++ it->cset_head = &dcgrp->e_csets[ss->id];
++ }
+ spin_unlock_irq(&css_set_lock);
+
+ /* default hierarchy doesn't enable controllers by default */
--- /dev/null
+From 320805ab61e5f1e2a5729ae266e16bec2904050c Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mikelley@microsoft.com>
+Date: Thu, 18 May 2023 08:13:52 -0700
+Subject: Drivers: hv: vmbus: Fix vmbus_wait_for_unload() to scan present CPUs
+
+From: Michael Kelley <mikelley@microsoft.com>
+
+commit 320805ab61e5f1e2a5729ae266e16bec2904050c upstream.
+
+vmbus_wait_for_unload() may be called in the panic path after other
+CPUs are stopped. vmbus_wait_for_unload() currently loops through
+online CPUs looking for the UNLOAD response message. But the values of
+CONFIG_KEXEC_CORE and crash_kexec_post_notifiers affect the path used
+to stop the other CPUs, and in one of the paths the stopped CPUs
+are removed from cpu_online_mask. This removal happens in both
+x86/x64 and arm64 architectures. In such a case, vmbus_wait_for_unload()
+only checks the panic'ing CPU, and misses the UNLOAD response message
+except when the panic'ing CPU is CPU 0. vmbus_wait_for_unload()
+eventually times out, but only after waiting 100 seconds.
+
+Fix this by looping through *present* CPUs in vmbus_wait_for_unload().
+The cpu_present_mask is not modified by stopping the other CPUs in the
+panic path, nor should it be.
+
+Also, in a CoCo VM the synic_message_page is not allocated in
+hv_synic_alloc(), but is set and cleared in hv_synic_enable_regs()
+and hv_synic_disable_regs() such that it is set only when the CPU is
+online. If not all present CPUs are online when vmbus_wait_for_unload()
+is called, the synic_message_page might be NULL. Add a check for this.
+
+Fixes: cd95aad55793 ("Drivers: hv: vmbus: handle various crash scenarios")
+Cc: stable@vger.kernel.org
+Reported-by: John Starks <jostarks@microsoft.com>
+Signed-off-by: Michael Kelley <mikelley@microsoft.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://lore.kernel.org/r/1684422832-38476-1-git-send-email-mikelley@microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/channel_mgmt.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+--- a/drivers/hv/channel_mgmt.c
++++ b/drivers/hv/channel_mgmt.c
+@@ -803,11 +803,22 @@ static void vmbus_wait_for_unload(void)
+ if (completion_done(&vmbus_connection.unload_event))
+ goto completed;
+
+- for_each_online_cpu(cpu) {
++ for_each_present_cpu(cpu) {
+ struct hv_per_cpu_context *hv_cpu
+ = per_cpu_ptr(hv_context.cpu_context, cpu);
+
++ /*
++ * In a CoCo VM the synic_message_page is not allocated
++ * in hv_synic_alloc(). Instead it is set/cleared in
++ * hv_synic_enable_regs() and hv_synic_disable_regs()
++ * such that it is set only when the CPU is online. If
++ * not all present CPUs are online, the message page
++ * might be NULL, so skip such CPUs.
++ */
+ page_addr = hv_cpu->synic_message_page;
++ if (!page_addr)
++ continue;
++
+ msg = (struct hv_message *)page_addr
+ + VMBUS_MESSAGE_SINT;
+
+@@ -841,11 +852,14 @@ completed:
+ * maybe-pending messages on all CPUs to be able to receive new
+ * messages after we reconnect.
+ */
+- for_each_online_cpu(cpu) {
++ for_each_present_cpu(cpu) {
+ struct hv_per_cpu_context *hv_cpu
+ = per_cpu_ptr(hv_context.cpu_context, cpu);
+
+ page_addr = hv_cpu->synic_message_page;
++ if (!page_addr)
++ continue;
++
+ msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
+ msg->header.message_type = HVMSG_NONE;
+ }
--- /dev/null
+From 679bd7ebdd315bf457a4740b306ae99f1d0a403d Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Fri, 9 Jun 2023 12:57:32 +0900
+Subject: nilfs2: fix buffer corruption due to concurrent device reads
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit 679bd7ebdd315bf457a4740b306ae99f1d0a403d upstream.
+
+Analysis of a syzbot report revealed that, in three places where nilfs2
+allocates block device buffers directly via sb_getblk, concurrent reads
+of the device can corrupt the allocated buffers.
+
+Nilfs2 uses sb_getblk for the segment summary blocks that make up a log
+header, for the super root block that forms the log trailer, and when
+moving and writing the second superblock after a filesystem resize.
+
+In each of these cases, the uptodate flag is not set when the metadata
+to be written is stored in the allocated buffers, so the stored metadata
+is overwritten if a device read of the same block occurs concurrently
+before the write.  This corrupts the metadata and breaks the log write
+itself, causing the warnings in nilfs_btree_assign() that were reported.
+
+Fix these issues by setting the uptodate flag on the buffer head either
+at first use of, or before modifying, each buffer obtained with
+sb_getblk, and by clearing the flag on failure.
+
+When setting the uptodate flag, a lock_buffer/unlock_buffer pair
+provides the necessary exclusion, and the buffer is filled so that
+uninitialized bytes cannot leak into data read by others.  Buffers for
+segment summary blocks are filled incrementally, so if the uptodate flag
+is still unset when such a buffer is allocated, set the flag and
+zero-fill the buffer once at that point.
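+
+A minimal sketch of that pattern (the helper name below is hypothetical,
+invented only for illustration):
+
+  #include <linux/buffer_head.h>
+  #include <linux/string.h>
+
+  static void nilfs_init_getblk_buffer(struct buffer_head *bh)
+  {
+      lock_buffer(bh);
+      if (!buffer_uptodate(bh)) {
+          /*
+           * Nothing valid is cached for this block yet: zero it so
+           * uninitialized bytes cannot leak to readers, and mark it
+           * uptodate so a concurrent read of the same block is served
+           * from the cache instead of overwriting the buffer from disk.
+           */
+          memset(bh->b_data, 0, bh->b_size);
+          set_buffer_uptodate(bh);
+      }
+      unlock_buffer(bh);
+  }
+
+The segment summary path in the hunks below does essentially this; the
+super root and second-superblock paths instead fill the buffer with real
+data under the same buffer lock and then set the flag.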
+
+Also, in the superblock move routine, the starting point of the memset
+call that zero-fills the block is specified incorrectly, which can cause
+a buffer overflow on file systems with block sizes greater than 4KiB.
+In addition, when the superblock is moved within a large block, the
+superblock data can be destroyed if the zero-fill is done before the
+copy.  Fix these potential issues as well.
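+
+For illustration (the 8 KiB block size is an assumed example, not taken
+from the report): if the second superblock sits at offset 4096 within an
+8192-byte block buffer, the old code did
+
+  nsbp = nsbh->b_data + 4096;
+  memset(nsbp, 0, nilfs->ns_blocksize);  /* zeroes 8192 bytes from 4096 in */
+
+writing "offset" bytes past the end of the buffer.  The fixed code
+copies the superblock first and then zero-fills only [0, offset) and
+[offset + ns_sbsize, end of block), so nothing is written outside the
+block and the copied superblock is not clobbered.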
+
+Link: https://lkml.kernel.org/r/20230609035732.20426-1-konishi.ryusuke@gmail.com
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+31837fe952932efc8fb9@syzkaller.appspotmail.com
+Closes: https://lkml.kernel.org/r/00000000000030000a05e981f475@google.com
+Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/segbuf.c | 6 ++++++
+ fs/nilfs2/segment.c | 7 +++++++
+ fs/nilfs2/super.c | 23 ++++++++++++++++++++++-
+ 3 files changed, 35 insertions(+), 1 deletion(-)
+
+--- a/fs/nilfs2/segbuf.c
++++ b/fs/nilfs2/segbuf.c
+@@ -110,6 +110,12 @@ int nilfs_segbuf_extend_segsum(struct ni
+ if (unlikely(!bh))
+ return -ENOMEM;
+
++ lock_buffer(bh);
++ if (!buffer_uptodate(bh)) {
++ memset(bh->b_data, 0, bh->b_size);
++ set_buffer_uptodate(bh);
++ }
++ unlock_buffer(bh);
+ nilfs_segbuf_add_segsum_buffer(segbuf, bh);
+ return 0;
+ }
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -997,10 +997,13 @@ static void nilfs_segctor_fill_in_super_
+ unsigned int isz, srsz;
+
+ bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
++
++ lock_buffer(bh_sr);
+ raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
+ isz = nilfs->ns_inode_size;
+ srsz = NILFS_SR_BYTES(isz);
+
++ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
+ raw_sr->sr_bytes = cpu_to_le16(srsz);
+ raw_sr->sr_nongc_ctime
+ = cpu_to_le64(nilfs_doing_gc() ?
+@@ -1014,6 +1017,8 @@ static void nilfs_segctor_fill_in_super_
+ nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
+ NILFS_SR_SUFILE_OFFSET(isz), 1);
+ memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
++ set_buffer_uptodate(bh_sr);
++ unlock_buffer(bh_sr);
+ }
+
+ static void nilfs_redirty_inodes(struct list_head *head)
+@@ -1791,6 +1796,7 @@ static void nilfs_abort_logs(struct list
+ list_for_each_entry(segbuf, logs, sb_list) {
+ list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
+ b_assoc_buffers) {
++ clear_buffer_uptodate(bh);
+ if (bh->b_page != bd_page) {
+ if (bd_page)
+ end_page_writeback(bd_page);
+@@ -1802,6 +1808,7 @@ static void nilfs_abort_logs(struct list
+ b_assoc_buffers) {
+ clear_buffer_async_write(bh);
+ if (bh == segbuf->sb_super_root) {
++ clear_buffer_uptodate(bh);
+ if (bh->b_page != bd_page) {
+ end_page_writeback(bd_page);
+ bd_page = bh->b_page;
+--- a/fs/nilfs2/super.c
++++ b/fs/nilfs2/super.c
+@@ -384,10 +384,31 @@ static int nilfs_move_2nd_super(struct s
+ goto out;
+ }
+ nsbp = (void *)nsbh->b_data + offset;
+- memset(nsbp, 0, nilfs->ns_blocksize);
+
++ lock_buffer(nsbh);
+ if (sb2i >= 0) {
++ /*
++ * The position of the second superblock only changes by 4KiB,
++ * which is larger than the maximum superblock data size
++ * (= 1KiB), so there is no need to use memmove() to allow
++ * overlap between source and destination.
++ */
+ memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
++
++ /*
++ * Zero fill after copy to avoid overwriting in case of move
++ * within the same block.
++ */
++ memset(nsbh->b_data, 0, offset);
++ memset((void *)nsbp + nilfs->ns_sbsize, 0,
++ nsbh->b_size - offset - nilfs->ns_sbsize);
++ } else {
++ memset(nsbh->b_data, 0, nsbh->b_size);
++ }
++ set_buffer_uptodate(nsbh);
++ unlock_buffer(nsbh);
++
++ if (sb2i >= 0) {
+ brelse(nilfs->ns_sbh[sb2i]);
+ nilfs->ns_sbh[sb2i] = nsbh;
+ nilfs->ns_sbp[sb2i] = nsbp;
serial-lantiq-add-missing-interrupt-ack.patch
nilfs2-reject-devices-with-insufficient-block-count.patch
+nilfs2-fix-buffer-corruption-due-to-concurrent-device-reads.patch
+drivers-hv-vmbus-fix-vmbus_wait_for_unload-to-scan-present-cpus.patch
+cgroup-do-not-corrupt-task-iteration-when-rebinding-subsystem.patch