git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 07:56:29 +0000 (09:56 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 07:56:29 +0000 (09:56 +0200)
added patches:
btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
io_uring-fix-race-between-timeout-flush-and-removal.patch
x86-pm-save-the-msr-validity-status-at-context-setup.patch
x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch

queue-5.10/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch [new file with mode: 0644]
queue-5.10/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-race-between-timeout-flush-and-removal.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-pm-save-the-msr-validity-status-at-context-setup.patch [new file with mode: 0644]
queue-5.10/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch [new file with mode: 0644]

diff --git a/queue-5.10/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch b/queue-5.10/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
new file mode 100644 (file)
index 0000000..4763284
--- /dev/null
@@ -0,0 +1,93 @@
+From b642b52d0b50f4d398cb4293f64992d0eed2e2ce Mon Sep 17 00:00:00 2001
+From: Ethan Lien <ethanlien@synology.com>
+Date: Mon, 7 Mar 2022 18:00:04 +0800
+Subject: btrfs: fix qgroup reserve overflow the qgroup limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ethan Lien <ethanlien@synology.com>
+
+commit b642b52d0b50f4d398cb4293f64992d0eed2e2ce upstream.
+
+We use extent_changeset->bytes_changed in qgroup_reserve_data() to record
+how many bytes we set for EXTENT_QGROUP_RESERVED state. Currently the
+bytes_changed is set as "unsigned int", and it will overflow if we try to
+fallocate a range larger than 4GiB. The result is we reserve less bytes
+and eventually break the qgroup limit.
+
+Unlike regular buffered/direct writes, where we use one changeset for
+each ordered extent (which can never be larger than 256M), for
+fallocate we use one changeset for the whole range.  Thus it no longer
+respects the 256M per-extent limit, which causes the problem.
+
+The following example test script reproduces the problem:
+
+  $ cat qgroup-overflow.sh
+  #!/bin/bash
+
+  DEV=/dev/sdj
+  MNT=/mnt/sdj
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+
+  # Set qgroup limit to 2GiB.
+  btrfs quota enable $MNT
+  btrfs qgroup limit 2G $MNT
+
+  # Try to fallocate a 3GiB file. This should fail.
+  echo
+  echo "Try to fallocate a 3GiB file..."
+  fallocate -l 3G $MNT/3G.file
+
+  # Try to fallocate a 5GiB file.
+  echo
+  echo "Try to fallocate a 5GiB file..."
+  fallocate -l 5G $MNT/5G.file
+
+  # See we break the qgroup limit.
+  echo
+  sync
+  btrfs qgroup show -r $MNT
+
+  umount $MNT
+
+When running the test:
+
+  $ ./qgroup-overflow.sh
+  (...)
+
+  Try to fallocate a 3GiB file...
+  fallocate: fallocate failed: Disk quota exceeded
+
+  Try to fallocate a 5GiB file...
+
+  qgroupid         rfer         excl     max_rfer
+  --------         ----         ----     --------
+  0/5           5.00GiB      5.00GiB      2.00GiB
+
+Since we have no control of how bytes_changed is used, it's better to
+set it to u64.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Ethan Lien <ethanlien@synology.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -121,7 +121,7 @@ struct extent_buffer {
+  */
+ struct extent_changeset {
+       /* How many bytes are set/cleared in this operation */
+-      unsigned int bytes_changed;
++      u64 bytes_changed;
+       /* Changed ranges */
+       struct ulist range_changed;
diff --git a/queue-5.10/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch b/queue-5.10/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
new file mode 100644 (file)
index 0000000..5cf09a2
--- /dev/null
@@ -0,0 +1,91 @@
+From 60021bd754c6ca0addc6817994f20290a321d8d6 Mon Sep 17 00:00:00 2001
+From: Kaiwen Hu <kevinhu@synology.com>
+Date: Wed, 23 Mar 2022 15:10:32 +0800
+Subject: btrfs: prevent subvol with swapfile from being deleted
+
+From: Kaiwen Hu <kevinhu@synology.com>
+
+commit 60021bd754c6ca0addc6817994f20290a321d8d6 upstream.
+
+A subvolume with an active swapfile must not be deleted otherwise it
+would not be possible to deactivate it.
+
+After the subvolume is deleted, we cannot swapoff the swapfile in this
+deleted subvolume because the path is unreachable.  The swapfile is
+still active and holding references, the filesystem cannot be unmounted.
+
+The test looks like this:
+
+  mkfs.btrfs -f $dev > /dev/null
+  mount $dev $mnt
+
+  btrfs sub create $mnt/subvol
+  touch $mnt/subvol/swapfile
+  chmod 600 $mnt/subvol/swapfile
+  chattr +C $mnt/subvol/swapfile
+  dd if=/dev/zero of=$mnt/subvol/swapfile bs=1K count=4096
+  mkswap $mnt/subvol/swapfile
+  swapon $mnt/subvol/swapfile
+
+  btrfs sub delete $mnt/subvol
+  swapoff $mnt/subvol/swapfile  # failed: No such file or directory
+  swapoff --all
+
+  umount $mnt                   # target is busy.
+
+To prevent the above issue, we simply check whether the subvolume
+contains any active swapfile, and stop the deletion process if it
+does.  This matches how the snapshot ioctl deals with a swapfile.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Kaiwen Hu <kevinhu@synology.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |   24 +++++++++++++++++++++++-
+ 1 file changed, 23 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4023,6 +4023,13 @@ int btrfs_delete_subvolume(struct inode
+                          dest->root_key.objectid);
+               return -EPERM;
+       }
++      if (atomic_read(&dest->nr_swapfiles)) {
++              spin_unlock(&dest->root_item_lock);
++              btrfs_warn(fs_info,
++                         "attempt to delete subvolume %llu with active swapfile",
++                         root->root_key.objectid);
++              return -EPERM;
++      }
+       root_flags = btrfs_root_flags(&dest->root_item);
+       btrfs_set_root_flags(&dest->root_item,
+                            root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+@@ -10215,8 +10222,23 @@ static int btrfs_swap_activate(struct sw
+        * set. We use this counter to prevent snapshots. We must increment it
+        * before walking the extents because we don't want a concurrent
+        * snapshot to run after we've already checked the extents.
+-       */
++       *
++       * It is possible that subvolume is marked for deletion but still not
++       * removed yet. To prevent this race, we check the root status before
++       * activating the swapfile.
++       */
++      spin_lock(&root->root_item_lock);
++      if (btrfs_root_dead(root)) {
++              spin_unlock(&root->root_item_lock);
++
++              btrfs_exclop_finish(fs_info);
++              btrfs_warn(fs_info,
++              "cannot activate swapfile because subvolume %llu is being deleted",
++                      root->root_key.objectid);
++              return -EPERM;
++      }
+       atomic_inc(&root->nr_swapfiles);
++      spin_unlock(&root->root_item_lock);
+       isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
diff --git a/queue-5.10/io_uring-fix-race-between-timeout-flush-and-removal.patch b/queue-5.10/io_uring-fix-race-between-timeout-flush-and-removal.patch
new file mode 100644 (file)
index 0000000..e175e5f
--- /dev/null
@@ -0,0 +1,80 @@
+From e677edbcabee849bfdd43f1602bccbecf736a646 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 8 Apr 2022 11:08:58 -0600
+Subject: io_uring: fix race between timeout flush and removal
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e677edbcabee849bfdd43f1602bccbecf736a646 upstream.
+
+io_flush_timeouts() assumes the timeout isn't in progress of triggering
+or being removed/canceled, so it unconditionally removes it from the
+timeout list and attempts to cancel it.
+
+Leave it on the list and let the normal timeout cancelation take care
+of it.
+
+Cc: stable@vger.kernel.org # 5.5+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1556,6 +1556,7 @@ static void __io_queue_deferred(struct i
+ static void io_flush_timeouts(struct io_ring_ctx *ctx)
+ {
++      struct io_kiocb *req, *tmp;
+       u32 seq;
+       if (list_empty(&ctx->timeout_list))
+@@ -1563,10 +1564,8 @@ static void io_flush_timeouts(struct io_
+       seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+-      do {
++      list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+               u32 events_needed, events_got;
+-              struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
+-                                              struct io_kiocb, timeout.list);
+               if (io_is_timeout_noseq(req))
+                       break;
+@@ -1583,9 +1582,8 @@ static void io_flush_timeouts(struct io_
+               if (events_got < events_needed)
+                       break;
+-              list_del_init(&req->timeout.list);
+               io_kill_timeout(req, 0);
+-      } while (!list_empty(&ctx->timeout_list));
++      }
+       ctx->cq_last_tm_flush = seq;
+ }
+@@ -5639,6 +5637,7 @@ static int io_timeout_prep(struct io_kio
+       else
+               data->mode = HRTIMER_MODE_REL;
++      INIT_LIST_HEAD(&req->timeout.list);
+       hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
+       return 0;
+ }
+@@ -6282,12 +6281,12 @@ static enum hrtimer_restart io_link_time
+       if (!list_empty(&req->link_list)) {
+               prev = list_entry(req->link_list.prev, struct io_kiocb,
+                                 link_list);
+-              if (refcount_inc_not_zero(&prev->refs))
+-                      list_del_init(&req->link_list);
+-              else
++              list_del_init(&req->link_list);
++              if (!refcount_inc_not_zero(&prev->refs))
+                       prev = NULL;
+       }
++      list_del(&req->timeout.list);
+       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       if (prev) {
index 97bf09a8c7181c611744dfde4de11b34609c3efe..a6af97914575589f85c1eb19f9276b1db638525c 100644 (file)
@@ -138,3 +138,8 @@ mmc-renesas_sdhi-don-t-overwrite-tap-settings-when-hs400-tuning-is-complete.patc
 lz4-fix-lz4_decompress_safe_partial-read-out-of-bound.patch
 mmmremap.c-avoid-pointless-invalidate_range_start-end-on-mremap-old_size-0.patch
 mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace.patch
+io_uring-fix-race-between-timeout-flush-and-removal.patch
+x86-pm-save-the-msr-validity-status-at-context-setup.patch
+x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
+btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
+btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
diff --git a/queue-5.10/x86-pm-save-the-msr-validity-status-at-context-setup.patch b/queue-5.10/x86-pm-save-the-msr-validity-status-at-context-setup.patch
new file mode 100644 (file)
index 0000000..84378a7
--- /dev/null
@@ -0,0 +1,55 @@
+From 73924ec4d560257004d5b5116b22a3647661e364 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Apr 2022 17:34:19 -0700
+Subject: x86/pm: Save the MSR validity status at context setup
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 73924ec4d560257004d5b5116b22a3647661e364 upstream.
+
+The mechanism to save/restore MSRs during S3 suspend/resume checks MSR
+validity during suspend, and only restores an MSR if it is a valid
+MSR.  This is not optimal, as an invalid MSR will unnecessarily
+throw an exception for every suspend cycle.  The more invalid MSRs
+there are, the higher the impact will be.
+
+Check and save the MSR validity at setup.  This ensures that only valid
+MSRs that are guaranteed to not throw an exception will be attempted
+during suspend.
+
+Fixes: 7a9c2dd08ead ("x86/pm: Introduce quirk framework to save/restore extra MSR registers around suspend/resume")
+Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -40,7 +40,8 @@ static void msr_save_context(struct save
+       struct saved_msr *end = msr + ctxt->saved_msrs.num;
+       while (msr < end) {
+-              msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
++              if (msr->valid)
++                      rdmsrl(msr->info.msr_no, msr->info.reg.q);
+               msr++;
+       }
+ }
+@@ -427,8 +428,10 @@ static int msr_build_context(const u32 *
+       }
+       for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
++              u64 dummy;
++
+               msr_array[i].info.msr_no        = msr_id[j];
+-              msr_array[i].valid              = false;
++              msr_array[i].valid              = !rdmsrl_safe(msr_id[j], &dummy);
+               msr_array[i].info.reg.q         = 0;
+       }
+       saved_msrs->num   = total_num;
diff --git a/queue-5.10/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch b/queue-5.10/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
new file mode 100644 (file)
index 0000000..cfa1693
--- /dev/null
@@ -0,0 +1,60 @@
+From e2a1256b17b16f9b9adf1b6fea56819e7b68e463 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Apr 2022 17:35:45 -0700
+Subject: x86/speculation: Restore speculation related MSRs during S3 resume
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit e2a1256b17b16f9b9adf1b6fea56819e7b68e463 upstream.
+
+After resuming from suspend-to-RAM, the MSRs that control CPU's
+speculative execution behavior are not being restored on the boot CPU.
+
+These MSRs are used to mitigate speculative execution vulnerabilities.
+Not restoring them correctly may leave the CPU vulnerable.  Secondary
+CPUs' MSRs are correctly being restored at S3 resume by
+identify_secondary_cpu().
+
+During S3 resume, restore these MSRs for boot CPU when restoring its
+processor state.
+
+Fixes: 772439717dbf ("x86/bugs/intel: Set proper CPU features and setup RDS")
+Reported-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -506,10 +506,24 @@ static int pm_cpu_check(const struct x86
+       return ret;
+ }
++static void pm_save_spec_msr(void)
++{
++      u32 spec_msr_id[] = {
++              MSR_IA32_SPEC_CTRL,
++              MSR_IA32_TSX_CTRL,
++              MSR_TSX_FORCE_ABORT,
++              MSR_IA32_MCU_OPT_CTRL,
++              MSR_AMD64_LS_CFG,
++      };
++
++      msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
++}
++
+ static int pm_check_save_msr(void)
+ {
+       dmi_check_system(msr_save_dmi_table);
+       pm_cpu_check(msr_save_cpu_table);
++      pm_save_spec_msr();
+       return 0;
+ }