5.17-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 07:56:49 +0000 (09:56 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 11 Apr 2022 07:56:49 +0000 (09:56 +0200)
added patches:
btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
cifs-force-new-session-setup-and-tcon-for-dfs.patch
io_uring-fix-race-between-timeout-flush-and-removal.patch
io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
qed-fix-ethtool-register-dump.patch
spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
x86-pm-save-the-msr-validity-status-at-context-setup.patch
x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch

14 files changed:
queue-5.17/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch [new file with mode: 0644]
queue-5.17/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch [new file with mode: 0644]
queue-5.17/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch [new file with mode: 0644]
queue-5.17/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch [new file with mode: 0644]
queue-5.17/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch [new file with mode: 0644]
queue-5.17/cifs-force-new-session-setup-and-tcon-for-dfs.patch [new file with mode: 0644]
queue-5.17/io_uring-fix-race-between-timeout-flush-and-removal.patch [new file with mode: 0644]
queue-5.17/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch [new file with mode: 0644]
queue-5.17/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch [new file with mode: 0644]
queue-5.17/qed-fix-ethtool-register-dump.patch [new file with mode: 0644]
queue-5.17/series
queue-5.17/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch [new file with mode: 0644]
queue-5.17/x86-pm-save-the-msr-validity-status-at-context-setup.patch [new file with mode: 0644]
queue-5.17/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch [new file with mode: 0644]

diff --git a/queue-5.17/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch b/queue-5.17/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
new file mode 100644
index 0000000..8dc4ae9
--- /dev/null
+++ b/queue-5.17/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
@@ -0,0 +1,107 @@
+From 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 15 Mar 2022 19:28:05 +0800
+Subject: btrfs: avoid defragging extents whose next extents are not targets
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a upstream.
+
+[BUG]
+There is a report that autodefrag is defragging a single sector, which
+is a complete waste of IO and does not help defragmentation at all:
+
+   btrfs-cleaner-808 defrag_one_locked_range: root=256 ino=651122 start=0 len=4096
+
+[CAUSE]
+In defrag_collect_targets(), we check if the current range (A) can be
+merged with the next one (B).
+
+If they are mergeable, we will add range A to the defrag targets.
+
+However, there is a catch for autodefrag: when checking mergeability
+against range B, we intentionally pass 0 as @newer_than, hoping for a
+higher chance of merging with the next extent.
+
+But in the next iteration, range B will be looked up by
+defrag_lookup_extent() with a non-zero @newer_than.
+
+And if range B is not really newer, it will be rejected directly, causing
+only range A to be defragged, while we expect to defrag both ranges A
+and B.
+
+[FIX]
+Since the root cause is the difference in the check conditions of
+defrag_check_next_extent() and defrag_collect_targets(), we fix it by:
+
+1. Pass @newer_than to defrag_check_next_extent()
+2. Pass @extent_thresh to defrag_check_next_extent()
+
+This makes the check between defrag_collect_targets() and
+defrag_check_next_extent() more consistent.
+
+While there are still some minor differences, the remaining checks
+focus on runtime flags like writeback/delalloc, which are mostly
+transient and safe to check only in defrag_collect_targets().
+
+Link: https://github.com/btrfs/linux/issues/423#issuecomment-1066981856
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1215,7 +1215,7 @@ static u32 get_extent_max_capacity(const
+ }
+ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
+-                                   bool locked)
++                                   u32 extent_thresh, u64 newer_than, bool locked)
+ {
+       struct extent_map *next;
+       bool ret = false;
+@@ -1225,11 +1225,12 @@ static bool defrag_check_next_extent(str
+               return false;
+       /*
+-       * We want to check if the next extent can be merged with the current
+-       * one, which can be an extent created in a past generation, so we pass
+-       * a minimum generation of 0 to defrag_lookup_extent().
++       * Here we need to pass @newer_then when checking the next extent, or
++       * we will hit a case we mark current extent for defrag, but the next
++       * one will not be a target.
++       * This will just cause extra IO without really reducing the fragments.
+        */
+-      next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
++      next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
+       /* No more em or hole */
+       if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+               goto out;
+@@ -1241,6 +1242,13 @@ static bool defrag_check_next_extent(str
+        */
+       if (next->len >= get_extent_max_capacity(em))
+               goto out;
++      /* Skip older extent */
++      if (next->generation < newer_than)
++              goto out;
++      /* Also check extent size */
++      if (next->len >= extent_thresh)
++              goto out;
++
+       ret = true;
+ out:
+       free_extent_map(next);
+@@ -1446,7 +1454,7 @@ static int defrag_collect_targets(struct
+                       goto next;
+               next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
+-                                                        locked);
++                                              extent_thresh, newer_than, locked);
+               if (!next_mergeable) {
+                       struct defrag_target_range *last;
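
A rough userspace sketch of the inconsistency described above (not kernel
code; the structures and helpers are invented for illustration): if the
"can the next extent be merged" check ignores @newer_than and
@extent_thresh while the per-extent target check enforces them, an old
next extent leaves only the current extent queued, i.e. a pointless
single-extent defrag.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  struct extent { uint64_t generation; uint64_t len; };

  /* Mirrors the per-extent target check in defrag_collect_targets(). */
  static bool is_target(const struct extent *em, uint64_t newer_than,
                        uint32_t extent_thresh)
  {
      return em->generation >= newer_than && em->len < extent_thresh;
  }

  /* Old behaviour: mergeability ignored @newer_than and @extent_thresh. */
  static bool next_mergeable_old(const struct extent *next)
  {
      (void)next;
      return true;
  }

  /* New behaviour: apply the same generation/size checks to the next extent. */
  static bool next_mergeable_new(const struct extent *next, uint64_t newer_than,
                                 uint32_t extent_thresh)
  {
      return is_target(next, newer_than, extent_thresh);
  }

  int main(void)
  {
      struct extent cur  = { .generation = 100, .len = 4096 };
      struct extent next = { .generation = 10,  .len = 4096 }; /* too old */
      uint64_t newer_than = 50;
      uint32_t extent_thresh = 256 * 1024;

      /* Old check queues cur even though next will later be rejected. */
      printf("old: queue cur alone = %d\n",
             is_target(&cur, newer_than, extent_thresh) &&
             next_mergeable_old(&next));
      /* New check skips cur, avoiding the single-extent defrag. */
      printf("new: queue cur alone = %d\n",
             is_target(&cur, newer_than, extent_thresh) &&
             next_mergeable_new(&next, newer_than, extent_thresh));
      return 0;
  }
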
diff --git a/queue-5.17/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch b/queue-5.17/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
new file mode 100644
index 0000000..d639464
--- /dev/null
+++ b/queue-5.17/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
@@ -0,0 +1,93 @@
+From b642b52d0b50f4d398cb4293f64992d0eed2e2ce Mon Sep 17 00:00:00 2001
+From: Ethan Lien <ethanlien@synology.com>
+Date: Mon, 7 Mar 2022 18:00:04 +0800
+Subject: btrfs: fix qgroup reserve overflow the qgroup limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ethan Lien <ethanlien@synology.com>
+
+commit b642b52d0b50f4d398cb4293f64992d0eed2e2ce upstream.
+
+We use extent_changeset->bytes_changed in qgroup_reserve_data() to record
+how many bytes we set for the EXTENT_QGROUP_RESERVED state. Currently
+bytes_changed is declared as "unsigned int", and it will overflow if we
+try to fallocate a range larger than 4GiB. The result is that we reserve
+fewer bytes and eventually break the qgroup limit.
+
+Unlike regular buffered/direct writes, where we use one changeset for
+each ordered extent (which can never be larger than 256M), for
+fallocate we use one changeset for the whole range, so it no longer
+respects the 256M per-extent limit, which causes the problem.
+
+The following example test script reproduces the problem:
+
+  $ cat qgroup-overflow.sh
+  #!/bin/bash
+
+  DEV=/dev/sdj
+  MNT=/mnt/sdj
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+
+  # Set qgroup limit to 2GiB.
+  btrfs quota enable $MNT
+  btrfs qgroup limit 2G $MNT
+
+  # Try to fallocate a 3GiB file. This should fail.
+  echo
+  echo "Try to fallocate a 3GiB file..."
+  fallocate -l 3G $MNT/3G.file
+
+  # Try to fallocate a 5GiB file.
+  echo
+  echo "Try to fallocate a 5GiB file..."
+  fallocate -l 5G $MNT/5G.file
+
+  # See we break the qgroup limit.
+  echo
+  sync
+  btrfs qgroup show -r $MNT
+
+  umount $MNT
+
+When running the test:
+
+  $ ./qgroup-overflow.sh
+  (...)
+
+  Try to fallocate a 3GiB file...
+  fallocate: fallocate failed: Disk quota exceeded
+
+  Try to fallocate a 5GiB file...
+
+  qgroupid         rfer         excl     max_rfer
+  --------         ----         ----     --------
+  0/5           5.00GiB      5.00GiB      2.00GiB
+
+Since we have no control over how bytes_changed is used, it's better to
+make it a u64.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Ethan Lien <ethanlien@synology.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl {
+  */
+ struct extent_changeset {
+       /* How many bytes are set/cleared in this operation */
+-      unsigned int bytes_changed;
++      u64 bytes_changed;
+       /* Changed ranges */
+       struct ulist range_changed;
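
A minimal standalone demo of the overflow fixed above (illustrative only,
not part of the patch): accumulating a 5 GiB fallocate-sized range into a
32-bit counter wraps, so less than 5 GiB appears reserved and the qgroup
limit can be exceeded, while a 64-bit counter stays correct.

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      const uint64_t step = 1U << 20;          /* 1 MiB per "set" call */
      const uint64_t total = 5ULL << 30;       /* 5 GiB fallocate range */
      unsigned int bytes_changed_32 = 0;       /* old field type */
      uint64_t bytes_changed_64 = 0;           /* fixed field type (u64) */

      for (uint64_t done = 0; done < total; done += step) {
          bytes_changed_32 += step;
          bytes_changed_64 += step;
      }

      printf("32-bit counter: %u bytes (wrapped)\n", bytes_changed_32);
      printf("64-bit counter: %" PRIu64 " bytes\n", bytes_changed_64);
      return 0;
  }
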
diff --git a/queue-5.17/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch b/queue-5.17/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
new file mode 100644
index 0000000..bd16ad9
--- /dev/null
+++ b/queue-5.17/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
@@ -0,0 +1,91 @@
+From 60021bd754c6ca0addc6817994f20290a321d8d6 Mon Sep 17 00:00:00 2001
+From: Kaiwen Hu <kevinhu@synology.com>
+Date: Wed, 23 Mar 2022 15:10:32 +0800
+Subject: btrfs: prevent subvol with swapfile from being deleted
+
+From: Kaiwen Hu <kevinhu@synology.com>
+
+commit 60021bd754c6ca0addc6817994f20290a321d8d6 upstream.
+
+A subvolume with an active swapfile must not be deleted, otherwise it
+would not be possible to deactivate it.
+
+After the subvolume is deleted, we cannot swapoff the swapfile in this
+deleted subvolume because the path is unreachable.  The swapfile is
+still active and holding references, so the filesystem cannot be unmounted.
+
+The test looks like this:
+
+  mkfs.btrfs -f $dev > /dev/null
+  mount $dev $mnt
+
+  btrfs sub create $mnt/subvol
+  touch $mnt/subvol/swapfile
+  chmod 600 $mnt/subvol/swapfile
+  chattr +C $mnt/subvol/swapfile
+  dd if=/dev/zero of=$mnt/subvol/swapfile bs=1K count=4096
+  mkswap $mnt/subvol/swapfile
+  swapon $mnt/subvol/swapfile
+
+  btrfs sub delete $mnt/subvol
+  swapoff $mnt/subvol/swapfile  # failed: No such file or directory
+  swapoff --all
+
+  umount $mnt                   # target is busy.
+
+To prevent the above issue, we simply check whether the subvolume
+contains any active swapfile and stop the deletion process.  This
+behavior is similar to how the snapshot ioctl deals with a swapfile.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Kaiwen Hu <kevinhu@synology.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |   24 +++++++++++++++++++++++-
+ 1 file changed, 23 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4466,6 +4466,13 @@ int btrfs_delete_subvolume(struct inode
+                          dest->root_key.objectid);
+               return -EPERM;
+       }
++      if (atomic_read(&dest->nr_swapfiles)) {
++              spin_unlock(&dest->root_item_lock);
++              btrfs_warn(fs_info,
++                         "attempt to delete subvolume %llu with active swapfile",
++                         root->root_key.objectid);
++              return -EPERM;
++      }
+       root_flags = btrfs_root_flags(&dest->root_item);
+       btrfs_set_root_flags(&dest->root_item,
+                            root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+@@ -10424,8 +10431,23 @@ static int btrfs_swap_activate(struct sw
+        * set. We use this counter to prevent snapshots. We must increment it
+        * before walking the extents because we don't want a concurrent
+        * snapshot to run after we've already checked the extents.
+-       */
++       *
++       * It is possible that subvolume is marked for deletion but still not
++       * removed yet. To prevent this race, we check the root status before
++       * activating the swapfile.
++       */
++      spin_lock(&root->root_item_lock);
++      if (btrfs_root_dead(root)) {
++              spin_unlock(&root->root_item_lock);
++
++              btrfs_exclop_finish(fs_info);
++              btrfs_warn(fs_info,
++              "cannot activate swapfile because subvolume %llu is being deleted",
++                      root->root_key.objectid);
++              return -EPERM;
++      }
+       atomic_inc(&root->nr_swapfiles);
++      spin_unlock(&root->root_item_lock);
+       isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
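
A userspace sketch (pthreads; all names invented for illustration) of the
check-under-one-lock pattern the fix relies on: subvolume deletion refuses
if a swapfile is active, swapfile activation refuses if the subvolume is
already marked dead, and both decisions are made under the same lock so
the two paths cannot race past each other.

  #include <errno.h>
  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  struct subvol {
      pthread_mutex_t root_item_lock;
      int nr_swapfiles;
      bool dead;                  /* set once deletion is committed to */
  };

  static int delete_subvolume(struct subvol *sv)
  {
      pthread_mutex_lock(&sv->root_item_lock);
      if (sv->nr_swapfiles) {
          pthread_mutex_unlock(&sv->root_item_lock);
          return -EPERM;          /* active swapfile, refuse deletion */
      }
      sv->dead = true;
      pthread_mutex_unlock(&sv->root_item_lock);
      return 0;
  }

  static int activate_swapfile(struct subvol *sv)
  {
      pthread_mutex_lock(&sv->root_item_lock);
      if (sv->dead) {
          pthread_mutex_unlock(&sv->root_item_lock);
          return -EPERM;          /* subvolume being deleted, refuse swapon */
      }
      sv->nr_swapfiles++;
      pthread_mutex_unlock(&sv->root_item_lock);
      return 0;
  }

  int main(void)
  {
      struct subvol sv = { PTHREAD_MUTEX_INITIALIZER, 0, false };

      printf("swapon: %d\n", activate_swapfile(&sv));   /* 0 */
      printf("delete: %d\n", delete_subvolume(&sv));    /* -EPERM */
      sv.nr_swapfiles = 0;
      printf("delete: %d\n", delete_subvolume(&sv));    /* 0 */
      printf("swapon: %d\n", activate_swapfile(&sv));   /* -EPERM */
      return 0;
  }
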
diff --git a/queue-5.17/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch b/queue-5.17/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
new file mode 100644
index 0000000..ac2c884
--- /dev/null
+++ b/queue-5.17/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
@@ -0,0 +1,216 @@
+From bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 8 Mar 2022 13:36:38 +0800
+Subject: btrfs: remove device item and update super block in the same transaction
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 upstream.
+
+[BUG]
+There is a report that a btrfs filesystem has a bad super block num devices.
+
+This makes btrfs reject the fs completely.
+
+  BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
+  BTRFS error (device sdd3): failed to read chunk tree: -22
+  BTRFS error (device sdd3): open_ctree failed
+
+[CAUSE]
+During btrfs device removal, chunk tree and super block num devs are
+updated in two different transactions:
+
+  btrfs_rm_device()
+  |- btrfs_rm_dev_item(device)
+  |  |- trans = btrfs_start_transaction()
+  |  |  Now we got transaction X
+  |  |
+  |  |- btrfs_del_item()
+  |  |  Now device item is removed from chunk tree
+  |  |
+  |  |- btrfs_commit_transaction()
+  |     Transaction X got committed, super num devs untouched,
+  |     but device item removed from chunk tree.
+  |     (AKA, super num devs is already incorrect)
+  |
+  |- cur_devices->num_devices--;
+  |- cur_devices->total_devices--;
+  |- btrfs_set_super_num_devices()
+     All those operations are not in transaction X, thus it will
+     only be written back to disk in next transaction.
+
+So if a power loss happens after transaction X in btrfs_rm_dev_item()
+is committed, but before transaction X+1 (which can be minutes away),
+we get the super num devices mismatch.
+
+[FIX]
+Instead of starting and committing a transaction inside
+btrfs_rm_dev_item(), start a transaction inside btrfs_rm_device() and
+pass it to btrfs_rm_dev_item().
+
+And only commit the transaction after everything is done.
+
+Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c |   65 ++++++++++++++++++++++-------------------------------
+ 1 file changed, 28 insertions(+), 37 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1945,23 +1945,18 @@ static void update_dev_time(const char *
+       path_put(&path);
+ }
+-static int btrfs_rm_dev_item(struct btrfs_device *device)
++static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
++                           struct btrfs_device *device)
+ {
+       struct btrfs_root *root = device->fs_info->chunk_root;
+       int ret;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+-      struct btrfs_trans_handle *trans;
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+-      trans = btrfs_start_transaction(root, 0);
+-      if (IS_ERR(trans)) {
+-              btrfs_free_path(path);
+-              return PTR_ERR(trans);
+-      }
+       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+       key.type = BTRFS_DEV_ITEM_KEY;
+       key.offset = device->devid;
+@@ -1972,21 +1967,12 @@ static int btrfs_rm_dev_item(struct btrf
+       if (ret) {
+               if (ret > 0)
+                       ret = -ENOENT;
+-              btrfs_abort_transaction(trans, ret);
+-              btrfs_end_transaction(trans);
+               goto out;
+       }
+       ret = btrfs_del_item(trans, root, path);
+-      if (ret) {
+-              btrfs_abort_transaction(trans, ret);
+-              btrfs_end_transaction(trans);
+-      }
+-
+ out:
+       btrfs_free_path(path);
+-      if (!ret)
+-              ret = btrfs_commit_transaction(trans);
+       return ret;
+ }
+@@ -2127,6 +2113,7 @@ int btrfs_rm_device(struct btrfs_fs_info
+                   struct btrfs_dev_lookup_args *args,
+                   struct block_device **bdev, fmode_t *mode)
+ {
++      struct btrfs_trans_handle *trans;
+       struct btrfs_device *device;
+       struct btrfs_fs_devices *cur_devices;
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+@@ -2142,7 +2129,7 @@ int btrfs_rm_device(struct btrfs_fs_info
+       ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
+       if (ret)
+-              goto out;
++              return ret;
+       device = btrfs_find_device(fs_info->fs_devices, args);
+       if (!device) {
+@@ -2150,27 +2137,22 @@ int btrfs_rm_device(struct btrfs_fs_info
+                       ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
+               else
+                       ret = -ENOENT;
+-              goto out;
++              return ret;
+       }
+       if (btrfs_pinned_by_swapfile(fs_info, device)) {
+               btrfs_warn_in_rcu(fs_info,
+                 "cannot remove device %s (devid %llu) due to active swapfile",
+                                 rcu_str_deref(device->name), device->devid);
+-              ret = -ETXTBSY;
+-              goto out;
++              return -ETXTBSY;
+       }
+-      if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
+-              ret = BTRFS_ERROR_DEV_TGT_REPLACE;
+-              goto out;
+-      }
++      if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
++              return BTRFS_ERROR_DEV_TGT_REPLACE;
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
+-          fs_info->fs_devices->rw_devices == 1) {
+-              ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
+-              goto out;
+-      }
++          fs_info->fs_devices->rw_devices == 1)
++              return BTRFS_ERROR_DEV_ONLY_WRITABLE;
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
+               mutex_lock(&fs_info->chunk_mutex);
+@@ -2183,14 +2165,22 @@ int btrfs_rm_device(struct btrfs_fs_info
+       if (ret)
+               goto error_undo;
+-      /*
+-       * TODO: the superblock still includes this device in its num_devices
+-       * counter although write_all_supers() is not locked out. This
+-       * could give a filesystem state which requires a degraded mount.
+-       */
+-      ret = btrfs_rm_dev_item(device);
+-      if (ret)
++      trans = btrfs_start_transaction(fs_info->chunk_root, 0);
++      if (IS_ERR(trans)) {
++              ret = PTR_ERR(trans);
+               goto error_undo;
++      }
++
++      ret = btrfs_rm_dev_item(trans, device);
++      if (ret) {
++              /* Any error in dev item removal is critical */
++              btrfs_crit(fs_info,
++                         "failed to remove device item for devid %llu: %d",
++                         device->devid, ret);
++              btrfs_abort_transaction(trans, ret);
++              btrfs_end_transaction(trans);
++              return ret;
++      }
+       clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+       btrfs_scrub_cancel_dev(device);
+@@ -2273,7 +2263,8 @@ int btrfs_rm_device(struct btrfs_fs_info
+               free_fs_devices(cur_devices);
+       }
+-out:
++      ret = btrfs_commit_transaction(trans);
++
+       return ret;
+ error_undo:
+@@ -2284,7 +2275,7 @@ error_undo:
+               device->fs_devices->rw_devices++;
+               mutex_unlock(&fs_info->chunk_mutex);
+       }
+-      goto out;
++      return ret;
+ }
+ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
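
A toy model of the failure mode (all structures invented for
illustration): two pieces of on-disk state that must agree -- the chunk
tree's device items and the superblock's num_devices -- were committed in
two different transactions, so a crash between the commits leaves them
inconsistent; committing both updates in one transaction cannot.

  #include <stdbool.h>
  #include <stdio.h>

  struct disk_state { int chunk_tree_devices; int super_num_devices; };

  /* "Commit" copies the pending in-memory state to disk. */
  static void commit(struct disk_state *disk, const struct disk_state *mem)
  {
      *disk = *mem;
  }

  static void rm_device(struct disk_state *disk, struct disk_state *mem,
                        bool single_transaction, bool crash_between_commits)
  {
      mem->chunk_tree_devices--;          /* remove the device item */
      if (!single_transaction) {
          commit(disk, mem);              /* transaction X */
          if (crash_between_commits)
              return;                     /* power loss window */
      }
      mem->super_num_devices--;           /* update the superblock copy */
      commit(disk, mem);                  /* transaction X (or X+1) */
  }

  int main(void)
  {
      struct disk_state disk = { 3, 3 }, mem;

      mem = disk;
      rm_device(&disk, &mem, false, true);    /* old flow + crash */
      printf("old flow after crash: chunk tree %d vs super %d (mismatch)\n",
             disk.chunk_tree_devices, disk.super_num_devices);

      disk = (struct disk_state){ 3, 3 };
      mem = disk;
      rm_device(&disk, &mem, true, true);     /* new flow, same crash point */
      printf("new flow after crash: chunk tree %d vs super %d\n",
             disk.chunk_tree_devices, disk.super_num_devices);
      return 0;
  }
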
diff --git a/queue-5.17/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch b/queue-5.17/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
new file mode 100644
index 0000000..e64e55a
--- /dev/null
+++ b/queue-5.17/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
@@ -0,0 +1,151 @@
+From 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 Mon Sep 17 00:00:00 2001
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Date: Mon, 7 Mar 2022 02:47:17 -0800
+Subject: btrfs: zoned: traverse devices under chunk_mutex in btrfs_can_activate_zone
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+commit 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 upstream.
+
+btrfs_can_activate_zone() can be called with the device_list_mutex already
+held, which will lead to a deadlock:
+
+insert_dev_extents() // Takes device_list_mutex
+`-> insert_dev_extent()
+ `-> btrfs_insert_empty_item()
+  `-> btrfs_insert_empty_items()
+   `-> btrfs_search_slot()
+    `-> btrfs_cow_block()
+     `-> __btrfs_cow_block()
+      `-> btrfs_alloc_tree_block()
+       `-> btrfs_reserve_extent()
+        `-> find_free_extent()
+         `-> find_free_extent_update_loop()
+          `-> can_allocate_chunk()
+           `-> btrfs_can_activate_zone() // Takes device_list_mutex again
+
+Instead of using RCU on fs_devices->device_list, we can use
+fs_devices->alloc_list, protected by the chunk_mutex, to traverse the
+list of active devices.
+
+We are in the chunk allocation thread. New chunk allocation happens
+from the devices in fs_devices->alloc_list, protected by the
+chunk_mutex.
+
+  btrfs_create_chunk()
+    lockdep_assert_held(&info->chunk_mutex);
+    gather_device_info
+      list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list)
+
+Also, a device that reappears after the mount won't have joined the
+alloc_list yet; it will be in the dev_list, which we don't want to
+consider in the context of chunk allocation.
+
+  [15.166572] WARNING: possible recursive locking detected
+  [15.167117] 5.17.0-rc6-dennis #79 Not tainted
+  [15.167487] --------------------------------------------
+  [15.167733] kworker/u8:3/146 is trying to acquire lock:
+  [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.167733]
+  [15.167733] but task is already holding lock:
+  [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs]
+  [15.167733]
+  [15.167733] other info that might help us debug this:
+  [15.167733]  Possible unsafe locking scenario:
+  [15.167733]
+  [15.171834]        CPU0
+  [15.171834]        ----
+  [15.171834]   lock(&fs_devs->device_list_mutex);
+  [15.171834]   lock(&fs_devs->device_list_mutex);
+  [15.171834]
+  [15.171834]  *** DEADLOCK ***
+  [15.171834]
+  [15.171834]  May be due to missing lock nesting notation
+  [15.171834]
+  [15.171834] 5 locks held by kworker/u8:3/146:
+  [15.171834]  #0: ffff888100050938 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0
+  [15.171834]  #1: ffffc9000067be80 ((work_completion)(&fs_info->async_data_reclaim_work)){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0
+  [15.176244]  #2: ffff88810521e620 (sb_internal){.+.+}-{0:0}, at: flush_space+0x335/0x600 [btrfs]
+  [15.176244]  #3: ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs]
+  [15.176244]  #4: ffff8881152e4b78 (btrfs-dev-00){++++}-{3:3}, at: __btrfs_tree_lock+0x27/0x130 [btrfs]
+  [15.179641]
+  [15.179641] stack backtrace:
+  [15.179641] CPU: 1 PID: 146 Comm: kworker/u8:3 Not tainted 5.17.0-rc6-dennis #79
+  [15.179641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014
+  [15.179641] Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs]
+  [15.179641] Call Trace:
+  [15.179641]  <TASK>
+  [15.179641]  dump_stack_lvl+0x45/0x59
+  [15.179641]  __lock_acquire.cold+0x217/0x2b2
+  [15.179641]  lock_acquire+0xbf/0x2b0
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  __mutex_lock+0x8e/0x970
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  ? lock_is_held_type+0xd7/0x130
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  ? _raw_spin_unlock+0x24/0x40
+  [15.183838]  ? btrfs_get_alloc_profile+0x106/0x230 [btrfs]
+  [15.187601]  btrfs_reserve_extent+0x131/0x260 [btrfs]
+  [15.187601]  btrfs_alloc_tree_block+0xb5/0x3b0 [btrfs]
+  [15.187601]  __btrfs_cow_block+0x138/0x600 [btrfs]
+  [15.187601]  btrfs_cow_block+0x10f/0x230 [btrfs]
+  [15.187601]  btrfs_search_slot+0x55f/0xbc0 [btrfs]
+  [15.187601]  ? lock_is_held_type+0xd7/0x130
+  [15.187601]  btrfs_insert_empty_items+0x2d/0x60 [btrfs]
+  [15.187601]  btrfs_create_pending_block_groups+0x2b3/0x560 [btrfs]
+  [15.187601]  __btrfs_end_transaction+0x36/0x2a0 [btrfs]
+  [15.192037]  flush_space+0x374/0x600 [btrfs]
+  [15.192037]  ? find_held_lock+0x2b/0x80
+  [15.192037]  ? btrfs_async_reclaim_data_space+0x49/0x180 [btrfs]
+  [15.192037]  ? lock_release+0x131/0x2b0
+  [15.192037]  btrfs_async_reclaim_data_space+0x70/0x180 [btrfs]
+  [15.192037]  process_one_work+0x24c/0x5a0
+  [15.192037]  worker_thread+0x4a/0x3d0
+
+Fixes: a85f05e59bc1 ("btrfs: zoned: avoid chunk allocation if active block group has enough space")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1927,18 +1927,19 @@ int btrfs_zone_finish(struct btrfs_block
+ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+ {
++      struct btrfs_fs_info *fs_info = fs_devices->fs_info;
+       struct btrfs_device *device;
+       bool ret = false;
+-      if (!btrfs_is_zoned(fs_devices->fs_info))
++      if (!btrfs_is_zoned(fs_info))
+               return true;
+       /* Non-single profiles are not supported yet */
+       ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
+       /* Check if there is a device with active zones left */
+-      mutex_lock(&fs_devices->device_list_mutex);
+-      list_for_each_entry(device, &fs_devices->devices, dev_list) {
++      mutex_lock(&fs_info->chunk_mutex);
++      list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+               struct btrfs_zoned_device_info *zinfo = device->zone_info;
+               if (!device->bdev)
+@@ -1950,7 +1951,7 @@ bool btrfs_can_activate_zone(struct btrf
+                       break;
+               }
+       }
+-      mutex_unlock(&fs_devices->device_list_mutex);
++      mutex_unlock(&fs_info->chunk_mutex);
+       return ret;
+ }
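
A minimal demo of the self-deadlock class lockdep reports above:
re-acquiring a non-recursive mutex that the same call chain already
holds. With an error-checking pthread mutex the second lock returns
EDEADLK instead of hanging; a plain kernel mutex (or default pthread
mutex) would simply block forever.

  #include <pthread.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
      pthread_mutex_t m;
      pthread_mutexattr_t attr;

      pthread_mutexattr_init(&attr);
      pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
      pthread_mutex_init(&m, &attr);

      /* Outer path takes the lock (think: device_list_mutex). */
      pthread_mutex_lock(&m);

      /* Deep in the same call chain, the lock is requested again. */
      int ret = pthread_mutex_lock(&m);
      printf("second lock: %s\n", ret ? strerror(ret) : "acquired");

      pthread_mutex_unlock(&m);
      pthread_mutex_destroy(&m);
      pthread_mutexattr_destroy(&attr);
      return 0;
  }
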
diff --git a/queue-5.17/cifs-force-new-session-setup-and-tcon-for-dfs.patch b/queue-5.17/cifs-force-new-session-setup-and-tcon-for-dfs.patch
new file mode 100644
index 0000000..bf356b1
--- /dev/null
+++ b/queue-5.17/cifs-force-new-session-setup-and-tcon-for-dfs.patch
@@ -0,0 +1,57 @@
+From fb39d30e227233498c8debe6a9fe3e7cf575c85f Mon Sep 17 00:00:00 2001
+From: Paulo Alcantara <pc@cjr.nz>
+Date: Fri, 1 Apr 2022 13:51:34 -0300
+Subject: cifs: force new session setup and tcon for dfs
+
+From: Paulo Alcantara <pc@cjr.nz>
+
+commit fb39d30e227233498c8debe6a9fe3e7cf575c85f upstream.
+
+Do not reuse existing sessions and tcons in DFS failover as it might
+connect to different servers and shares.
+
+Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Cc: stable@vger.kernel.org
+Reviewed-by: Enzo Matsumiya <ematsumiya@suse.de>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/cifs/connect.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -453,9 +453,7 @@ static int reconnect_target_unlocked(str
+       return rc;
+ }
+-static int
+-reconnect_dfs_server(struct TCP_Server_Info *server,
+-                   bool mark_smb_session)
++static int reconnect_dfs_server(struct TCP_Server_Info *server)
+ {
+       int rc = 0;
+       const char *refpath = server->current_fullpath + 1;
+@@ -479,7 +477,12 @@ reconnect_dfs_server(struct TCP_Server_I
+       if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
+               return 0;
+-      cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
++      /*
++       * Unconditionally mark all sessions & tcons for reconnect as we might be connecting to a
++       * different server or share during failover.  It could be improved by adding some logic to
++       * only do that in case it connects to a different server or share, though.
++       */
++      cifs_mark_tcp_ses_conns_for_reconnect(server, true);
+       cifs_abort_connection(server);
+@@ -537,7 +540,7 @@ int cifs_reconnect(struct TCP_Server_Inf
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
+-      return reconnect_dfs_server(server, mark_smb_session);
++      return reconnect_dfs_server(server);
+ }
+ #else
+ int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session)
diff --git a/queue-5.17/io_uring-fix-race-between-timeout-flush-and-removal.patch b/queue-5.17/io_uring-fix-race-between-timeout-flush-and-removal.patch
new file mode 100644
index 0000000..0891bc5
--- /dev/null
+++ b/queue-5.17/io_uring-fix-race-between-timeout-flush-and-removal.patch
@@ -0,0 +1,56 @@
+From e677edbcabee849bfdd43f1602bccbecf736a646 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 8 Apr 2022 11:08:58 -0600
+Subject: io_uring: fix race between timeout flush and removal
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e677edbcabee849bfdd43f1602bccbecf736a646 upstream.
+
+io_flush_timeouts() assumes the timeout isn't in the process of
+triggering or being removed/canceled, so it unconditionally removes it
+from the timeout list and attempts to cancel it.
+
+Leave it on the list and let the normal timeout cancelation take care
+of it.
+
+Cc: stable@vger.kernel.org # 5.5+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1644,12 +1644,11 @@ static __cold void io_flush_timeouts(str
+       __must_hold(&ctx->completion_lock)
+ {
+       u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
++      struct io_kiocb *req, *tmp;
+       spin_lock_irq(&ctx->timeout_lock);
+-      while (!list_empty(&ctx->timeout_list)) {
++      list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+               u32 events_needed, events_got;
+-              struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
+-                                              struct io_kiocb, timeout.list);
+               if (io_is_timeout_noseq(req))
+                       break;
+@@ -1666,7 +1665,6 @@ static __cold void io_flush_timeouts(str
+               if (events_got < events_needed)
+                       break;
+-              list_del_init(&req->timeout.list);
+               io_kill_timeout(req, 0);
+       }
+       ctx->cq_last_tm_flush = seq;
+@@ -6276,6 +6274,7 @@ static int io_timeout_prep(struct io_kio
+       if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
+               return -EINVAL;
++      INIT_LIST_HEAD(&req->timeout.list);
+       data->mode = io_translate_timeout_mode(flags);
+       hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
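
A tiny hand-rolled userspace list (purely illustrative, not the kernel
list API) showing the iteration pattern the patch switches to: once the
loop body may unlink the current entry -- as io_kill_timeout() effectively
does now that the flush path no longer removes it up front -- the next
pointer has to be fetched before the body runs, which is what
list_for_each_entry_safe() guarantees.

  #include <stdio.h>
  #include <stdlib.h>

  struct timeout {
      unsigned int seq;
      struct timeout *next;
  };

  /* Unlink and free t; pprev points at the link that references it. */
  static void kill_timeout(struct timeout **pprev, struct timeout *t)
  {
      *pprev = t->next;
      free(t);
  }

  static void flush_timeouts(struct timeout **head, unsigned int now)
  {
      struct timeout **pprev = head;
      struct timeout *req, *tmp;

      /* Safe iteration: remember the next entry before the body runs. */
      for (req = *head; req; req = tmp) {
          tmp = req->next;
          if (req->seq > now) {           /* not expired: keep it */
              pprev = &req->next;
              continue;
          }
          printf("killing timeout seq=%u\n", req->seq);
          kill_timeout(pprev, req);       /* unlinks the current entry */
      }
  }

  static void push(struct timeout **head, unsigned int seq)
  {
      struct timeout *t = malloc(sizeof(*t));
      t->seq = seq;
      t->next = *head;
      *head = t;
  }

  int main(void)
  {
      struct timeout *head = NULL;

      push(&head, 30);
      push(&head, 20);
      push(&head, 10);
      flush_timeouts(&head, 15);          /* kills seq=10, keeps 20 and 30 */

      for (struct timeout *t = head; t; t = t->next)
          printf("remaining seq=%u\n", t->seq);
      return 0;
  }
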
diff --git a/queue-5.17/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch b/queue-5.17/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
new file mode 100644
index 0000000..0aa8e6c
--- /dev/null
+++ b/queue-5.17/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
@@ -0,0 +1,39 @@
+From 0f5e4b83b37a96e3643951588ed7176b9b187c0a Mon Sep 17 00:00:00 2001
+From: Eugene Syromiatnikov <esyr@redhat.com>
+Date: Wed, 6 Apr 2022 13:55:33 +0200
+Subject: io_uring: implement compat handling for IORING_REGISTER_IOWQ_AFF
+
+From: Eugene Syromiatnikov <esyr@redhat.com>
+
+commit 0f5e4b83b37a96e3643951588ed7176b9b187c0a upstream.
+
+Similarly to the way it is done in the mbind syscall.
+
+Cc: stable@vger.kernel.org # 5.14
+Fixes: fe76421d1da1dcdb ("io_uring: allow user configurable IO thread CPU affinity")
+Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -10860,7 +10860,15 @@ static __cold int io_register_iowq_aff(s
+       if (len > cpumask_size())
+               len = cpumask_size();
+-      if (copy_from_user(new_mask, arg, len)) {
++      if (in_compat_syscall()) {
++              ret = compat_get_bitmap(cpumask_bits(new_mask),
++                                      (const compat_ulong_t __user *)arg,
++                                      len * 8 /* CHAR_BIT */);
++      } else {
++              ret = copy_from_user(new_mask, arg, len);
++      }
++
++      if (ret) {
+               free_cpumask_var(new_mask);
+               return -EFAULT;
+       }
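
A userspace sketch of what the compat conversion has to achieve
(repack_compat_bitmap() is an invented helper, not a kernel or libc API):
a 32-bit task supplies the CPU mask as 32-bit words, and bit N of that
bitmap must end up as bit N of the kernel's unsigned-long bitmap -- which
compat_get_bitmap() does portably -- rather than being blindly byte-copied.

  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>

  static void repack_compat_bitmap(uint64_t *dst, size_t dst_words,
                                   const uint32_t *src, size_t src_words)
  {
      memset(dst, 0, dst_words * sizeof(*dst));
      for (size_t i = 0; i < src_words && i / 2 < dst_words; i++)
          dst[i / 2] |= (uint64_t)src[i] << ((i % 2) * 32);
  }

  int main(void)
  {
      /* CPUs 1, 34 and 40 set, as a 32-bit-word bitmap (compat layout). */
      uint32_t compat_mask[2] = { 1u << 1, (1u << (34 - 32)) | (1u << (40 - 32)) };
      uint64_t native_mask[1];

      repack_compat_bitmap(native_mask, 1, compat_mask, 2);
      printf("native bitmap word 0 = %#llx\n",
             (unsigned long long)native_mask[0]);
      /* Expect bits 1, 34 and 40 set: 0x10400000002 */
      return 0;
  }
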
diff --git a/queue-5.17/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch b/queue-5.17/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
new file mode 100644
index 0000000..584600c
--- /dev/null
+++ b/queue-5.17/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
@@ -0,0 +1,36 @@
+From e590928de7547454469693da9bc7ffd562e54b7e Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Mon, 28 Mar 2022 08:49:03 -0700
+Subject: perf/x86/intel: Update the FRONTEND MSR mask on Sapphire Rapids
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit e590928de7547454469693da9bc7ffd562e54b7e upstream.
+
+On Sapphire Rapids, the FRONTEND_RETIRED.MS_FLOWS event requires the
+FRONTEND MSR value 0x8. However, the current FRONTEND MSR mask doesn't
+support it.
+
+Update intel_spr_extra_regs[] to support it.
+
+Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids")
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1648482543-14923-2-git-send-email-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -281,7 +281,7 @@ static struct extra_reg intel_spr_extra_
+       INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+-      INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
++      INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+       INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+       INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+       EVENT_EXTRA_END
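
A quick check of why the one-nibble mask change matters (a simplification
of the extra_reg validity check, illustrative only): the MS_FLOWS event
needs FRONTEND MSR value 0x8, and a value is usable only if it sets no
bits outside the advertised mask; 0x7fff17 rejects 0x8, 0x7fff1f accepts it.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  static bool value_fits_mask(uint64_t value, uint64_t mask)
  {
      return (value & ~mask) == 0;
  }

  int main(void)
  {
      const uint64_t ms_flows_frontend = 0x8;

      printf("old mask 0x7fff17: %s\n",
             value_fits_mask(ms_flows_frontend, 0x7fff17) ? "ok" : "rejected");
      printf("new mask 0x7fff1f: %s\n",
             value_fits_mask(ms_flows_frontend, 0x7fff1f) ? "ok" : "rejected");
      return 0;
  }
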
diff --git a/queue-5.17/qed-fix-ethtool-register-dump.patch b/queue-5.17/qed-fix-ethtool-register-dump.patch
new file mode 100644
index 0000000..ae21eaf
--- /dev/null
+++ b/queue-5.17/qed-fix-ethtool-register-dump.patch
@@ -0,0 +1,45 @@
+From 20921c0c86092b4082c91bd7c88305da74e5520b Mon Sep 17 00:00:00 2001
+From: Manish Chopra <manishc@marvell.com>
+Date: Fri, 1 Apr 2022 11:53:04 -0700
+Subject: qed: fix ethtool register dump
+
+From: Manish Chopra <manishc@marvell.com>
+
+commit 20921c0c86092b4082c91bd7c88305da74e5520b upstream.
+
+To fix a coverity complaint, commit d5ac07dfbd2b
+("qed: Initialize debug string array") removed "sw-platform"
+(one of the common global parameters) from the dump, as it
+was used in the dump with an uninitialized string.  However,
+it did not reduce the number of common global parameters,
+which caused an incorrect (unable to parse) register dump.
+
+This patch fixes it by reducing NUM_COMMON_GLOBAL_PARAMS
+by one.
+
+Cc: stable@vger.kernel.org
+Cc: Tim Gardner <tim.gardner@canonical.com>
+Cc: "David S. Miller" <davem@davemloft.net>
+Fixes: d5ac07dfbd2b ("qed: Initialize debug string array")
+Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
+Signed-off-by: Alok Prasad <palok@marvell.com>
+Signed-off-by: Ariel Elior <aelior@marvell.com>
+Signed-off-by: Manish Chopra <manishc@marvell.com>
+Reviewed-by: Tim Gardner <tim.gardner@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_debug.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
+@@ -489,7 +489,7 @@ struct split_type_defs {
+ #define STATIC_DEBUG_LINE_DWORDS      9
+-#define NUM_COMMON_GLOBAL_PARAMS      11
++#define NUM_COMMON_GLOBAL_PARAMS      10
+ #define MAX_RECURSION_DEPTH           10
diff --git a/queue-5.17/series b/queue-5.17/series
index 319e71e2a23ea1502a4cb87d9b8396b970ef8085..ad8da991dac87e9327890dfeafbbbd0a5c65e4a2 100644
--- a/queue-5.17/series
+++ b/queue-5.17/series
@@ -275,3 +275,16 @@ mmmremap.c-avoid-pointless-invalidate_range_start-end-on-mremap-old_size-0.patch
 mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace.patch
 io_uring-don-t-check-req-file-in-io_fsync_prep.patch
 io_uring-defer-splice-tee-file-validity-check-until-command-issue.patch
+io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
+io_uring-fix-race-between-timeout-flush-and-removal.patch
+x86-pm-save-the-msr-validity-status-at-context-setup.patch
+x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
+perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
+btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
+btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
+btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
+btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
+btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
+spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
+cifs-force-new-session-setup-and-tcon-for-dfs.patch
+qed-fix-ethtool-register-dump.patch
diff --git a/queue-5.17/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch b/queue-5.17/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
new file mode 100644
index 0000000..f40c5ef
--- /dev/null
+++ b/queue-5.17/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
@@ -0,0 +1,41 @@
+From 409543cec01a84610029d6440c480c3fdd7214fb Mon Sep 17 00:00:00 2001
+From: Vinod Koul <vkoul@kernel.org>
+Date: Wed, 6 Apr 2022 18:52:38 +0530
+Subject: spi: core: add dma_map_dev for __spi_unmap_msg()
+
+From: Vinod Koul <vkoul@kernel.org>
+
+commit 409543cec01a84610029d6440c480c3fdd7214fb upstream.
+
+Commit b470e10eb43f ("spi: core: add dma_map_dev for dma device") added
+dma_map_dev for __spi_map_msg() but missed adding it for the unmap
+routine, __spi_unmap_msg(), so add it now.
+
+Fixes: b470e10eb43f ("spi: core: add dma_map_dev for dma device")
+Cc: stable@vger.kernel.org # v5.14+
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Link: https://lore.kernel.org/r/20220406132238.1029249-1-vkoul@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/spi/spi.c
++++ b/drivers/spi/spi.c
+@@ -1149,11 +1149,15 @@ static int __spi_unmap_msg(struct spi_co
+       if (ctlr->dma_tx)
+               tx_dev = ctlr->dma_tx->device->dev;
++      else if (ctlr->dma_map_dev)
++              tx_dev = ctlr->dma_map_dev;
+       else
+               tx_dev = ctlr->dev.parent;
+       if (ctlr->dma_rx)
+               rx_dev = ctlr->dma_rx->device->dev;
++      else if (ctlr->dma_map_dev)
++              rx_dev = ctlr->dma_map_dev;
+       else
+               rx_dev = ctlr->dev.parent;
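
A sketch of the underlying rule (spi_get_map_dev() and the cut-down
structures are invented for illustration, not kernel API): the device
chosen to DMA-map a buffer must also be the one used to unmap it, so
factoring the fallback chain (channel DMA device -> ctlr->dma_map_dev ->
ctlr->dev.parent) into one helper shared by the map and unmap paths makes
the asymmetry fixed here impossible.

  #include <stddef.h>
  #include <stdio.h>

  struct device { const char *name; };

  struct spi_controller {
      struct device *dma_chan_dev;   /* ctlr->dma_tx->device->dev, if any */
      struct device *dma_map_dev;    /* explicit mapping device, if any */
      struct device *parent;         /* ctlr->dev.parent */
  };

  static struct device *spi_get_map_dev(const struct spi_controller *ctlr)
  {
      if (ctlr->dma_chan_dev)
          return ctlr->dma_chan_dev;
      if (ctlr->dma_map_dev)
          return ctlr->dma_map_dev;
      return ctlr->parent;
  }

  int main(void)
  {
      struct device map_dev = { "dma-mapping-dev" };
      struct device parent  = { "spi-parent" };
      struct spi_controller ctlr = { NULL, &map_dev, &parent };

      /* Both paths now resolve to the same device. */
      printf("map   with: %s\n", spi_get_map_dev(&ctlr)->name);
      printf("unmap with: %s\n", spi_get_map_dev(&ctlr)->name);
      return 0;
  }
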
diff --git a/queue-5.17/x86-pm-save-the-msr-validity-status-at-context-setup.patch b/queue-5.17/x86-pm-save-the-msr-validity-status-at-context-setup.patch
new file mode 100644
index 0000000..47adc22
--- /dev/null
+++ b/queue-5.17/x86-pm-save-the-msr-validity-status-at-context-setup.patch
@@ -0,0 +1,55 @@
+From 73924ec4d560257004d5b5116b22a3647661e364 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Apr 2022 17:34:19 -0700
+Subject: x86/pm: Save the MSR validity status at context setup
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 73924ec4d560257004d5b5116b22a3647661e364 upstream.
+
+The mechanism to save/restore MSRs during S3 suspend/resume checks for
+the MSR validity during suspend, and only restores the MSR if it's a
+valid MSR.  This is not optimal, as an invalid MSR will unnecessarily
+throw an exception for every suspend cycle.  The more invalid MSRs there
+are, the higher the impact will be.
+
+Check and save the MSR validity at setup.  This ensures that only valid
+MSRs that are guaranteed to not throw an exception will be attempted
+during suspend.
+
+Fixes: 7a9c2dd08ead ("x86/pm: Introduce quirk framework to save/restore extra MSR registers around suspend/resume")
+Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -40,7 +40,8 @@ static void msr_save_context(struct save
+       struct saved_msr *end = msr + ctxt->saved_msrs.num;
+       while (msr < end) {
+-              msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
++              if (msr->valid)
++                      rdmsrl(msr->info.msr_no, msr->info.reg.q);
+               msr++;
+       }
+ }
+@@ -424,8 +425,10 @@ static int msr_build_context(const u32 *
+       }
+       for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
++              u64 dummy;
++
+               msr_array[i].info.msr_no        = msr_id[j];
+-              msr_array[i].valid              = false;
++              msr_array[i].valid              = !rdmsrl_safe(msr_id[j], &dummy);
+               msr_array[i].info.reg.q         = 0;
+       }
+       saved_msrs->num   = total_num;
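
A userspace analogue of the pattern (requires root and the msr module;
MSR numbers and behaviour vary by CPU, so treat this as a sketch): reads
go through /dev/cpu/0/msr, where pread() at offset <msr> returns that
MSR's value or fails if it does not exist. Validity is probed once at
setup and cached, so later save passes never touch MSRs known to be
absent -- the same idea as caching msr->valid at context setup.

  #include <fcntl.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  struct saved_msr { uint32_t msr_no; bool valid; uint64_t value; };

  static bool probe_msr(int fd, uint32_t msr_no, uint64_t *out)
  {
      return pread(fd, out, sizeof(*out), msr_no) == sizeof(*out);
  }

  int main(void)
  {
      /* 0x10 = IA32_TIME_STAMP_COUNTER (exists); 0x2 is typically invalid. */
      struct saved_msr msrs[] = { { 0x10, false, 0 }, { 0x02, false, 0 } };
      int fd = open("/dev/cpu/0/msr", O_RDONLY);

      if (fd < 0) {
          perror("open /dev/cpu/0/msr");
          return 1;
      }

      /* Setup: probe validity once. */
      for (size_t i = 0; i < sizeof(msrs) / sizeof(msrs[0]); i++)
          msrs[i].valid = probe_msr(fd, msrs[i].msr_no, &msrs[i].value);

      /* Every later save pass only reads MSRs already known to be valid. */
      for (size_t i = 0; i < sizeof(msrs) / sizeof(msrs[0]); i++)
          printf("msr %#x: %s\n", msrs[i].msr_no,
                 msrs[i].valid ? "saved" : "skipped (invalid)");

      close(fd);
      return 0;
  }
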
diff --git a/queue-5.17/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch b/queue-5.17/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
new file mode 100644
index 0000000..807ce86
--- /dev/null
+++ b/queue-5.17/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
@@ -0,0 +1,60 @@
+From e2a1256b17b16f9b9adf1b6fea56819e7b68e463 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Apr 2022 17:35:45 -0700
+Subject: x86/speculation: Restore speculation related MSRs during S3 resume
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit e2a1256b17b16f9b9adf1b6fea56819e7b68e463 upstream.
+
+After resuming from suspend-to-RAM, the MSRs that control the CPU's
+speculative execution behavior are not restored on the boot CPU.
+
+These MSRs are used to mitigate speculative execution vulnerabilities.
+Not restoring them correctly may leave the CPU vulnerable.  Secondary
+CPUs' MSRs are correctly restored at S3 resume by
+identify_secondary_cpu().
+
+During S3 resume, restore these MSRs for boot CPU when restoring its
+processor state.
+
+Fixes: 772439717dbf ("x86/bugs/intel: Set proper CPU features and setup RDS")
+Reported-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -503,10 +503,24 @@ static int pm_cpu_check(const struct x86
+       return ret;
+ }
++static void pm_save_spec_msr(void)
++{
++      u32 spec_msr_id[] = {
++              MSR_IA32_SPEC_CTRL,
++              MSR_IA32_TSX_CTRL,
++              MSR_TSX_FORCE_ABORT,
++              MSR_IA32_MCU_OPT_CTRL,
++              MSR_AMD64_LS_CFG,
++      };
++
++      msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
++}
++
+ static int pm_check_save_msr(void)
+ {
+       dmi_check_system(msr_save_dmi_table);
+       pm_cpu_check(msr_save_cpu_table);
++      pm_save_spec_msr();
+       return 0;
+ }