--- /dev/null
+From b0017602fdf6bd3f344dd49eaee8b6ffeed6dbac Mon Sep 17 00:00:00 2001
+From: Dominique Martinet <asmadeus@codewreck.org>
+Date: Tue, 14 Jun 2022 12:19:02 +0900
+Subject: 9p: fix EBADF errors in cached mode
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+commit b0017602fdf6bd3f344dd49eaee8b6ffeed6dbac upstream.
+
+Cached operations sometimes need to do invalid operations (e.g. a read
+on a write-only file).
+Historic fscache had added a "writeback fid", a special handle opened
+RW as root, for this. The conversion to new fscache missed that bit.
+
+This commit reinstates a slightly lesser variant of the original code
+that uses the writeback fid for partial page backfills if the regular
+user fid had been opened as WRONLY, and thus would lack read permissions.
+
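+A minimal userspace sketch of the failure trigger (illustrative, not
+part of the patch; it assumes a cache-enabled 9p mount at /mnt/9p): a
+sub-page write through an O_WRONLY descriptor makes netfs read the
+surrounding page first, which a write-only fid cannot serve.
+
+  #include <fcntl.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          /* Partial-page write: the page cache must backfill the rest
+           * of the page (NETFS_READ_FOR_WRITE) before the write, and
+           * without the writeback fid that read failed with EBADF. */
+          int fd = open("/mnt/9p/file", O_WRONLY | O_CREAT, 0644);
+
+          if (fd < 0)
+                  return 1;
+          pwrite(fd, "x", 1, 100);
+          close(fd);
+          return 0;
+  }
+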
+Link: https://lkml.kernel.org/r/20220614033802.1606738-1-asmadeus@codewreck.org
+Fixes: eb497943fa21 ("9p: Convert to using the netfs helper lib to do reads and caching")
+Cc: stable@vger.kernel.org
+Cc: David Howells <dhowells@redhat.com>
+Reported-By: Christian Schoenebeck <linux_oss@crudebyte.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Tested-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/9p/vfs_addr.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/fs/9p/vfs_addr.c
++++ b/fs/9p/vfs_addr.c
+@@ -58,8 +58,21 @@ static void v9fs_issue_read(struct netfs
+ */
+ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
+ {
++ struct inode *inode = file_inode(file);
++ struct v9fs_inode *v9inode = V9FS_I(inode);
+ struct p9_fid *fid = file->private_data;
+
++ BUG_ON(!fid);
++
++ /* we might need to read from a fid that was opened write-only
++ * for read-modify-write of page cache, use the writeback fid
++ * for that */
++ if (rreq->origin == NETFS_READ_FOR_WRITE &&
++ (fid->mode & O_ACCMODE) == O_WRONLY) {
++ fid = v9inode->writeback_fid;
++ BUG_ON(!fid);
++ }
++
+ refcount_inc(&fid->count);
+ rreq->netfs_priv = fid;
+ return 0;
--- /dev/null
+From beca774fc51a9ba8abbc869cf0c3d965ff17cd24 Mon Sep 17 00:00:00 2001
+From: Dominique Martinet <asmadeus@codewreck.org>
+Date: Sun, 12 Jun 2022 16:00:05 +0900
+Subject: 9p: fix fid refcount leak in v9fs_vfs_atomic_open_dotl
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+commit beca774fc51a9ba8abbc869cf0c3d965ff17cd24 upstream.
+
+We need to release the directory fid if we fail halfway through open.
+
+This fixes fid leaking with xfstests generic/531.
+
+Link: https://lkml.kernel.org/r/20220612085330.1451496-2-asmadeus@codewreck.org
+Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct")
+Cc: stable@vger.kernel.org
+Reported-by: Tyler Hicks <tyhicks@linux.microsoft.com>
+Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/9p/vfs_inode_dotl.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/9p/vfs_inode_dotl.c
++++ b/fs/9p/vfs_inode_dotl.c
+@@ -274,6 +274,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *
+ if (IS_ERR(ofid)) {
+ err = PTR_ERR(ofid);
+ p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
++ p9_client_clunk(dfid);
+ goto out;
+ }
+
+@@ -285,6 +286,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *
+ if (err) {
+ p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
+ err);
++ p9_client_clunk(dfid);
+ goto error;
+ }
+ err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+@@ -292,6 +294,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *
+ if (err < 0) {
+ p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
+ err);
++ p9_client_clunk(dfid);
+ goto error;
+ }
+ v9fs_invalidate_inode_attr(dir);
--- /dev/null
+From e5690f263208c5abce7451370b7786eb25b405eb Mon Sep 17 00:00:00 2001
+From: Dominique Martinet <asmadeus@codewreck.org>
+Date: Sun, 12 Jun 2022 17:14:55 +0900
+Subject: 9p: fix fid refcount leak in v9fs_vfs_get_link
+
+From: Dominique Martinet <asmadeus@codewreck.org>
+
+commit e5690f263208c5abce7451370b7786eb25b405eb upstream.
+
+We check for the protocol version later than required, after a fid has
+been obtained. Just move the version check earlier.
+
+Link: https://lkml.kernel.org/r/20220612085330.1451496-3-asmadeus@codewreck.org
+Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct")
+Cc: stable@vger.kernel.org
+Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/9p/vfs_inode.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/fs/9p/vfs_inode.c
++++ b/fs/9p/vfs_inode.c
+@@ -1250,15 +1250,15 @@ static const char *v9fs_vfs_get_link(str
+ return ERR_PTR(-ECHILD);
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+- fid = v9fs_fid_lookup(dentry);
++ if (!v9fs_proto_dotu(v9ses))
++ return ERR_PTR(-EBADF);
++
+ p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
++ fid = v9fs_fid_lookup(dentry);
+
+ if (IS_ERR(fid))
+ return ERR_CAST(fid);
+
+- if (!v9fs_proto_dotu(v9ses))
+- return ERR_PTR(-EBADF);
+-
+ st = p9_client_stat(fid);
+ p9_client_clunk(fid);
+ if (IS_ERR(st))
--- /dev/null
+From 2a3dcbccd64ba35c045fac92272ff981c4cbef44 Mon Sep 17 00:00:00 2001
+From: Tyler Hicks <tyhicks@linux.microsoft.com>
+Date: Thu, 26 May 2022 18:59:59 -0500
+Subject: 9p: Fix refcounting during full path walks for fid lookups
+
+From: Tyler Hicks <tyhicks@linux.microsoft.com>
+
+commit 2a3dcbccd64ba35c045fac92272ff981c4cbef44 upstream.
+
+Decrement the refcount of the parent dentry's fid after walking
+each path component during a full path walk for a lookup. Failure to do
+so can lead to fids that are not clunked until the filesystem is
+unmounted, as indicated by this warning:
+
+ 9pnet: found fid 3 not clunked
+
+The improper refcounting after walking resulted in open(2) returning
+-EIO on any directories underneath the mount point when using the virtio
+transport. When using the fd transport, there's no apparent issue until
+the filesystem is unmounted and the warning above is emitted to the logs.
+
+In some cases, the user may not yet be attached to the filesystem and a
+new root fid, associated with the user, is created and attached to the
+root dentry before the full path walk is performed. Increment the new
+root fid's refcount to two in that situation so that it can be safely
+decremented to one after it is used for the walk operation. The new fid
+will still be attached to the root dentry when
+v9fs_fid_lookup_with_uid() returns so a final refcount of one is
+correct/expected.
+
+Link: https://lkml.kernel.org/r/20220527000003.355812-2-tyhicks@linux.microsoft.com
+Link: https://lkml.kernel.org/r/20220612085330.1451496-4-asmadeus@codewreck.org
+Fixes: 6636b6dcc3db ("9p: add refcount to p9_fid struct")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+[Dominique: fix clunking fid multiple times discussed in second link]
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/9p/fid.c | 22 +++++++++-------------
+ 1 file changed, 9 insertions(+), 13 deletions(-)
+
+--- a/fs/9p/fid.c
++++ b/fs/9p/fid.c
+@@ -152,7 +152,7 @@ static struct p9_fid *v9fs_fid_lookup_wi
+ const unsigned char **wnames, *uname;
+ int i, n, l, clone, access;
+ struct v9fs_session_info *v9ses;
+- struct p9_fid *fid, *old_fid = NULL;
++ struct p9_fid *fid, *old_fid;
+
+ v9ses = v9fs_dentry2v9ses(dentry);
+ access = v9ses->flags & V9FS_ACCESS_MASK;
+@@ -194,13 +194,12 @@ static struct p9_fid *v9fs_fid_lookup_wi
+ if (IS_ERR(fid))
+ return fid;
+
++ refcount_inc(&fid->count);
+ v9fs_fid_add(dentry->d_sb->s_root, fid);
+ }
+ /* If we are root ourself just return that */
+- if (dentry->d_sb->s_root == dentry) {
+- refcount_inc(&fid->count);
++ if (dentry->d_sb->s_root == dentry)
+ return fid;
+- }
+ /*
+ * Do a multipath walk with attached root.
+ * When walking parent we need to make sure we
+@@ -212,6 +211,7 @@ static struct p9_fid *v9fs_fid_lookup_wi
+ fid = ERR_PTR(n);
+ goto err_out;
+ }
++ old_fid = fid;
+ clone = 1;
+ i = 0;
+ while (i < n) {
+@@ -221,19 +221,15 @@ static struct p9_fid *v9fs_fid_lookup_wi
+ * walk to ensure none of the patch component change
+ */
+ fid = p9_client_walk(fid, l, &wnames[i], clone);
++ /* non-cloning walk will return the same fid */
++ if (fid != old_fid) {
++ p9_client_clunk(old_fid);
++ old_fid = fid;
++ }
+ if (IS_ERR(fid)) {
+- if (old_fid) {
+- /*
+- * If we fail, clunk fid which are mapping
+- * to path component and not the last component
+- * of the path.
+- */
+- p9_client_clunk(old_fid);
+- }
+ kfree(wnames);
+ goto err_out;
+ }
+- old_fid = fid;
+ i += l;
+ clone = 0;
+ }
--- /dev/null
+From 540a92bfe6dab7310b9df2e488ba247d784d0163 Mon Sep 17 00:00:00 2001
+From: Edward Wu <edwardwu@realtek.com>
+Date: Fri, 17 Jun 2022 11:32:20 +0800
+Subject: ata: libata: add qc->flags in ata_qc_complete_template tracepoint
+
+From: Edward Wu <edwardwu@realtek.com>
+
+commit 540a92bfe6dab7310b9df2e488ba247d784d0163 upstream.
+
+Add the flags value to allow checking the result of ATA completion.
+
+Fixes: 255c03d15a29 ("libata: Add tracepoints")
+Cc: stable@vger.kernel.org
+Signed-off-by: Edward Wu <edwardwu@realtek.com>
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/libata.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/trace/events/libata.h
++++ b/include/trace/events/libata.h
+@@ -288,6 +288,7 @@ DECLARE_EVENT_CLASS(ata_qc_complete_temp
+ __entry->hob_feature = qc->result_tf.hob_feature;
+ __entry->nsect = qc->result_tf.nsect;
+ __entry->hob_nsect = qc->result_tf.hob_nsect;
++ __entry->flags = qc->flags;
+ ),
+
+ TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \
--- /dev/null
+From e3a4167c880cf889f66887a152799df4d609dd21 Mon Sep 17 00:00:00 2001
+From: David Sterba <dsterba@suse.com>
+Date: Thu, 2 Jun 2022 23:57:17 +0200
+Subject: btrfs: add error messages to all unrecognized mount options
+
+From: David Sterba <dsterba@suse.com>
+
+commit e3a4167c880cf889f66887a152799df4d609dd21 upstream.
+
+Almost none of the errors stemming from a valid mount option with a wrong
+value print a descriptive message that would help identify why the
+mount failed, as in the linked report:
+
+ $ uname -r
+ v4.19
+ $ mount -o compress=zstd /dev/sdb /mnt
+ mount: /mnt: wrong fs type, bad option, bad superblock on
+ /dev/sdb, missing codepage or helper program, or other error.
+ $ dmesg
+ ...
+ BTRFS error (device sdb): open_ctree failed
+
+Errors caused by memory allocation failures are left out as they are not
+user errors, so reporting them would be confusing.
+
+Link: https://lore.kernel.org/linux-btrfs/9c3fec36-fc61-3a33-4977-a7e207c3fa4e@gmx.de/
+CC: stable@vger.kernel.org # 4.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c | 39 ++++++++++++++++++++++++++++++++-------
+ 1 file changed, 32 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -763,6 +763,8 @@ int btrfs_parse_options(struct btrfs_fs_
+ compress_force = false;
+ no_compress++;
+ } else {
++ btrfs_err(info, "unrecognized compression value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -821,8 +823,11 @@ int btrfs_parse_options(struct btrfs_fs_
+ case Opt_thread_pool:
+ ret = match_int(&args[0], &intarg);
+ if (ret) {
++ btrfs_err(info, "unrecognized thread_pool value %s",
++ args[0].from);
+ goto out;
+ } else if (intarg == 0) {
++ btrfs_err(info, "invalid value 0 for thread_pool");
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -883,8 +888,11 @@ int btrfs_parse_options(struct btrfs_fs_
+ break;
+ case Opt_ratio:
+ ret = match_int(&args[0], &intarg);
+- if (ret)
++ if (ret) {
++ btrfs_err(info, "unrecognized metadata_ratio value %s",
++ args[0].from);
+ goto out;
++ }
+ info->metadata_ratio = intarg;
+ btrfs_info(info, "metadata ratio %u",
+ info->metadata_ratio);
+@@ -901,6 +909,8 @@ int btrfs_parse_options(struct btrfs_fs_
+ btrfs_set_and_info(info, DISCARD_ASYNC,
+ "turning on async discard");
+ } else {
++ btrfs_err(info, "unrecognized discard mode value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -933,6 +943,8 @@ int btrfs_parse_options(struct btrfs_fs_
+ btrfs_set_and_info(info, FREE_SPACE_TREE,
+ "enabling free space tree");
+ } else {
++ btrfs_err(info, "unrecognized space_cache value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -1014,8 +1026,12 @@ int btrfs_parse_options(struct btrfs_fs_
+ break;
+ case Opt_check_integrity_print_mask:
+ ret = match_int(&args[0], &intarg);
+- if (ret)
++ if (ret) {
++ btrfs_err(info,
++ "unrecognized check_integrity_print_mask value %s",
++ args[0].from);
+ goto out;
++ }
+ info->check_integrity_print_mask = intarg;
+ btrfs_info(info, "check_integrity_print_mask 0x%x",
+ info->check_integrity_print_mask);
+@@ -1030,13 +1046,15 @@ int btrfs_parse_options(struct btrfs_fs_
+ goto out;
+ #endif
+ case Opt_fatal_errors:
+- if (strcmp(args[0].from, "panic") == 0)
++ if (strcmp(args[0].from, "panic") == 0) {
+ btrfs_set_opt(info->mount_opt,
+ PANIC_ON_FATAL_ERROR);
+- else if (strcmp(args[0].from, "bug") == 0)
++ } else if (strcmp(args[0].from, "bug") == 0) {
+ btrfs_clear_opt(info->mount_opt,
+ PANIC_ON_FATAL_ERROR);
+- else {
++ } else {
++ btrfs_err(info, "unrecognized fatal_errors value %s",
++ args[0].from);
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -1044,8 +1062,12 @@ int btrfs_parse_options(struct btrfs_fs_
+ case Opt_commit_interval:
+ intarg = 0;
+ ret = match_int(&args[0], &intarg);
+- if (ret)
++ if (ret) {
++ btrfs_err(info, "unrecognized commit_interval value %s",
++ args[0].from);
++ ret = -EINVAL;
+ goto out;
++ }
+ if (intarg == 0) {
+ btrfs_info(info,
+ "using default commit interval %us",
+@@ -1059,8 +1081,11 @@ int btrfs_parse_options(struct btrfs_fs_
+ break;
+ case Opt_rescue:
+ ret = parse_rescue_options(info, args[0].from);
+- if (ret < 0)
++ if (ret < 0) {
++ btrfs_err(info, "unrecognized rescue value %s",
++ args[0].from);
+ goto out;
++ }
+ break;
+ #ifdef CONFIG_BTRFS_DEBUG
+ case Opt_fragment_all:
--- /dev/null
+From 31e70e527806c546a72262f2fc3d982ee23c42d3 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 18 May 2022 10:41:48 +0100
+Subject: btrfs: fix hang during unmount when block group reclaim task is running
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 31e70e527806c546a72262f2fc3d982ee23c42d3 upstream.
+
+When we start an unmount, at close_ctree(), if we have the reclaim task
+running and in the middle of a data block group relocation, we can trigger
+a deadlock when stopping an async reclaim task, producing a trace like the
+following:
+
+[629724.498185] task:kworker/u16:7 state:D stack: 0 pid:681170 ppid: 2 flags:0x00004000
+[629724.499760] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs]
+[629724.501267] Call Trace:
+[629724.501759] <TASK>
+[629724.502174] __schedule+0x3cb/0xed0
+[629724.502842] schedule+0x4e/0xb0
+[629724.503447] btrfs_wait_on_delayed_iputs+0x7c/0xc0 [btrfs]
+[629724.504534] ? prepare_to_wait_exclusive+0xc0/0xc0
+[629724.505442] flush_space+0x423/0x630 [btrfs]
+[629724.506296] ? rcu_read_unlock_trace_special+0x20/0x50
+[629724.507259] ? lock_release+0x220/0x4a0
+[629724.507932] ? btrfs_get_alloc_profile+0xb3/0x290 [btrfs]
+[629724.508940] ? do_raw_spin_unlock+0x4b/0xa0
+[629724.509688] btrfs_async_reclaim_metadata_space+0x139/0x320 [btrfs]
+[629724.510922] process_one_work+0x252/0x5a0
+[629724.511694] ? process_one_work+0x5a0/0x5a0
+[629724.512508] worker_thread+0x52/0x3b0
+[629724.513220] ? process_one_work+0x5a0/0x5a0
+[629724.514021] kthread+0xf2/0x120
+[629724.514627] ? kthread_complete_and_exit+0x20/0x20
+[629724.515526] ret_from_fork+0x22/0x30
+[629724.516236] </TASK>
+[629724.516694] task:umount state:D stack: 0 pid:719055 ppid:695412 flags:0x00004000
+[629724.518269] Call Trace:
+[629724.518746] <TASK>
+[629724.519160] __schedule+0x3cb/0xed0
+[629724.519835] schedule+0x4e/0xb0
+[629724.520467] schedule_timeout+0xed/0x130
+[629724.521221] ? lock_release+0x220/0x4a0
+[629724.521946] ? lock_acquired+0x19c/0x420
+[629724.522662] ? trace_hardirqs_on+0x1b/0xe0
+[629724.523411] __wait_for_common+0xaf/0x1f0
+[629724.524189] ? usleep_range_state+0xb0/0xb0
+[629724.524997] __flush_work+0x26d/0x530
+[629724.525698] ? flush_workqueue_prep_pwqs+0x140/0x140
+[629724.526580] ? lock_acquire+0x1a0/0x310
+[629724.527324] __cancel_work_timer+0x137/0x1c0
+[629724.528190] close_ctree+0xfd/0x531 [btrfs]
+[629724.529000] ? evict_inodes+0x166/0x1c0
+[629724.529510] generic_shutdown_super+0x74/0x120
+[629724.530103] kill_anon_super+0x14/0x30
+[629724.530611] btrfs_kill_super+0x12/0x20 [btrfs]
+[629724.531246] deactivate_locked_super+0x31/0xa0
+[629724.531817] cleanup_mnt+0x147/0x1c0
+[629724.532319] task_work_run+0x5c/0xa0
+[629724.532984] exit_to_user_mode_prepare+0x1a6/0x1b0
+[629724.533598] syscall_exit_to_user_mode+0x16/0x40
+[629724.534200] do_syscall_64+0x48/0x90
+[629724.534667] entry_SYSCALL_64_after_hwframe+0x44/0xae
+[629724.535318] RIP: 0033:0x7fa2b90437a7
+[629724.535804] RSP: 002b:00007ffe0b7e4458 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
+[629724.536912] RAX: 0000000000000000 RBX: 00007fa2b9182264 RCX: 00007fa2b90437a7
+[629724.538156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000555d6cf20dd0
+[629724.539053] RBP: 0000555d6cf20ba0 R08: 0000000000000000 R09: 00007ffe0b7e3200
+[629724.539956] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+[629724.540883] R13: 0000555d6cf20dd0 R14: 0000555d6cf20cb0 R15: 0000000000000000
+[629724.541796] </TASK>
+
+This happens because:
+
+1) Before entering close_ctree() we have the async block group reclaim
+ task running and relocating a data block group;
+
+2) There's an async metadata (or data) space reclaim task running;
+
+3) We enter close_ctree() and park the cleaner kthread;
+
+4) The async space reclaim task is at flush_space() and runs all the
+ existing delayed iputs;
+
+5) Before the async space reclaim task calls
+ btrfs_wait_on_delayed_iputs(), the block group reclaim task which is
+ doing the data block group relocation, creates a delayed iput at
+ replace_file_extents() (called when COWing leaves that have file extent
+ items pointing to relocated data extents, during the merging phase
+ of relocation roots);
+
+6) The async space reclaim task blocks at
+ btrfs_wait_on_delayed_iputs(), since we have a new delayed iput;
+
+7) The task at close_ctree() then calls cancel_work_sync() to stop the
+ async space reclaim task, but it blocks since that task is waiting for
+ the delayed iput to be run;
+
+8) The delayed iput is never run because the cleaner kthread is parked,
+ and no one else runs delayed iputs, resulting in a hang.
+
+So fix this by stopping the async block group reclaim task before we
+park the cleaner kthread.
+
+Fixes: 18bb8bbf13c183 ("btrfs: zoned: automatically reclaim zones")
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -4639,6 +4639,17 @@ void __cold close_ctree(struct btrfs_fs_
+ int ret;
+
+ set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
++
++ /*
++ * We may have the reclaim task running and relocating a data block group,
++ * in which case it may create delayed iputs. So stop it before we park
++ * the cleaner kthread otherwise we can get new delayed iputs after
++ * parking the cleaner, and that can make the async reclaim task to hang
++ * if it's waiting for delayed iputs to complete, since the cleaner is
++ * parked and can not run delayed iputs - this will make us hang when
++ * trying to stop the async reclaim task.
++ */
++ cancel_work_sync(&fs_info->reclaim_bgs_work);
+ /*
+ * We don't want the cleaner to start new transactions, add more delayed
+ * iputs, etc. while we're closing. We can't use kthread_stop() yet
+@@ -4679,8 +4690,6 @@ void __cold close_ctree(struct btrfs_fs_
+ cancel_work_sync(&fs_info->async_data_reclaim_work);
+ cancel_work_sync(&fs_info->preempt_reclaim_work);
+
+- cancel_work_sync(&fs_info->reclaim_bgs_work);
+-
+ /* Cancel or finish ongoing discard work */
+ btrfs_discard_cleanup(fs_info);
+
--- /dev/null
+From 0591f04036218d572d54349ea8c7914ad9c82b2b Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 18 May 2022 13:03:09 +0800
+Subject: btrfs: prevent remounting to v1 space cache for subpage mount
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 0591f04036218d572d54349ea8c7914ad9c82b2b upstream.
+
+Upstream commit 9f73f1aef98b ("btrfs: force v2 space cache usage for
+subpage mount") forces subpage mounts to use the v2 cache, to avoid the
+deprecated v1 cache which doesn't support subpage properly.
+
+But there is a loophole: the user can still remount to the v1 cache.
+
+The existing check will only give users a warning, but does not really
+prevent the remount.
+
+Although remounting to v1 will not cause any problems, since the v1 cache
+will always be marked invalid when mounted with a different page size,
+it's still better to prevent the v1 cache entirely for subpage mounts.
+
+Fixes: 9f73f1aef98b ("btrfs: force v2 space cache usage for subpage mount")
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/super.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -1986,6 +1986,14 @@ static int btrfs_remount(struct super_bl
+ if (ret)
+ goto restore;
+
++ /* V1 cache is not supported for subpage mount. */
++ if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
++ btrfs_warn(fs_info,
++ "v1 space cache is not supported for page size %lu with sectorsize %u",
++ PAGE_SIZE, fs_info->sectorsize);
++ ret = -EINVAL;
++ goto restore;
++ }
+ btrfs_remount_begin(fs_info, old_opts, *flags);
+ btrfs_resize_thread_pool(fs_info,
+ fs_info->thread_pool_size, old_thread_pool_size);
--- /dev/null
+From 78ccef91234ba331c04d71f3ecb1377451d21056 Mon Sep 17 00:00:00 2001
+From: Mike Snitzer <snitzer@kernel.org>
+Date: Tue, 21 Jun 2022 13:37:06 -0400
+Subject: dm: do not return early from dm_io_complete if BLK_STS_AGAIN without polling
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+commit 78ccef91234ba331c04d71f3ecb1377451d21056 upstream.
+
+Commit 5291984004edf ("dm: fix bio polling to handle possibile
+BLK_STS_AGAIN") inadvertently introduced an early return from
+dm_io_complete() without first queueing the bio to DM if BLK_STS_AGAIN
+occurs and bio-polling is _not_ being used.
+
+Fix this by only returning early from dm_io_complete() if the bio has
+first been properly queued to DM. Otherwise, the bio will never finish
+via bio_endio.
+
+Fixes: 5291984004edf ("dm: fix bio polling to handle possibile BLK_STS_AGAIN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -899,9 +899,11 @@ static void dm_io_complete(struct dm_io
+ if (io_error == BLK_STS_AGAIN) {
+ /* io_uring doesn't handle BLK_STS_AGAIN (yet) */
+ queue_io(md, bio);
++ return;
+ }
+ }
+- return;
++ if (io_error == BLK_STS_DM_REQUEUE)
++ return;
+ }
+
+ if (bio_is_flush_with_data(bio)) {
--- /dev/null
+From 9ae6e8b1c9bbf6874163d1243e393137313762b7 Mon Sep 17 00:00:00 2001
+From: Nikos Tsironis <ntsironis@arrikto.com>
+Date: Tue, 21 Jun 2022 15:24:03 +0300
+Subject: dm era: commit metadata in postsuspend after worker stops
+
+From: Nikos Tsironis <ntsironis@arrikto.com>
+
+commit 9ae6e8b1c9bbf6874163d1243e393137313762b7 upstream.
+
+During postsuspend dm-era does the following:
+
+1. Archives the current era
+2. Commits the metadata, as part of the RPC call for archiving the
+ current era
+3. Stops the worker
+
+Until the worker stops, it might write to the metadata again. Moreover,
+these writes are not flushed to disk immediately, but are cached by the
+dm-bufio client, which writes them back asynchronously.
+
+As a result, the committed metadata of a suspended dm-era device might
+not be consistent with the in-core metadata.
+
+In some cases, this can result in the corruption of the on-disk
+metadata. Suppose the following sequence of events:
+
+1. Load a new table, e.g. a snapshot-origin table, to a device with a
+ dm-era table
+2. Suspend the device
+3. dm-era commits its metadata, but the worker does a few more metadata
+ writes until it stops, as part of digesting an archived writeset
+4. These writes are cached by the dm-bufio client
+5. Load the dm-era table to another device.
+6. The new instance of the dm-era target loads the committed, on-disk
+ metadata, which don't include the extra writes done by the worker
+ after the metadata commit.
+7. Resume the new device
+8. The new dm-era target instance starts using the metadata
+9. Resume the original device
+10. The destructor of the old dm-era target instance is called and
+ destroys the dm-bufio client, which results in flushing the cached
+ writes to disk
+11. These writes might overwrite the writes done by the new dm-era
+ instance, hence corrupting its metadata.
+
+Fix this by committing the metadata after the worker stops running.
+
+stop_worker uses flush_workqueue to flush the current work. However, the
+work item may re-queue itself and flush_workqueue doesn't wait for
+re-queued works to finish.
+
+This could result in the worker changing the metadata after they have
+been committed, or writing to the metadata concurrently with the commit
+in the postsuspend thread.
+
+Use drain_workqueue instead, which waits until the work and all
+re-queued works finish.
+
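+An illustrative sketch of the flush_workqueue() pitfall (hypothetical
+names, not code from this driver): a work item that re-queues itself can
+still be pending after flush_workqueue() returns, whereas
+drain_workqueue() waits until the queue stays empty.
+
+  #include <linux/atomic.h>
+  #include <linux/kernel.h>
+  #include <linux/workqueue.h>
+
+  struct requeue_ctx {
+          struct workqueue_struct *wq;
+          struct work_struct work;
+          atomic_t remaining;
+  };
+
+  static void requeue_fn(struct work_struct *w)
+  {
+          struct requeue_ctx *ctx =
+                  container_of(w, struct requeue_ctx, work);
+
+          /* flush_workqueue() only waits for this instance; the
+           * re-queued one below may run (and write metadata) later. */
+          if (atomic_dec_return(&ctx->remaining) > 0)
+                  queue_work(ctx->wq, &ctx->work);
+  }
+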
+Fixes: eec40579d8487 ("dm: add era target")
+Cc: stable@vger.kernel.org # v3.15+
+Signed-off-by: Nikos Tsironis <ntsironis@arrikto.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-era-target.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/md/dm-era-target.c
++++ b/drivers/md/dm-era-target.c
+@@ -1400,7 +1400,7 @@ static void start_worker(struct era *era
+ static void stop_worker(struct era *era)
+ {
+ atomic_set(&era->suspended, 1);
+- flush_workqueue(era->wq);
++ drain_workqueue(era->wq);
+ }
+
+ /*----------------------------------------------------------------
+@@ -1570,6 +1570,12 @@ static void era_postsuspend(struct dm_ta
+ }
+
+ stop_worker(era);
++
++ r = metadata_commit(era->md);
++ if (r) {
++ DMERR("%s: metadata_commit failed", __func__);
++ /* FIXME: fail mode */
++ }
+ }
+
+ static int era_preresume(struct dm_target *ti)
--- /dev/null
+From 90736eb3232d208ee048493f371075e4272e0944 Mon Sep 17 00:00:00 2001
+From: Mikulas Patocka <mpatocka@redhat.com>
+Date: Thu, 23 Jun 2022 14:53:25 -0400
+Subject: dm mirror log: clear log bits up to BITS_PER_LONG boundary
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+commit 90736eb3232d208ee048493f371075e4272e0944 upstream.
+
+Commit 85e123c27d5c ("dm mirror log: round up region bitmap size to
+BITS_PER_LONG") introduced a regression on 64-bit architectures in the
+lvm testsuite tests: lvcreate-mirror, mirror-names and vgsplit-operation.
+
+If the device is shrunk, we need to clear log bits beyond the end of the
+device. The code clears bits up to a 32-bit boundary and then calculates
+lc->sync_count by summing set bits up to a 64-bit boundary (the commit
+changed that; previously, this boundary was 32-bit too). So, it was using
+some non-zeroed bits in the calculation and this caused misbehavior.
+
+Fix this regression by clearing bits up to BITS_PER_LONG boundary.
+
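+A worked example of the mismatch (illustrative, 64-bit, assuming a
+shrunk region_count of 70): the old bound cleared bits only up to the
+next 32-bit boundary, i.e. bits 70..95, leaving bits 96..127 of the
+last long stale, while lc->sync_count was summed over whole 64-bit
+longs (bits 0..127) and therefore counted them. The fixed loop from
+the patch clears up to the BITS_PER_LONG boundary instead:
+
+  /* clear any old bits -- device has shrunk */
+  for (i = lc->region_count; i % BITS_PER_LONG; i++)
+          log_clear_bit(lc, lc->clean_bits, i);
+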
+Fixes: 85e123c27d5c ("dm mirror log: round up region bitmap size to BITS_PER_LONG")
+Cc: stable@vger.kernel.org
+Reported-by: Benjamin Marzinski <bmarzins@redhat.com>
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-log.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/md/dm-log.c
++++ b/drivers/md/dm-log.c
+@@ -615,7 +615,7 @@ static int disk_resume(struct dm_dirty_l
+ log_clear_bit(lc, lc->clean_bits, i);
+
+ /* clear any old bits -- device has shrunk */
+- for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
++ for (i = lc->region_count; i % BITS_PER_LONG; i++)
+ log_clear_bit(lc, lc->clean_bits, i);
+
+ /* copy clean across to sync */
--- /dev/null
+From b4a028c4d031c27704ad73b1195ca69a1206941e Mon Sep 17 00:00:00 2001
+From: Riccardo Paolo Bestetti <pbl@bestov.io>
+Date: Fri, 17 Jun 2022 10:54:35 +0200
+Subject: ipv4: ping: fix bind address validity check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Riccardo Paolo Bestetti <pbl@bestov.io>
+
+commit b4a028c4d031c27704ad73b1195ca69a1206941e upstream.
+
+Commit 8ff978b8b222 ("ipv4/raw: support binding to nonlocal addresses")
+introduced a helper function to fold duplicated validity checks of bind
+addresses into inet_addr_valid_or_nonlocal(). However, this caused an
+unintended regression in ping_check_bind_addr(), which previously would
+reject binding to multicast and broadcast addresses, but now these are
+both incorrectly allowed as reported in [1].
+
+This patch restores the original check. A simple reordering is done to
+improve readability and make it evident that multicast and broadcast
+addresses should not be allowed. Also, add an early exit for INADDR_ANY,
+which restores the behavior added by commit 0ce779a9f501 ("net: Avoid
+unnecessary inet_addr_type() call when addr is INADDR_ANY") that was
+since lost.
+
+Furthermore, this patch introduces regression selftests to catch these
+specific cases.
+
+[1] https://lore.kernel.org/netdev/CANP3RGdkAcDyAZoT1h8Gtuu0saq+eOrrTiWbxnOs+5zn+cpyKg@mail.gmail.com/
+
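+A small regression check in C (illustrative; creating an unprivileged
+ICMP socket assumes net.ipv4.ping_group_range covers the caller's
+group): binding a ping socket to the broadcast address must again fail
+with EADDRNOTAVAIL.
+
+  #include <arpa/inet.h>
+  #include <netinet/in.h>
+  #include <stdio.h>
+  #include <sys/socket.h>
+
+  int main(void)
+  {
+          struct sockaddr_in sa = {
+                  .sin_family = AF_INET,
+                  .sin_addr.s_addr = htonl(INADDR_BROADCAST),
+          };
+          int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+
+          if (fd < 0)
+                  return 1;
+          /* Expected after this fix: bind() fails, errno == EADDRNOTAVAIL */
+          if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
+                  perror("bind");
+          return 0;
+  }
+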
+Fixes: 8ff978b8b222 ("ipv4/raw: support binding to nonlocal addresses")
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: Carlos Llamas <cmllamas@google.com>
+Signed-off-by: Riccardo Paolo Bestetti <pbl@bestov.io>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ping.c | 10 ++++++---
+ tools/testing/selftests/net/fcnal-test.sh | 33 ++++++++++++++++++++++++++++++
+ 2 files changed, 40 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/ping.c
++++ b/net/ipv4/ping.c
+@@ -319,12 +319,16 @@ static int ping_check_bind_addr(struct s
+ pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
+ sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+
++ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
++ return 0;
++
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
+
+- if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
+- addr->sin_addr.s_addr,
+- chk_addr_ret))
++ if (chk_addr_ret == RTN_MULTICAST ||
++ chk_addr_ret == RTN_BROADCAST ||
++ (chk_addr_ret != RTN_LOCAL &&
++ !inet_can_nonlocal_bind(net, isk)))
+ return -EADDRNOTAVAIL;
+
+ #if IS_ENABLED(CONFIG_IPV6)
+--- a/tools/testing/selftests/net/fcnal-test.sh
++++ b/tools/testing/selftests/net/fcnal-test.sh
+@@ -70,6 +70,10 @@ NSB_LO_IP6=2001:db8:2::2
+ NL_IP=172.17.1.1
+ NL_IP6=2001:db8:4::1
+
++# multicast and broadcast addresses
++MCAST_IP=224.0.0.1
++BCAST_IP=255.255.255.255
++
+ MD5_PW=abc123
+ MD5_WRONG_PW=abc1234
+
+@@ -308,6 +312,9 @@ addr2str()
+ 127.0.0.1) echo "loopback";;
+ ::1) echo "IPv6 loopback";;
+
++ ${BCAST_IP}) echo "broadcast";;
++ ${MCAST_IP}) echo "multicast";;
++
+ ${NSA_IP}) echo "ns-A IP";;
+ ${NSA_IP6}) echo "ns-A IPv6";;
+ ${NSA_LO_IP}) echo "ns-A loopback IP";;
+@@ -1801,6 +1808,19 @@ ipv4_addr_bind_novrf()
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind"
+
+ #
++ # check that ICMP sockets cannot bind to broadcast and multicast addresses
++ #
++ a=${BCAST_IP}
++ log_start
++ run_cmd nettest -s -R -P icmp -l ${a} -b
++ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
++
++ a=${MCAST_IP}
++ log_start
++ run_cmd nettest -s -R -P icmp -f -l ${a} -b
++ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
++
++ #
+ # tcp sockets
+ #
+ a=${NSA_IP}
+@@ -1858,6 +1878,19 @@ ipv4_addr_bind_vrf()
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+ #
++ # check that ICMP sockets cannot bind to broadcast and multicast addresses
++ #
++ a=${BCAST_IP}
++ log_start
++ run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
++ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
++
++ a=${MCAST_IP}
++ log_start
++ run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
++ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
++
++ #
+ # tcp sockets
+ #
+ for a in ${NSA_IP} ${VRF_IP}
--- /dev/null
+From c242507c1b895646b4a25060df13b6214805759f Mon Sep 17 00:00:00 2001
+From: Joerg Roedel <jroedel@suse.de>
+Date: Fri, 24 Jun 2022 14:51:39 +0200
+Subject: MAINTAINERS: Add new IOMMU development mailing list
+
+From: Joerg Roedel <jroedel@suse.de>
+
+commit c242507c1b895646b4a25060df13b6214805759f upstream.
+
+The IOMMU mailing list will move from lists.linux-foundation.org to
+lists.linux.dev. The hard switch of the archive will happen on July
+5th, but add the new list now so that people start using it when
+sending patches. After July 5th the old list will disappear.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Link: https://lore.kernel.org/r/20220624125139.412-1-joro@8bytes.org
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ MAINTAINERS | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -427,6 +427,7 @@ ACPI VIOT DRIVER
+ M: Jean-Philippe Brucker <jean-philippe@linaro.org>
+ L: linux-acpi@vger.kernel.org
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ F: drivers/acpi/viot.c
+ F: include/linux/acpi_viot.h
+@@ -960,6 +961,7 @@ AMD IOMMU (AMD-VI)
+ M: Joerg Roedel <joro@8bytes.org>
+ R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+ F: drivers/iommu/amd/
+@@ -5898,6 +5900,7 @@ M: Christoph Hellwig <hch@lst.de>
+ M: Marek Szyprowski <m.szyprowski@samsung.com>
+ R: Robin Murphy <robin.murphy@arm.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ W: http://git.infradead.org/users/hch/dma-mapping.git
+ T: git git://git.infradead.org/users/hch/dma-mapping.git
+@@ -5910,6 +5913,7 @@ F: kernel/dma/
+ DMA MAPPING BENCHMARK
+ M: Xiang Chen <chenxiang66@hisilicon.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ F: kernel/dma/map_benchmark.c
+ F: tools/testing/selftests/dma/
+
+@@ -7476,6 +7480,7 @@ F: drivers/gpu/drm/exynos/exynos_dp*
+ EXYNOS SYSMMU (IOMMU) driver
+ M: Marek Szyprowski <m.szyprowski@samsung.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ F: drivers/iommu/exynos-iommu.c
+
+@@ -9875,6 +9880,7 @@ INTEL IOMMU (VT-d)
+ M: David Woodhouse <dwmw2@infradead.org>
+ M: Lu Baolu <baolu.lu@linux.intel.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+ F: drivers/iommu/intel/
+@@ -10253,6 +10259,7 @@ IOMMU DRIVERS
+ M: Joerg Roedel <joro@8bytes.org>
+ M: Will Deacon <will@kernel.org>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Maintained
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
+ F: Documentation/devicetree/bindings/iommu/
+@@ -12369,6 +12376,7 @@ F: drivers/i2c/busses/i2c-mt65xx.c
+ MEDIATEK IOMMU DRIVER
+ M: Yong Wu <yong.wu@mediatek.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+ S: Supported
+ F: Documentation/devicetree/bindings/iommu/mediatek*
+@@ -16354,6 +16362,7 @@ F: drivers/i2c/busses/i2c-qcom-cci.c
+ QUALCOMM IOMMU
+ M: Rob Clark <robdclark@gmail.com>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ L: linux-arm-msm@vger.kernel.org
+ S: Maintained
+ F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
+@@ -18939,6 +18948,7 @@ F: arch/x86/boot/video*
+ SWIOTLB SUBSYSTEM
+ M: Christoph Hellwig <hch@infradead.org>
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ W: http://git.infradead.org/users/hch/dma-mapping.git
+ T: git git://git.infradead.org/users/hch/dma-mapping.git
+@@ -21609,6 +21619,7 @@ M: Juergen Gross <jgross@suse.com>
+ M: Stefano Stabellini <sstabellini@kernel.org>
+ L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
+ L: iommu@lists.linux-foundation.org
++L: iommu@lists.linux.dev
+ S: Supported
+ F: arch/x86/xen/*swiotlb*
+ F: drivers/xen/*swiotlb*
--- /dev/null
+From 89bcd9a64b849380ef57e3032b307574e48db524 Mon Sep 17 00:00:00 2001
+From: Mengqi Zhang <mengqi.zhang@mediatek.com>
+Date: Thu, 9 Jun 2022 19:22:39 +0800
+Subject: mmc: mediatek: wait dma stop bit reset to 0
+
+From: Mengqi Zhang <mengqi.zhang@mediatek.com>
+
+commit 89bcd9a64b849380ef57e3032b307574e48db524 upstream.
+
+The MediaTek IP requires that after DMA stop, we wait for this DMA stop
+bit to auto-reset to 0. When the bus is under high load, it can take a
+while for the DMA stop to complete. Without this wait, the bus will
+hang when the code goes on to clear the FIFO and reset.
+
+In addition, there should be no return in msdc_data_xfer_next() if
+there is data to be transferred, because no matter what error occurs
+here, it should continue on to the following mmc_request_done().
+Otherwise the core layer may wait for completion forever.
+
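+The waiting pattern used below, as a hedged sketch (field and register
+names as in the driver): set the stop bit, then poll until the
+controller has auto-cleared it before clearing the FIFO or resetting.
+
+  sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP, 1);
+  /* The stop bit self-clears once DMA has actually stopped. */
+  ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CTRL, val,
+                                  !(val & MSDC_DMA_CTRL_STOP), 1, 20000);
+  if (ret)
+          dev_dbg(host->dev, "DMA stop timed out\n");
+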
+Signed-off-by: Mengqi Zhang <mengqi.zhang@mediatek.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220609112239.18911-1-mengqi.zhang@mediatek.com
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/mtk-sd.c | 20 ++++++++++++--------
+ 1 file changed, 12 insertions(+), 8 deletions(-)
+
+--- a/drivers/mmc/host/mtk-sd.c
++++ b/drivers/mmc/host/mtk-sd.c
+@@ -1356,7 +1356,7 @@ static void msdc_data_xfer_next(struct m
+ msdc_request_done(host, mrq);
+ }
+
+-static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
++static void msdc_data_xfer_done(struct msdc_host *host, u32 events,
+ struct mmc_request *mrq, struct mmc_data *data)
+ {
+ struct mmc_command *stop;
+@@ -1376,7 +1376,7 @@ static bool msdc_data_xfer_done(struct m
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ if (done)
+- return true;
++ return;
+ stop = data->stop;
+
+ if (check_data || (stop && stop->error)) {
+@@ -1385,12 +1385,15 @@ static bool msdc_data_xfer_done(struct m
+ sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP,
+ 1);
+
++ ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CTRL, val,
++ !(val & MSDC_DMA_CTRL_STOP), 1, 20000);
++ if (ret)
++ dev_dbg(host->dev, "DMA stop timed out\n");
++
+ ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CFG, val,
+ !(val & MSDC_DMA_CFG_STS), 1, 20000);
+- if (ret) {
+- dev_dbg(host->dev, "DMA stop timed out\n");
+- return false;
+- }
++ if (ret)
++ dev_dbg(host->dev, "DMA inactive timed out\n");
+
+ sdr_clr_bits(host->base + MSDC_INTEN, data_ints_mask);
+ dev_dbg(host->dev, "DMA stop\n");
+@@ -1415,9 +1418,7 @@ static bool msdc_data_xfer_done(struct m
+ }
+
+ msdc_data_xfer_next(host, mrq);
+- done = true;
+ }
+- return done;
+ }
+
+ static void msdc_set_buswidth(struct msdc_host *host, u32 width)
+@@ -2416,6 +2417,9 @@ static void msdc_cqe_disable(struct mmc_
+ if (recovery) {
+ sdr_set_field(host->base + MSDC_DMA_CTRL,
+ MSDC_DMA_CTRL_STOP, 1);
++ if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CTRL, val,
++ !(val & MSDC_DMA_CTRL_STOP), 1, 3000)))
++ return;
+ if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CFG, val,
+ !(val & MSDC_DMA_CFG_STS), 1, 3000)))
+ return;
--- /dev/null
+From e591fcf6b4e39335c9b128b17738fcd2fdd278ae Mon Sep 17 00:00:00 2001
+From: Chevron Li <chevron.li@bayhubtech.com>
+Date: Thu, 2 Jun 2022 06:25:43 -0700
+Subject: mmc: sdhci-pci-o2micro: Fix card detect by dealing with debouncing
+
+From: Chevron Li <chevron.li@bayhubtech.com>
+
+commit e591fcf6b4e39335c9b128b17738fcd2fdd278ae upstream.
+
+The result from ->get_cd() may be incorrect as the card detect debouncing
+isn't managed correctly. Let's fix it.
+
+Signed-off-by: Chevron Li<chevron.li@bayhubtech.com>
+Fixes: 7d44061704dd ("mmc: sdhci-pci-o2micro: Fix O2 Host data read/write DLL Lock phase shift issue")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220602132543.596-1-chevron.li@bayhubtech.com
+[Ulf: Updated the commit message]
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-pci-o2micro.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/mmc/host/sdhci-pci-o2micro.c
++++ b/drivers/mmc/host/sdhci-pci-o2micro.c
+@@ -152,6 +152,8 @@ static int sdhci_o2_get_cd(struct mmc_ho
+
+ if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS))
+ sdhci_o2_enable_internal_clock(host);
++ else
++ sdhci_o2_wait_card_detect_stable(host);
+
+ return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
+ }
--- /dev/null
+From 06781a5026350cde699d2d10c9914a25c1524f45 Mon Sep 17 00:00:00 2001
+From: Sascha Hauer <s.hauer@pengutronix.de>
+Date: Tue, 14 Jun 2022 10:31:38 +0200
+Subject: mtd: rawnand: gpmi: Fix setting busy timeout setting
+
+From: Sascha Hauer <s.hauer@pengutronix.de>
+
+commit 06781a5026350cde699d2d10c9914a25c1524f45 upstream.
+
+The DEVICE_BUSY_TIMEOUT value is described in the Reference Manual as:
+
+| Timeout waiting for NAND Ready/Busy or ATA IRQ. Used in WAIT_FOR_READY
+| mode. This value is the number of GPMI_CLK cycles multiplied by 4096.
+
+So instead of multiplying the value in cycles by 4096, we have to
+divide it by that value. Use DIV_ROUND_UP to make sure we are on the
+safe side, especially when the calculated value in cycles is smaller
+than 4096, as is typically the case.
+
+This bug likely never triggered because any timeout != 0 usually will
+do. In my case the busy timeout in cycles was originally calculated as
+2408, which multiplied with 4096 is 0x968000. The lower 16 bits were
+taken for the 16 bit wide register field, so the register value was
+0x8000. With 2970bf5a32f0 ("mtd: rawnand: gpmi: fix controller timings
+setting") however the value in cycles became 2384, which multiplied
+with 4096 is 0x950000. The lower 16 bits are 0x0 now, resulting in an
+immediate timeout when reading from NAND.
+
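+The arithmetic, spelled out (values taken from the paragraph above):
+
+  /* Old: multiply, then truncate to the 16-bit register field. */
+  2408 * 4096 = 0x968000   /* low 16 bits: 0x8000 -> happens to work */
+  2384 * 4096 = 0x950000   /* low 16 bits: 0x0000 -> times out */
+
+  /* New: divide, rounding up so small cycle counts stay non-zero. */
+  DIV_ROUND_UP(2384, 4096) = 1
+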
+Fixes: b1206122069aa ("mtd: rawnand: gpmi: use core timings instead of an empirical derivation")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20220614083138.3455683-1-s.hauer@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
++++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+@@ -695,7 +695,7 @@ static int gpmi_nfc_compute_timings(stru
+ hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) |
+ BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) |
+ BF_GPMI_TIMING0_DATA_SETUP(data_setup_cycles);
+- hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(busy_timeout_cycles * 4096);
++ hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(DIV_ROUND_UP(busy_timeout_cycles, 4096));
+
+ /*
+ * Derive NFC ideal delay from {3}:
--- /dev/null
+From 12378a5a75e33f34f8586706eb61cca9e6d4690c Mon Sep 17 00:00:00 2001
+From: Rosemarie O'Riorden <roriorden@redhat.com>
+Date: Tue, 21 Jun 2022 16:48:45 -0400
+Subject: net: openvswitch: fix parsing of nw_proto for IPv6 fragments
+
+From: Rosemarie O'Riorden <roriorden@redhat.com>
+
+commit 12378a5a75e33f34f8586706eb61cca9e6d4690c upstream.
+
+When a packet enters the OVS datapath and does not match any existing
+flows installed in the kernel flow cache, the packet will be sent to
+userspace to be parsed, and a new flow will be created. The kernel and
+OVS rely on each other to parse packet fields in the same way so that
+packets will be handled properly.
+
+As per the design document linked below, OVS expects all later IPv6
+fragments to have nw_proto=44 in the flow key, so they can be correctly
+matched on OpenFlow rules. OpenFlow controllers create pipelines based
+on this design.
+
+This behavior was changed by the commit in the Fixes tag so that
+nw_proto equals the next_header field of the last extension header.
+However, there is no counterpart for this change in OVS userspace,
+meaning that this field is parsed differently between OVS and the
+kernel. This is a problem because OVS creates actions based on what is
+parsed in userspace, but the kernel-provided flow key is used as the match
+criteria, as described in Documentation/networking/openvswitch.rst. This
+leads to issues such as packets incorrectly matching on a flow and thus
+the wrong list of actions being applied to the packet. Such changes in
+packet parsing cannot be implemented without breaking userspace.
+
+The offending commit is partially reverted to restore the expected
+behavior.
+
+The change technically made sense and there is a good reason that it was
+implemented, but it does not comply with the original design of OVS.
+If in the future someone wants to implement such a change, then it must
+be user-configurable and disabled by default to preserve backwards
+compatibility with existing OVS versions.
+
+Cc: stable@vger.kernel.org
+Fixes: fa642f08839b ("openvswitch: Derive IP protocol number for IPv6 later frags")
+Link: https://docs.openvswitch.org/en/latest/topics/design/#fragments
+Signed-off-by: Rosemarie O'Riorden <roriorden@redhat.com>
+Acked-by: Eelco Chaudron <echaudro@redhat.com>
+Link: https://lore.kernel.org/r/20220621204845.9721-1-roriorden@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/flow.c
++++ b/net/openvswitch/flow.c
+@@ -407,7 +407,7 @@ static int parse_ipv6hdr(struct sk_buff
+ if (flags & IP6_FH_F_FRAG) {
+ if (frag_off) {
+ key->ip.frag = OVS_FRAG_TYPE_LATER;
+- key->ip.proto = nexthdr;
++ key->ip.proto = NEXTHDR_FRAGMENT;
+ return 0;
+ }
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
--- /dev/null
+From 72ea7fe0db73d65c7d977208842d8ade9b823de9 Mon Sep 17 00:00:00 2001
+From: Tyrel Datwyler <tyreld@linux.ibm.com>
+Date: Thu, 16 Jun 2022 12:11:26 -0700
+Subject: scsi: ibmvfc: Allocate/free queue resource only during probe/remove
+
+From: Tyrel Datwyler <tyreld@linux.ibm.com>
+
+commit 72ea7fe0db73d65c7d977208842d8ade9b823de9 upstream.
+
+Currently, the sub-queues and event pool resources are allocated/freed for
+every CRQ connection event such as reset and LPM. This exposes the driver
+to a couple of issues. First, the inefficiency of freeing and reallocating
+memory that can simply be reused after being sanitized. Further, a system
+under memory pressure runs the risk of allocation failures that could result
+in a crippled driver. Finally, there is a race window where command
+submission/completion can try to pull/return elements from/to an event
+pool that is being deleted or already has been deleted due to the lack of
+host state around freeing/allocating resources. The following is an example
+of list corruption following a live partition migration (LPM):
+
+Oops: Exception in kernel mode, sig: 5 [#1]
+LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
+Modules linked in: vfat fat isofs cdrom ext4 mbcache jbd2 nft_counter nft_compat nf_tables nfnetlink rpadlpar_io rpaphp xsk_diag nfsv3 nfs_acl nfs lockd grace fscache netfs rfkill bonding tls sunrpc pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth vmx_crypto dm_multipath dm_mirror dm_region_hash dm_log dm_mod ipmi_devintf ipmi_msghandler fuse
+CPU: 0 PID: 2108 Comm: ibmvfc_0 Kdump: loaded Not tainted 5.14.0-70.9.1.el9_0.ppc64le #1
+NIP: c0000000007c4bb0 LR: c0000000007c4bac CTR: 00000000005b9a10
+REGS: c00000025c10b760 TRAP: 0700 Not tainted (5.14.0-70.9.1.el9_0.ppc64le)
+MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 2800028f XER: 0000000f
+CFAR: c0000000001f55bc IRQMASK: 0
+ GPR00: c0000000007c4bac c00000025c10ba00 c000000002a47c00 000000000000004e
+ GPR04: c0000031e3006f88 c0000031e308bd00 c00000025c10b768 0000000000000027
+ GPR08: 0000000000000000 c0000031e3009dc0 00000031e0eb0000 0000000000000000
+ GPR12: c0000031e2ffffa8 c000000002dd0000 c000000000187108 c00000020fcee2c0
+ GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+ GPR20: 0000000000000000 0000000000000000 0000000000000000 c008000002f81300
+ GPR24: 5deadbeef0000100 5deadbeef0000122 c000000263ba6910 c00000024cc88000
+ GPR28: 000000000000003c c0000002430a0000 c0000002430ac300 000000000000c300
+NIP [c0000000007c4bb0] __list_del_entry_valid+0x90/0x100
+LR [c0000000007c4bac] __list_del_entry_valid+0x8c/0x100
+Call Trace:
+[c00000025c10ba00] [c0000000007c4bac] __list_del_entry_valid+0x8c/0x100 (unreliable)
+[c00000025c10ba60] [c008000002f42284] ibmvfc_free_queue+0xec/0x210 [ibmvfc]
+[c00000025c10bb10] [c008000002f4246c] ibmvfc_deregister_scsi_channel+0xc4/0x160 [ibmvfc]
+[c00000025c10bba0] [c008000002f42580] ibmvfc_release_sub_crqs+0x78/0x130 [ibmvfc]
+[c00000025c10bc20] [c008000002f4f6cc] ibmvfc_do_work+0x5c4/0xc70 [ibmvfc]
+[c00000025c10bce0] [c008000002f4fdec] ibmvfc_work+0x74/0x1e8 [ibmvfc]
+[c00000025c10bda0] [c0000000001872b8] kthread+0x1b8/0x1c0
+[c00000025c10be10] [c00000000000cd64] ret_from_kernel_thread+0x5c/0x64
+Instruction dump:
+40820034 38600001 38210060 4e800020 7c0802a6 7c641b78 3c62fe7a 7d254b78
+3863b590 f8010070 4ba309cd 60000000 <0fe00000> 7c0802a6 3c62fe7a 3863b640
+---[ end trace 11a2b65a92f8b66c ]---
+ibmvfc 30000003: Send warning. Receive queue closed, will retry.
+
+Add registration/deregistration helpers that are called instead during
+connection resets to sanitize and reconfigure the queues.
+
+Link: https://lore.kernel.org/r/20220616191126.1281259-3-tyreld@linux.ibm.com
+Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels")
+Cc: stable@vger.kernel.org
+Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
+Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ibmvscsi/ibmvfc.c | 79 ++++++++++++++++++++++++++++++++---------
+ 1 file changed, 62 insertions(+), 17 deletions(-)
+
+--- a/drivers/scsi/ibmvscsi/ibmvfc.c
++++ b/drivers/scsi/ibmvscsi/ibmvfc.c
+@@ -160,8 +160,8 @@ static void ibmvfc_npiv_logout(struct ib
+ static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
+ static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
+
+-static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
+-static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
++static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *);
++static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *);
+
+ static const char *unknown_error = "unknown error";
+
+@@ -917,7 +917,7 @@ static int ibmvfc_reenable_crq_queue(str
+ struct vio_dev *vdev = to_vio_dev(vhost->dev);
+ unsigned long flags;
+
+- ibmvfc_release_sub_crqs(vhost);
++ ibmvfc_dereg_sub_crqs(vhost);
+
+ /* Re-enable the CRQ */
+ do {
+@@ -936,7 +936,7 @@ static int ibmvfc_reenable_crq_queue(str
+ spin_unlock(vhost->crq.q_lock);
+ spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+- ibmvfc_init_sub_crqs(vhost);
++ ibmvfc_reg_sub_crqs(vhost);
+
+ return rc;
+ }
+@@ -955,7 +955,7 @@ static int ibmvfc_reset_crq(struct ibmvf
+ struct vio_dev *vdev = to_vio_dev(vhost->dev);
+ struct ibmvfc_queue *crq = &vhost->crq;
+
+- ibmvfc_release_sub_crqs(vhost);
++ ibmvfc_dereg_sub_crqs(vhost);
+
+ /* Close the CRQ */
+ do {
+@@ -988,7 +988,7 @@ static int ibmvfc_reset_crq(struct ibmvf
+ spin_unlock(vhost->crq.q_lock);
+ spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+- ibmvfc_init_sub_crqs(vhost);
++ ibmvfc_reg_sub_crqs(vhost);
+
+ return rc;
+ }
+@@ -5759,9 +5759,6 @@ static int ibmvfc_register_scsi_channel(
+
+ ENTER;
+
+- if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT))
+- return -ENOMEM;
+-
+ rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
+ &scrq->cookie, &scrq->hw_irq);
+
+@@ -5801,7 +5798,6 @@ irq_failed:
+ rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
+ } while (rtas_busy_delay(rc));
+ reg_failed:
+- ibmvfc_free_queue(vhost, scrq);
+ LEAVE;
+ return rc;
+ }
+@@ -5827,12 +5823,50 @@ static void ibmvfc_deregister_scsi_chann
+ if (rc)
+ dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc);
+
+- ibmvfc_free_queue(vhost, scrq);
++ /* Clean out the queue */
++ memset(scrq->msgs.crq, 0, PAGE_SIZE);
++ scrq->cur = 0;
++
++ LEAVE;
++}
++
++static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost)
++{
++ int i, j;
++
++ ENTER;
++ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
++ return;
++
++ for (i = 0; i < nr_scsi_hw_queues; i++) {
++ if (ibmvfc_register_scsi_channel(vhost, i)) {
++ for (j = i; j > 0; j--)
++ ibmvfc_deregister_scsi_channel(vhost, j - 1);
++ vhost->do_enquiry = 0;
++ return;
++ }
++ }
++
++ LEAVE;
++}
++
++static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost)
++{
++ int i;
++
++ ENTER;
++ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
++ return;
++
++ for (i = 0; i < nr_scsi_hw_queues; i++)
++ ibmvfc_deregister_scsi_channel(vhost, i);
++
+ LEAVE;
+ }
+
+ static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
+ {
++ struct ibmvfc_queue *scrq;
+ int i, j;
+
+ ENTER;
+@@ -5848,30 +5882,41 @@ static void ibmvfc_init_sub_crqs(struct
+ }
+
+ for (i = 0; i < nr_scsi_hw_queues; i++) {
+- if (ibmvfc_register_scsi_channel(vhost, i)) {
+- for (j = i; j > 0; j--)
+- ibmvfc_deregister_scsi_channel(vhost, j - 1);
++ scrq = &vhost->scsi_scrqs.scrqs[i];
++ if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) {
++ for (j = i; j > 0; j--) {
++ scrq = &vhost->scsi_scrqs.scrqs[j - 1];
++ ibmvfc_free_queue(vhost, scrq);
++ }
+ kfree(vhost->scsi_scrqs.scrqs);
+ vhost->scsi_scrqs.scrqs = NULL;
+ vhost->scsi_scrqs.active_queues = 0;
+ vhost->do_enquiry = 0;
+- break;
++ vhost->mq_enabled = 0;
++ return;
+ }
+ }
+
++ ibmvfc_reg_sub_crqs(vhost);
++
+ LEAVE;
+ }
+
+ static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
+ {
++ struct ibmvfc_queue *scrq;
+ int i;
+
+ ENTER;
+ if (!vhost->scsi_scrqs.scrqs)
+ return;
+
+- for (i = 0; i < nr_scsi_hw_queues; i++)
+- ibmvfc_deregister_scsi_channel(vhost, i);
++ ibmvfc_dereg_sub_crqs(vhost);
++
++ for (i = 0; i < nr_scsi_hw_queues; i++) {
++ scrq = &vhost->scsi_scrqs.scrqs[i];
++ ibmvfc_free_queue(vhost, scrq);
++ }
+
+ kfree(vhost->scsi_scrqs.scrqs);
+ vhost->scsi_scrqs.scrqs = NULL;
--- /dev/null
+From aeaadcde1a60138bceb65de3cdaeec78170b4459 Mon Sep 17 00:00:00 2001
+From: Tyrel Datwyler <tyreld@linux.ibm.com>
+Date: Thu, 16 Jun 2022 12:11:25 -0700
+Subject: scsi: ibmvfc: Store vhost pointer during subcrq allocation
+
+From: Tyrel Datwyler <tyreld@linux.ibm.com>
+
+commit aeaadcde1a60138bceb65de3cdaeec78170b4459 upstream.
+
+Currently the back pointer from a queue to the vhost adapter isn't set
+until after subcrq interrupt registration. The value is available when a
+queue is first allocated, and it can and should also be set for the
+primary and async queues as well as the subcrqs.
+
+This fixes a crash observed during kexec/kdump on Power 9 with the
+legacy XICS interrupt controller, where a pending subcrq interrupt from
+the previous kernel can be replayed immediately upon IRQ registration,
+resulting in the dereference of a garbage backpointer in
+ibmvfc_interrupt_scsi().
+
+Kernel attempted to read user page (58) - exploit attempt? (uid: 0)
+BUG: Kernel NULL pointer dereference on read at 0x00000058
+Faulting instruction address: 0xc008000003216a08
+Oops: Kernel access of bad area, sig: 11 [#1]
+...
+NIP [c008000003216a08] ibmvfc_interrupt_scsi+0x40/0xb0 [ibmvfc]
+LR [c0000000082079e8] __handle_irq_event_percpu+0x98/0x270
+Call Trace:
+[c000000047fa3d80] [c0000000123e6180] 0xc0000000123e6180 (unreliable)
+[c000000047fa3df0] [c0000000082079e8] __handle_irq_event_percpu+0x98/0x270
+[c000000047fa3ea0] [c000000008207d18] handle_irq_event+0x98/0x188
+[c000000047fa3ef0] [c00000000820f564] handle_fasteoi_irq+0xc4/0x310
+[c000000047fa3f40] [c000000008205c60] generic_handle_irq+0x50/0x80
+[c000000047fa3f60] [c000000008015c40] __do_irq+0x70/0x1a0
+[c000000047fa3f90] [c000000008016d7c] __do_IRQ+0x9c/0x130
+[c000000014622f60] [0000000020000000] 0x20000000
+[c000000014622ff0] [c000000008016e50] do_IRQ+0x40/0xa0
+[c000000014623020] [c000000008017044] replay_soft_interrupts+0x194/0x2f0
+[c000000014623210] [c0000000080172a8] arch_local_irq_restore+0x108/0x170
+[c000000014623240] [c000000008eb1008] _raw_spin_unlock_irqrestore+0x58/0xb0
+[c000000014623270] [c00000000820b12c] __setup_irq+0x49c/0x9f0
+[c000000014623310] [c00000000820b7c0] request_threaded_irq+0x140/0x230
+[c000000014623380] [c008000003212a50] ibmvfc_register_scsi_channel+0x1e8/0x2f0 [ibmvfc]
+[c000000014623450] [c008000003213d1c] ibmvfc_init_sub_crqs+0xc4/0x1f0 [ibmvfc]
+[c0000000146234d0] [c0080000032145a8] ibmvfc_reset_crq+0x150/0x210 [ibmvfc]
+[c000000014623550] [c0080000032147c8] ibmvfc_init_crq+0x160/0x280 [ibmvfc]
+[c0000000146235f0] [c00800000321a9cc] ibmvfc_probe+0x2a4/0x530 [ibmvfc]
+
+Link: https://lore.kernel.org/r/20220616191126.1281259-2-tyreld@linux.ibm.com
+Fixes: 3034ebe26389 ("scsi: ibmvfc: Add alloc/dealloc routines for SCSI Sub-CRQ Channels")
+Cc: stable@vger.kernel.org
+Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
+Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++-
+ drivers/scsi/ibmvscsi/ibmvfc.h | 2 +-
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/scsi/ibmvscsi/ibmvfc.c
++++ b/drivers/scsi/ibmvscsi/ibmvfc.c
+@@ -5682,6 +5682,8 @@ static int ibmvfc_alloc_queue(struct ibm
+ queue->cur = 0;
+ queue->fmt = fmt;
+ queue->size = PAGE_SIZE / fmt_size;
++
++ queue->vhost = vhost;
+ return 0;
+ }
+
+@@ -5790,7 +5792,6 @@ static int ibmvfc_register_scsi_channel(
+ }
+
+ scrq->hwq_id = index;
+- scrq->vhost = vhost;
+
+ LEAVE;
+ return 0;
+--- a/drivers/scsi/ibmvscsi/ibmvfc.h
++++ b/drivers/scsi/ibmvscsi/ibmvfc.h
+@@ -789,6 +789,7 @@ struct ibmvfc_queue {
+ spinlock_t _lock;
+ spinlock_t *q_lock;
+
++ struct ibmvfc_host *vhost;
+ struct ibmvfc_event_pool evt_pool;
+ struct list_head sent;
+ struct list_head free;
+@@ -797,7 +798,6 @@ struct ibmvfc_queue {
+ union ibmvfc_iu cancel_rsp;
+
+ /* Sub-CRQ fields */
+- struct ibmvfc_host *vhost;
+ unsigned long cookie;
+ unsigned long vios_cookie;
+ unsigned long hw_irq;
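The ordering matters because an interrupt can fire the instant the handler is registered. A small stand-alone sketch of the hazard, assuming a hypothetical register_irq() that may invoke the handler immediately, as a replayed XICS interrupt does:

#include <stdio.h>

struct host { const char *name; };

struct queue {
	struct host *vhost;	/* back pointer the IRQ handler needs */
};

static void irq_handler(struct queue *q)
{
	/* dereferences garbage if vhost isn't set before registration */
	printf("irq for host %s\n", q->vhost->name);
}

/* stand-in for request_irq(): a pending interrupt may be delivered
 * before this even returns, so the handler runs right here */
static int register_irq(void (*handler)(struct queue *), struct queue *q)
{
	handler(q);
	return 0;
}

int main(void)
{
	struct host h = { "vhost0" };
	struct queue q = { 0 };

	q.vhost = &h;				/* set at allocation time ... */
	return register_irq(irq_handler, &q);	/* ... before registration */
}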
 alsa-hda-realtek-apply-fixup-for-lenovo-yoga-duet-7-properly.patch
 alsa-hda-realtek-add-quirk-for-clevo-pd70pnt.patch
 alsa-hda-realtek-add-quirk-for-clevo-ns50pu.patch
+net-openvswitch-fix-parsing-of-nw_proto-for-ipv6-fragments.patch
+ipv4-ping-fix-bind-address-validity-check.patch
+9p-fix-refcounting-during-full-path-walks-for-fid-lookups.patch
+9p-fix-fid-refcount-leak-in-v9fs_vfs_atomic_open_dotl.patch
+9p-fix-fid-refcount-leak-in-v9fs_vfs_get_link.patch
+9p-fix-ebadf-errors-in-cached-mode.patch
+btrfs-fix-hang-during-unmount-when-block-group-reclaim-task-is-running.patch
+btrfs-prevent-remounting-to-v1-space-cache-for-subpage-mount.patch
+btrfs-add-error-messages-to-all-unrecognized-mount-options.patch
+scsi-ibmvfc-store-vhost-pointer-during-subcrq-allocation.patch
+scsi-ibmvfc-allocate-free-queue-resource-only-during-probe-remove.patch
+mmc-sdhci-pci-o2micro-fix-card-detect-by-dealing-with-debouncing.patch
+mmc-mediatek-wait-dma-stop-bit-reset-to-0.patch
+xen-gntdev-avoid-blocking-in-unmap_grant_pages.patch
+maintainers-add-new-iommu-development-mailing-list.patch
+mtd-rawnand-gpmi-fix-setting-busy-timeout-setting.patch
+ata-libata-add-qc-flags-in-ata_qc_complete_template-tracepoint.patch
+dm-era-commit-metadata-in-postsuspend-after-worker-stops.patch
+dm-do-not-return-early-from-dm_io_complete-if-blk_sts_again-without-polling.patch
+dm-mirror-log-clear-log-bits-up-to-bits_per_long-boundary.patch
+tracing-kprobes-check-whether-get_kretprobe-returns-null-in-kretprobe_dispatcher.patch
--- /dev/null
+From cc72b72073ac982a954d3b43519ca1c28f03c27c Mon Sep 17 00:00:00 2001
+From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Date: Sat, 28 May 2022 00:55:39 +0900
+Subject: tracing/kprobes: Check whether get_kretprobe() returns NULL in kretprobe_dispatcher()
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+commit cc72b72073ac982a954d3b43519ca1c28f03c27c upstream.
+
+There is a small chance that get_kretprobe(ri) returns NULL in
+kretprobe_dispatcher() when another CPU unregisters the kretprobe
+right after __kretprobe_trampoline_handler().
+
+To avoid this issue, kretprobe_dispatcher() now checks the
+get_kretprobe() return value again, and if it is NULL, it returns
+immediately because that kretprobe is in the process of being
+unregistered.
+
+This issue was introduced when the kretprobe was decoupled from
+struct kretprobe_instance by commit d741bf41d7c7 ("kprobes: Remove
+kretprobe hash"). Before that commit, struct kretprobe_instance::rp
+pointed directly to the kretprobe and could never be NULL.
+
+Link: https://lkml.kernel.org/r/165366693881.797669.16926184644089588731.stgit@devnote2
+
+Reported-by: Yonghong Song <yhs@fb.com>
+Fixes: d741bf41d7c7 ("kprobes: Remove kretprobe hash")
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: bpf <bpf@vger.kernel.org>
+Cc: Kernel Team <kernel-team@fb.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Acked-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/trace_kprobe.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/trace_kprobe.c
++++ b/kernel/trace/trace_kprobe.c
+@@ -1718,8 +1718,17 @@ static int
+ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
+ {
+ struct kretprobe *rp = get_kretprobe(ri);
+- struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
++ struct trace_kprobe *tk;
+
++ /*
++ * There is a small chance that get_kretprobe(ri) returns NULL when
++ * the kretprobe is unregistered on another CPU between kretprobe's
++ * trampoline_handler and this function.
++ */
++ if (unlikely(!rp))
++ return 0;
++
++ tk = container_of(rp, struct trace_kprobe, rp);
+ raw_cpu_inc(*tk->nhit);
+
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
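The order of operations in the fix is essential: container_of() merely subtracts the member offset, so applying it to a NULL pointer produces a bogus non-NULL address, and a NULL check placed after the conversion would never fire. A stand-alone sketch with generic structs (not the tracing code):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kret { int dummy; };

struct wrapper {
	unsigned long nhit;	/* a counter field, like trace_kprobe's */
	char pad[0x50];		/* push 'kp' to a nonzero offset */
	struct kret kp;
};

int main(void)
{
	struct kret *rp = NULL;	/* the lookup raced with unregistration */

	/* WRONG: converting first yields roughly (wrapper *)-0x58, not
	 * NULL, so a later 'if (!tk)' can never catch the race */
	struct wrapper *tk = container_of(rp, struct wrapper, kp);
	printf("bogus pointer: %p\n", (void *)tk);

	/* RIGHT: test the pointer while it is still recognizably NULL */
	if (!rp)
		return 0;
	tk->nhit++;		/* only reached with a valid rp */
	return 1;
}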
--- /dev/null
+From dbe97cff7dd9f0f75c524afdd55ad46be3d15295 Mon Sep 17 00:00:00 2001
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+Date: Tue, 21 Jun 2022 22:27:26 -0400
+Subject: xen/gntdev: Avoid blocking in unmap_grant_pages()
+
+From: Demi Marie Obenour <demi@invisiblethingslab.com>
+
+commit dbe97cff7dd9f0f75c524afdd55ad46be3d15295 upstream.
+
+unmap_grant_pages() currently waits for the pages to no longer be used.
+In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
+deadlock against i915: i915 was waiting for gntdev's MMU notifier to
+finish, while gntdev was waiting for i915 to free its pages. I also
+believe this is responsible for various deadlocks I have experienced in
+the past.
+
+Avoid these problems by making unmap_grant_pages async. This requires
+making it return void, as any errors will not be available when the
+function returns. Fortunately, the only use of the return value is a
+WARN_ON(), which can be replaced by a WARN_ON() at the point where the
+error is detected. Additionally, a failed call will not prevent further
+calls from being made, but this is harmless.
+
+Because unmap_grant_pages is now async, the grant handle will be set to
+INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
+handle. Instead, a separate bool array is allocated for this purpose.
+This wastes memory, but stuffing this information in padding bytes is
+too fragile. Furthermore, it is necessary to grab a reference to the
+map before making the asynchronous call, and release the reference when
+the call returns.
+
+It is also necessary to guard against reentrancy in gntdev_put_map(),
+and to handle the case where userspace tries to map a mapping whose
+contents have not all been freed yet.
+
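The resulting discipline is the classic async-completion pattern: take a reference before starting the asynchronous unmap and drop it only in the completion callback, so the map cannot be freed while the hypervisor call is in flight. A minimal self-contained sketch with hypothetical names (the real code uses refcount_t and gnttab_unmap_refs_async()):

#include <stdio.h>
#include <stdlib.h>

struct map {
	int users;		/* stands in for the refcount_t */
};

static void map_put(struct map *m)
{
	if (--m->users == 0) {
		puts("all references gone, freeing map");
		free(m);
	}
}

/* completion callback: runs whenever the unmap finishes */
static void unmap_done(struct map *m)
{
	/* ... mark handles invalid, decrement live_grants ... */
	map_put(m);		/* release the reference taken below */
}

static void unmap_async(struct map *m)
{
	m->users++;		/* keep the map alive during the call */
	/* a real implementation queues the work; complete inline here */
	unmap_done(m);
}

int main(void)
{
	struct map *m = calloc(1, sizeof(*m));

	if (!m)
		return 1;
	m->users = 1;		/* the caller's reference */
	unmap_async(m);		/* map survives a late completion */
	map_put(m);		/* drop the caller's reference: frees */
	return 0;
}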
+Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
+Cc: stable@vger.kernel.org
+Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
+Reviewed-by: Juergen Gross <jgross@suse.com>
+Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/gntdev-common.h | 7 +
+ drivers/xen/gntdev.c | 157 +++++++++++++++++++++++++++++---------------
+ 2 files changed, 113 insertions(+), 51 deletions(-)
+
+--- a/drivers/xen/gntdev-common.h
++++ b/drivers/xen/gntdev-common.h
+@@ -16,6 +16,7 @@
+ #include <linux/mmu_notifier.h>
+ #include <linux/types.h>
+ #include <xen/interface/event_channel.h>
++#include <xen/grant_table.h>
+
+ struct gntdev_dmabuf_priv;
+
+@@ -56,6 +57,7 @@ struct gntdev_grant_map {
+ struct gnttab_unmap_grant_ref *unmap_ops;
+ struct gnttab_map_grant_ref *kmap_ops;
+ struct gnttab_unmap_grant_ref *kunmap_ops;
++ bool *being_removed;
+ struct page **pages;
+ unsigned long pages_vm_start;
+
+@@ -73,6 +75,11 @@ struct gntdev_grant_map {
+ /* Needed to avoid allocation in gnttab_dma_free_pages(). */
+ xen_pfn_t *frames;
+ #endif
++
++ /* Number of live grants */
++ atomic_t live_grants;
++ /* Needed to avoid allocation in __unmap_grant_pages */
++ struct gntab_unmap_queue_data unmap_data;
+ };
+
+ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
+--- a/drivers/xen/gntdev.c
++++ b/drivers/xen/gntdev.c
+@@ -35,6 +35,7 @@
+ #include <linux/slab.h>
+ #include <linux/highmem.h>
+ #include <linux/refcount.h>
++#include <linux/workqueue.h>
+
+ #include <xen/xen.h>
+ #include <xen/grant_table.h>
+@@ -60,10 +61,11 @@ module_param(limit, uint, 0644);
+ MODULE_PARM_DESC(limit,
+ "Maximum number of grants that may be mapped by one mapping request");
+
++/* True in PV mode, false otherwise */
+ static int use_ptemod;
+
+-static int unmap_grant_pages(struct gntdev_grant_map *map,
+- int offset, int pages);
++static void unmap_grant_pages(struct gntdev_grant_map *map,
++ int offset, int pages);
+
+ static struct miscdevice gntdev_miscdev;
+
+@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntde
+ kvfree(map->unmap_ops);
+ kvfree(map->kmap_ops);
+ kvfree(map->kunmap_ops);
++ kvfree(map->being_removed);
+ kfree(map);
+ }
+
+@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_ma
+ add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
+ GFP_KERNEL);
+ add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
++ add->being_removed =
++ kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
+ if (NULL == add->grants ||
+ NULL == add->map_ops ||
+ NULL == add->unmap_ops ||
+- NULL == add->pages)
++ NULL == add->pages ||
++ NULL == add->being_removed)
+ goto err;
+ if (use_ptemod) {
+ add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
+@@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+- if (map->pages && !use_ptemod)
++ if (map->pages && !use_ptemod) {
++ /*
++ * Increment the reference count. This ensures that the
++ * subsequent call to unmap_grant_pages() will not wind up
++ * re-entering itself. It *can* wind up calling
++ * gntdev_put_map() recursively, but such calls will be with a
++ * reference count greater than 1, so they will return before
++ * this code is reached. The recursion depth is thus limited to
++ * 1. Do NOT use refcount_inc() here, as it will detect that
++ * the reference count is zero and WARN().
++ */
++ refcount_set(&map->users, 1);
++
++ /*
++ * Unmap the grants. This may or may not be asynchronous, so it
++ * is possible that the reference count is 1 on return, but it
++ * could also be greater than 1.
++ */
+ unmap_grant_pages(map, 0, map->count);
+
++ /* Check if the memory now needs to be freed */
++ if (!refcount_dec_and_test(&map->users))
++ return;
++
++ /*
++ * All pages have been returned to the hypervisor, so free the
++ * map.
++ */
++ }
++
+ if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
+ notify_remote_via_evtchn(map->notify.event);
+ evtchn_put(map->notify.event);
+@@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, u
+
+ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
+ {
++ size_t alloced = 0;
+ int i, err = 0;
+
+ if (!use_ptemod) {
+@@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev
+ map->count);
+
+ for (i = 0; i < map->count; i++) {
+- if (map->map_ops[i].status == GNTST_okay)
++ if (map->map_ops[i].status == GNTST_okay) {
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+- else if (!err)
++ if (!use_ptemod)
++ alloced++;
++ } else if (!err)
+ err = -EINVAL;
+
+ if (map->flags & GNTMAP_device_map)
+ map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
+
+ if (use_ptemod) {
+- if (map->kmap_ops[i].status == GNTST_okay)
++ if (map->kmap_ops[i].status == GNTST_okay) {
++ if (map->map_ops[i].status == GNTST_okay)
++ alloced++;
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+- else if (!err)
++ } else if (!err)
+ err = -EINVAL;
+ }
+ }
++ atomic_add(alloced, &map->live_grants);
+ return err;
+ }
+
+-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+- int pages)
++static void __unmap_grant_pages_done(int result,
++ struct gntab_unmap_queue_data *data)
+ {
+- int i, err = 0;
+- struct gntab_unmap_queue_data unmap_data;
+-
+- if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+- int pgno = (map->notify.addr >> PAGE_SHIFT);
+- if (pgno >= offset && pgno < offset + pages) {
+- /* No need for kmap, pages are in lowmem */
+- uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+- tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+- map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+- }
+- }
+-
+- unmap_data.unmap_ops = map->unmap_ops + offset;
+- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+- unmap_data.pages = map->pages + offset;
+- unmap_data.count = pages;
+-
+- err = gnttab_unmap_refs_sync(&unmap_data);
+- if (err)
+- return err;
++ unsigned int i;
++ struct gntdev_grant_map *map = data->data;
++ unsigned int offset = data->unmap_ops - map->unmap_ops;
+
+- for (i = 0; i < pages; i++) {
+- if (map->unmap_ops[offset+i].status)
+- err = -EINVAL;
++ for (i = 0; i < data->count; i++) {
++ WARN_ON(map->unmap_ops[offset+i].status);
+ pr_debug("unmap handle=%d st=%d\n",
+ map->unmap_ops[offset+i].handle,
+ map->unmap_ops[offset+i].status);
+ map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+ if (use_ptemod) {
+- if (map->kunmap_ops[offset+i].status)
+- err = -EINVAL;
++ WARN_ON(map->kunmap_ops[offset+i].status);
+ pr_debug("kunmap handle=%u st=%d\n",
+ map->kunmap_ops[offset+i].handle,
+ map->kunmap_ops[offset+i].status);
+ map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
+ }
+ }
+- return err;
++ /*
++ * Decrease the live-grant counter. This must happen after the loop to
++ * prevent premature reuse of the grants by gnttab_mmap().
++ */
++ atomic_sub(data->count, &map->live_grants);
++
++ /* Release reference taken by __unmap_grant_pages */
++ gntdev_put_map(NULL, map);
+ }
+
+-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+- int pages)
++static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
++ int pages)
+ {
+- int range, err = 0;
++ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
++ int pgno = (map->notify.addr >> PAGE_SHIFT);
++
++ if (pgno >= offset && pgno < offset + pages) {
++ /* No need for kmap, pages are in lowmem */
++ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
++
++ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
++ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
++ }
++ }
++
++ map->unmap_data.unmap_ops = map->unmap_ops + offset;
++ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
++ map->unmap_data.pages = map->pages + offset;
++ map->unmap_data.count = pages;
++ map->unmap_data.done = __unmap_grant_pages_done;
++ map->unmap_data.data = map;
++ refcount_inc(&map->users); /* to keep map alive during async call below */
++
++ gnttab_unmap_refs_async(&map->unmap_data);
++}
++
++static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
++ int pages)
++{
++ int range;
++
++ if (atomic_read(&map->live_grants) == 0)
++ return; /* Nothing to do */
+
+ pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
+
+ /* It is possible the requested range will have a "hole" where we
+ * already unmapped some of the grants. Only unmap valid ranges.
+ */
+- while (pages && !err) {
+- while (pages &&
+- map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
++ while (pages) {
++ while (pages && map->being_removed[offset]) {
+ offset++;
+ pages--;
+ }
+ range = 0;
+ while (range < pages) {
+- if (map->unmap_ops[offset + range].handle ==
+- INVALID_GRANT_HANDLE)
++ if (map->being_removed[offset + range])
+ break;
++ map->being_removed[offset + range] = true;
+ range++;
+ }
+- err = __unmap_grant_pages(map, offset, range);
++ if (range)
++ __unmap_grant_pages(map, offset, range);
+ offset += range;
+ pages -= range;
+ }
+-
+- return err;
+ }
+
+ /* ------------------------------------------------------------------ */
+@@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
+ unsigned long mstart, mend;
+- int err;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+@@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu
+ map->index, map->count,
+ map->vma->vm_start, map->vma->vm_end,
+ range->start, range->end, mstart, mend);
+- err = unmap_grant_pages(map,
++ unmap_grant_pages(map,
+ (mstart - map->vma->vm_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
+- WARN_ON(err);
+
+ return true;
+ }
+@@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip
+ goto unlock_out;
+ if (use_ptemod && map->vma)
+ goto unlock_out;
++ if (atomic_read(&map->live_grants)) {
++ err = -EAGAIN;
++ goto unlock_out;
++ }
+ refcount_inc(&map->users);
+
+ vma->vm_ops = &gntdev_vmops;