git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 23 Apr 2023 13:17:08 +0000 (15:17 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 23 Apr 2023 13:17:08 +0000 (15:17 +0200)
added patches:
fuse-always-revalidate-rename-target-dentry.patch
fuse-check-s_root-when-destroying-sb.patch
fuse-fix-attr-version-comparison-in-fuse_read_update_size.patch
fuse-fix-deadlock-between-atomic-o_trunc-and-page-invalidation.patch
virtiofs-split-requests-that-exceed-virtqueue-size.patch

queue-5.10/fuse-always-revalidate-rename-target-dentry.patch [new file with mode: 0644]
queue-5.10/fuse-check-s_root-when-destroying-sb.patch [new file with mode: 0644]
queue-5.10/fuse-fix-attr-version-comparison-in-fuse_read_update_size.patch [new file with mode: 0644]
queue-5.10/fuse-fix-deadlock-between-atomic-o_trunc-and-page-invalidation.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/virtiofs-split-requests-that-exceed-virtqueue-size.patch [new file with mode: 0644]

diff --git a/queue-5.10/fuse-always-revalidate-rename-target-dentry.patch b/queue-5.10/fuse-always-revalidate-rename-target-dentry.patch
new file mode 100644 (file)
index 0000000..ffc1743
--- /dev/null
@@ -0,0 +1,36 @@
+From ccc031e26afe60d2a5a3d93dabd9c978210825fb Mon Sep 17 00:00:00 2001
+From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Date: Wed, 28 Sep 2022 20:19:34 +0800
+Subject: fuse: always revalidate rename target dentry
+
+From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+
+commit ccc031e26afe60d2a5a3d93dabd9c978210825fb upstream.
+
+The previous commit df8629af2934 ("fuse: always revalidate if exclusive
+create") ensures that the dentries are revalidated on O_EXCL creates.  This
+commit complements it by also performing revalidation for rename target
+dentries.  Otherwise, a rename target file that only exists in kernel
+dentry cache but not in the filesystem will result in EEXIST if
+RENAME_NOREPLACE flag is used.
+
+Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Yang Bo <yb203166@antfin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/dir.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -205,7 +205,7 @@ static int fuse_dentry_revalidate(struct
+       if (inode && fuse_is_bad(inode))
+               goto invalid;
+       else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
+-               (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
++               (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) {
+               struct fuse_entry_out outarg;
+               FUSE_ARGS(args);
+               struct fuse_forget_link *forget;
diff --git a/queue-5.10/fuse-check-s_root-when-destroying-sb.patch b/queue-5.10/fuse-check-s_root-when-destroying-sb.patch
new file mode 100644 (file)
index 0000000..f0d673f
--- /dev/null
@@ -0,0 +1,45 @@
+From d534d31d6a45d71de61db22090b4820afb68fddc Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Thu, 21 Oct 2021 10:01:38 +0200
+Subject: fuse: check s_root when destroying sb
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit d534d31d6a45d71de61db22090b4820afb68fddc upstream.
+
+Checking "fm" works because currently sb->s_fs_info is cleared on error
+paths; however, sb->s_root is what generic_shutdown_super() checks to
+determine whether the sb was fully initialized or not.
+
+This change will allow cleanup of sb setup error paths.
+
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Yang Bo <yb203166@antfin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/inode.c     |    2 +-
+ fs/fuse/virtio_fs.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -1596,7 +1596,7 @@ static void fuse_kill_sb_blk(struct supe
+       struct fuse_mount *fm = get_fuse_mount_super(sb);
+       bool last;
+-      if (fm) {
++      if (sb->s_root) {
+               last = fuse_mount_remove(fm);
+               if (last)
+                       fuse_conn_destroy(fm);
+--- a/fs/fuse/virtio_fs.c
++++ b/fs/fuse/virtio_fs.c
+@@ -1399,7 +1399,7 @@ static void virtio_kill_sb(struct super_
+       bool last;
+       /* If mount failed, we can still be called without any fc */
+-      if (fm) {
++      if (sb->s_root) {
+               last = fuse_mount_remove(fm);
+               if (last)
+                       virtio_fs_conn_destroy(fm);
diff --git a/queue-5.10/fuse-fix-attr-version-comparison-in-fuse_read_update_size.patch b/queue-5.10/fuse-fix-attr-version-comparison-in-fuse_read_update_size.patch
new file mode 100644 (file)
index 0000000..13aea10
--- /dev/null
@@ -0,0 +1,33 @@
+From 484ce65715b06aead8c4901f01ca32c5a240bc71 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri, 22 Oct 2021 17:03:03 +0200
+Subject: fuse: fix attr version comparison in fuse_read_update_size()
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 484ce65715b06aead8c4901f01ca32c5a240bc71 upstream.
+
+A READ request returning a short count is taken as indication of EOF, and
+the cached file size is modified accordingly.
+
+Fix the attribute version checking to allow for changes to fc->attr_version
+on other inodes.
+
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Yang Bo <yb203166@antfin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -782,7 +782,7 @@ static void fuse_read_update_size(struct
+       struct fuse_inode *fi = get_fuse_inode(inode);
+       spin_lock(&fi->lock);
+-      if (attr_ver == fi->attr_version && size < inode->i_size &&
++      if (attr_ver >= fi->attr_version && size < inode->i_size &&
+           !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
+               fi->attr_version = atomic64_inc_return(&fc->attr_version);
+               i_size_write(inode, size);
diff --git a/queue-5.10/fuse-fix-deadlock-between-atomic-o_trunc-and-page-invalidation.patch b/queue-5.10/fuse-fix-deadlock-between-atomic-o_trunc-and-page-invalidation.patch
new file mode 100644 (file)
index 0000000..a92c396
--- /dev/null
@@ -0,0 +1,162 @@
+From 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri, 22 Apr 2022 15:48:53 +0200
+Subject: fuse: fix deadlock between atomic O_TRUNC and page invalidation
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b upstream.
+
+fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
+O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
+atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
+in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
+deadlock related to the case above, which will cause the xfstests
+generic/464 testcase hung in our virtio-fs test environment.
+
+For example, consider two processes concurrently open one same file, one
+with O_TRUNC and another without O_TRUNC. The deadlock case is described
+below, if open(O_TRUNC) is already set_nowrite(acquired A), and is trying
+to lock a page (acquiring B), open() could have held the page lock
+(acquired B), and waiting on the page writeback (acquiring A). This would
+lead to deadlocks.
+
+open(O_TRUNC)
+----------------------------------------------------------------
+fuse_open_common
+  inode_lock            [C acquire]
+  fuse_set_nowrite      [A acquire]
+
+  fuse_finish_open
+    truncate_pagecache
+      lock_page         [B acquire]
+      truncate_inode_page
+      unlock_page       [B release]
+
+  fuse_release_nowrite  [A release]
+  inode_unlock          [C release]
+----------------------------------------------------------------
+
+open()
+----------------------------------------------------------------
+fuse_open_common
+  fuse_finish_open
+    invalidate_inode_pages2
+      lock_page         [B acquire]
+        fuse_launder_page
+          fuse_wait_on_page_writeback [A acquire & release]
+      unlock_page       [B release]
+----------------------------------------------------------------
+
+Besides this case, all calls of invalidate_inode_pages2() and
+invalidate_inode_pages2_range() in fuse code also can deadlock with
+open(O_TRUNC).
+
+Fix by moving the truncate_pagecache() call outside the nowrite protected
+region.  The nowrite protection is only for delayed writeback
+(writeback_cache) case, where inode lock does not protect against
+truncation racing with writes on the server.  Write syscalls racing with
+page cache truncation still get the inode lock protection.
+
+This patch also changes the order of filemap_invalidate_lock()
+vs. fuse_set_nowrite() in fuse_open_common().  This new order matches the
+order found in fuse_file_fallocate() and fuse_do_setattr().
+
+Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Tested-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Yang Bo <yb203166@antfin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/dir.c  |    5 +++++
+ fs/fuse/file.c |   29 +++++++++++++++++------------
+ 2 files changed, 22 insertions(+), 12 deletions(-)
+
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode
+       struct fuse_entry_out outentry;
+       struct fuse_inode *fi;
+       struct fuse_file *ff;
++      bool trunc = flags & O_TRUNC;
+       /* Userspace expects S_IFREG in create mode */
+       BUG_ON((mode & S_IFMT) != S_IFREG);
+@@ -604,6 +605,10 @@ static int fuse_create_open(struct inode
+       } else {
+               file->private_data = ff;
+               fuse_finish_open(inode, file);
++              if (fm->fc->atomic_o_trunc && trunc)
++                      truncate_pagecache(inode, 0);
++              else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
++                      invalidate_inode_pages2(inode->i_mapping);
+       }
+       return err;
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -206,14 +206,10 @@ void fuse_finish_open(struct inode *inod
+               fi->attr_version = atomic64_inc_return(&fc->attr_version);
+               i_size_write(inode, 0);
+               spin_unlock(&fi->lock);
+-              truncate_pagecache(inode, 0);
+               fuse_invalidate_attr(inode);
+               if (fc->writeback_cache)
+                       file_update_time(file);
+-      } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
+-              invalidate_inode_pages2(inode->i_mapping);
+       }
+-
+       if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+               fuse_link_write_file(file);
+ }
+@@ -236,30 +232,39 @@ int fuse_open_common(struct inode *inode
+       if (err)
+               return err;
+-      if (is_wb_truncate || dax_truncate) {
++      if (is_wb_truncate || dax_truncate)
+               inode_lock(inode);
+-              fuse_set_nowrite(inode);
+-      }
+       if (dax_truncate) {
+               down_write(&get_fuse_inode(inode)->i_mmap_sem);
+               err = fuse_dax_break_layouts(inode, 0, 0);
+               if (err)
+-                      goto out;
++                      goto out_inode_unlock;
+       }
++      if (is_wb_truncate || dax_truncate)
++              fuse_set_nowrite(inode);
++
+       err = fuse_do_open(fm, get_node_id(inode), file, isdir);
+       if (!err)
+               fuse_finish_open(inode, file);
+-out:
++      if (is_wb_truncate || dax_truncate)
++              fuse_release_nowrite(inode);
++      if (!err) {
++              struct fuse_file *ff = file->private_data;
++
++              if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
++                      truncate_pagecache(inode, 0);
++              else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
++                      invalidate_inode_pages2(inode->i_mapping);
++      }
+       if (dax_truncate)
+               up_write(&get_fuse_inode(inode)->i_mmap_sem);
+-      if (is_wb_truncate | dax_truncate) {
+-              fuse_release_nowrite(inode);
++out_inode_unlock:
++      if (is_wb_truncate || dax_truncate)
+               inode_unlock(inode);
+-      }
+       return err;
+ }
diff --git a/queue-5.10/series b/queue-5.10/series
index f4aad3edb46b98eb29cf938f2c884a4293a428ba..3fd060da5550642c91760626eff56e2fc0ae7c5d 100644 (file)
@@ -47,3 +47,8 @@ mips-define-runtime_discard_exit-in-ld-script.patch
 docs-futex-fix-kernel-doc-references-after-code-split-up-preparation.patch
 purgatory-fix-disabling-debug-info.patch
 virtiofs-clean-up-error-handling-in-virtio_fs_get_tree.patch
+virtiofs-split-requests-that-exceed-virtqueue-size.patch
+fuse-check-s_root-when-destroying-sb.patch
+fuse-fix-attr-version-comparison-in-fuse_read_update_size.patch
+fuse-always-revalidate-rename-target-dentry.patch
+fuse-fix-deadlock-between-atomic-o_trunc-and-page-invalidation.patch
diff --git a/queue-5.10/virtiofs-split-requests-that-exceed-virtqueue-size.patch b/queue-5.10/virtiofs-split-requests-that-exceed-virtqueue-size.patch
new file mode 100644 (file)
index 0000000..0516ee0
--- /dev/null
@@ -0,0 +1,122 @@
+From a7f0d7aab0b4f3f0780b1f77356e2fe7202ac0cb Mon Sep 17 00:00:00 2001
+From: Connor Kuehl <ckuehl@redhat.com>
+Date: Thu, 18 Mar 2021 08:52:22 -0500
+Subject: virtiofs: split requests that exceed virtqueue size
+
+From: Connor Kuehl <ckuehl@redhat.com>
+
+commit a7f0d7aab0b4f3f0780b1f77356e2fe7202ac0cb upstream.
+
+If an incoming FUSE request can't fit on the virtqueue, the request is
+placed onto a workqueue so a worker can try to resubmit it later where
+there will (hopefully) be space for it next time.
+
+This is fine for requests that aren't larger than a virtqueue's maximum
+capacity.  However, if a request's size exceeds the maximum capacity of the
+virtqueue (even if the virtqueue is empty), it will be doomed to a life of
+being placed on the workqueue, removed, discovered it won't fit, and placed
+on the workqueue yet again.
+
+Furthermore, from section 2.6.5.3.1 (Driver Requirements: Indirect
+Descriptors) of the virtio spec:
+
+  "A driver MUST NOT create a descriptor chain longer than the Queue
+  Size of the device."
+
+To fix this, limit the number of pages FUSE will use for an overall
+request.  This way, each request can realistically fit on the virtqueue
+when it is decomposed into a scattergather list and avoid violating section
+2.6.5.3.1 of the virtio spec.
+
+Signed-off-by: Connor Kuehl <ckuehl@redhat.com>
+Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Yang Bo <yb203166@antfin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/fuse_i.h    |    3 +++
+ fs/fuse/inode.c     |    3 ++-
+ fs/fuse/virtio_fs.c |   19 +++++++++++++++++--
+ 3 files changed, 22 insertions(+), 3 deletions(-)
+
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -556,6 +556,9 @@ struct fuse_conn {
+       /** Maxmum number of pages that can be used in a single request */
+       unsigned int max_pages;
++      /** Constrain ->max_pages to this value during feature negotiation */
++      unsigned int max_pages_limit;
++
+       /** Input queue */
+       struct fuse_iqueue iq;
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -710,6 +710,7 @@ void fuse_conn_init(struct fuse_conn *fc
+       fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
+       fc->user_ns = get_user_ns(user_ns);
+       fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
++      fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
+       INIT_LIST_HEAD(&fc->mounts);
+       list_add(&fm->fc_entry, &fc->mounts);
+@@ -1056,7 +1057,7 @@ static void process_init_reply(struct fu
+                               fc->abort_err = 1;
+                       if (arg->flags & FUSE_MAX_PAGES) {
+                               fc->max_pages =
+-                                      min_t(unsigned int, FUSE_MAX_MAX_PAGES,
++                                      min_t(unsigned int, fc->max_pages_limit,
+                                       max_t(unsigned int, arg->max_pages, 1));
+                       }
+                       if (IS_ENABLED(CONFIG_FUSE_DAX) &&
+--- a/fs/fuse/virtio_fs.c
++++ b/fs/fuse/virtio_fs.c
+@@ -18,6 +18,12 @@
+ #include <linux/uio.h>
+ #include "fuse_i.h"
++/* Used to help calculate the FUSE connection's max_pages limit for a request's
++ * size. Parts of the struct fuse_req are sliced into scattergather lists in
++ * addition to the pages used, so this can help account for that overhead.
++ */
++#define FUSE_HEADER_OVERHEAD    4
++
+ /* List of virtio-fs device instances and a lock for the list. Also provides
+  * mutual exclusion in device removal and mounting path
+  */
+@@ -1426,9 +1432,10 @@ static int virtio_fs_get_tree(struct fs_
+ {
+       struct virtio_fs *fs;
+       struct super_block *sb;
+-      struct fuse_conn *fc;
++      struct fuse_conn *fc = NULL;
+       struct fuse_mount *fm;
+-      int err;
++      unsigned int virtqueue_size;
++      int err = -EIO;
+       /* This gets a reference on virtio_fs object. This ptr gets installed
+        * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
+@@ -1440,6 +1447,10 @@ static int virtio_fs_get_tree(struct fs_
+               return -EINVAL;
+       }
++      virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
++      if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
++              goto out_err;
++
+       err = -ENOMEM;
+       fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
+       if (!fc)
+@@ -1454,6 +1465,10 @@ static int virtio_fs_get_tree(struct fs_
+       fc->delete_stale = true;
+       fc->auto_submounts = true;
++      /* Tell FUSE to split requests that exceed the virtqueue's size */
++      fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
++                                  virtqueue_size - FUSE_HEADER_OVERHEAD);
++
+       fsc->s_fs_info = fm;
+       sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
+       fuse_mount_put(fm);