git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Mon, 18 Mar 2024 00:20:44 +0000 (20:20 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 18 Mar 2024 00:20:44 +0000 (20:20 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
19 files changed:
queue-6.1/io_uring-drop-any-code-related-to-scm_rights.patch [new file with mode: 0644]
queue-6.1/io_uring-unix-drop-usage-of-io_uring-socket.patch [new file with mode: 0644]
queue-6.1/md-fix-data-corruption-for-raid456-when-reshape-rest.patch [new file with mode: 0644]
queue-6.1/md-raid10-prevent-soft-lockup-while-flush-writes.patch [new file with mode: 0644]
queue-6.1/nfsd-add-an-nfsd4_encode_nfstime4-helper.patch [new file with mode: 0644]
queue-6.1/nfsd-add-some-comments-to-nfsd_file_do_acquire.patch [new file with mode: 0644]
queue-6.1/nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch [new file with mode: 0644]
queue-6.1/nfsd-allow-reaping-files-still-under-writeback.patch [new file with mode: 0644]
queue-6.1/nfsd-convert-filecache-to-rhltable.patch [new file with mode: 0644]
queue-6.1/nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch [new file with mode: 0644]
queue-6.1/nfsd-don-t-open-code-clear_and_wake_up_bit.patch [new file with mode: 0644]
queue-6.1/nfsd-don-t-take-put-an-extra-reference-when-putting-.patch [new file with mode: 0644]
queue-6.1/nfsd-fix-creation-time-serialization-order.patch [new file with mode: 0644]
queue-6.1/nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch [new file with mode: 0644]
queue-6.1/nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch [new file with mode: 0644]
queue-6.1/nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch [new file with mode: 0644]
queue-6.1/nfsd-simplify-the-delayed-disposal-list-code.patch [new file with mode: 0644]
queue-6.1/nfsd-update-comment-over-__nfsd_file_cache_purge.patch [new file with mode: 0644]
queue-6.1/series [new file with mode: 0644]

diff --git a/queue-6.1/io_uring-drop-any-code-related-to-scm_rights.patch b/queue-6.1/io_uring-drop-any-code-related-to-scm_rights.patch
new file mode 100644 (file)
index 0000000..74e12cd
--- /dev/null
@@ -0,0 +1,344 @@
+From 6e16ae9b86871d8b78fd8999d6b8c396ed42993a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Mar 2024 18:15:05 -0600
+Subject: io_uring: drop any code related to SCM_RIGHTS
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream.
+
+This is dead code after we dropped support for passing io_uring fds
+over SCM_RIGHTS, get rid of it.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/io_uring_types.h |   3 -
+ io_uring/filetable.c           |  10 +--
+ io_uring/io_uring.c            |  31 +------
+ io_uring/rsrc.c                | 151 +--------------------------------
+ io_uring/rsrc.h                |  15 ----
+ 5 files changed, 8 insertions(+), 202 deletions(-)
+
+diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
+index f5b687a787a34..37aeea266ebb3 100644
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -330,9 +330,6 @@ struct io_ring_ctx {
+       struct list_head                io_buffers_pages;
+-      #if defined(CONFIG_UNIX)
+-              struct socket           *ring_sock;
+-      #endif
+       /* hashed buffered write serialization */
+       struct io_wq_hash               *hash_map;
+diff --git a/io_uring/filetable.c b/io_uring/filetable.c
+index b80614e7d6051..4660cb89ea9f5 100644
+--- a/io_uring/filetable.c
++++ b/io_uring/filetable.c
+@@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
+               needs_switch = true;
+       }
+-      ret = io_scm_file_account(ctx, file);
+-      if (!ret) {
+-              *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+-              io_fixed_file_set(file_slot, file);
+-              io_file_bitmap_set(&ctx->file_table, slot_index);
+-      }
++      *io_get_tag_slot(ctx->file_data, slot_index) = 0;
++      io_fixed_file_set(file_slot, file);
++      io_file_bitmap_set(&ctx->file_table, slot_index);
++      return 0;
+ err:
+       if (needs_switch)
+               io_rsrc_node_switch(ctx, ctx->file_data);
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index cf7dd62da0e37..415248c1f82c6 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -60,7 +60,6 @@
+ #include <linux/net.h>
+ #include <net/sock.h>
+ #include <net/af_unix.h>
+-#include <net/scm.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/sched/mm.h>
+ #include <linux/uaccess.h>
+@@ -2628,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
+       WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
+       WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
+-#if defined(CONFIG_UNIX)
+-      if (ctx->ring_sock) {
+-              ctx->ring_sock->file = NULL; /* so that iput() is called */
+-              sock_release(ctx->ring_sock);
+-      }
+-#endif
+       WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
+       if (ctx->mm_account) {
+@@ -3438,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+ /*
+  * Allocate an anonymous fd, this is what constitutes the application
+  * visible backing of an io_uring instance. The application mmaps this
+- * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
+- * we have to tie this fd to a socket for file garbage collection purposes.
++ * fd to gain access to the SQ/CQ ring details.
+  */
+ static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
+ {
+-      struct file *file;
+-#if defined(CONFIG_UNIX)
+-      int ret;
+-
+-      ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
+-                              &ctx->ring_sock);
+-      if (ret)
+-              return ERR_PTR(ret);
+-#endif
+-
+-      file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
++      return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
+                                        O_RDWR | O_CLOEXEC, NULL);
+-#if defined(CONFIG_UNIX)
+-      if (IS_ERR(file)) {
+-              sock_release(ctx->ring_sock);
+-              ctx->ring_sock = NULL;
+-      } else {
+-              ctx->ring_sock->file = file;
+-      }
+-#endif
+-      return file;
+ }
+ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
+diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
+index 7ada0339b3870..ac658cfa89c63 100644
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
+                               err = -EBADF;
+                               break;
+                       }
+-                      err = io_scm_file_account(ctx, file);
+-                      if (err) {
+-                              fput(file);
+-                              break;
+-                      }
+                       *io_get_tag_slot(data, i) = tag;
+                       io_fixed_file_set(file_slot, file);
+                       io_file_bitmap_set(&ctx->file_table, i);
+@@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
+       for (i = 0; i < ctx->nr_user_files; i++) {
+               struct file *file = io_file_from_index(&ctx->file_table, i);
+-              /* skip scm accounted files, they'll be freed by ->ring_sock */
+-              if (!file || io_file_need_scm(file))
++              if (!file)
+                       continue;
+               io_file_bitmap_clear(&ctx->file_table, i);
+               fput(file);
+       }
+-#if defined(CONFIG_UNIX)
+-      if (ctx->ring_sock) {
+-              struct sock *sock = ctx->ring_sock->sk;
+-              struct sk_buff *skb;
+-
+-              while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
+-                      kfree_skb(skb);
+-      }
+-#endif
+       io_free_file_tables(&ctx->file_table);
+       io_file_table_set_alloc_range(ctx, 0, 0);
+       io_rsrc_data_free(ctx->file_data);
+@@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+       return ret;
+ }
+-/*
+- * Ensure the UNIX gc is aware of our file set, so we are certain that
+- * the io_uring can be safely unregistered on process exit, even if we have
+- * loops in the file referencing. We account only files that can hold other
+- * files because otherwise they can't form a loop and so are not interesting
+- * for GC.
+- */
+-int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
+-{
+-#if defined(CONFIG_UNIX)
+-      struct sock *sk = ctx->ring_sock->sk;
+-      struct sk_buff_head *head = &sk->sk_receive_queue;
+-      struct scm_fp_list *fpl;
+-      struct sk_buff *skb;
+-
+-      if (likely(!io_file_need_scm(file)))
+-              return 0;
+-
+-      /*
+-       * See if we can merge this file into an existing skb SCM_RIGHTS
+-       * file set. If there's no room, fall back to allocating a new skb
+-       * and filling it in.
+-       */
+-      spin_lock_irq(&head->lock);
+-      skb = skb_peek(head);
+-      if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
+-              __skb_unlink(skb, head);
+-      else
+-              skb = NULL;
+-      spin_unlock_irq(&head->lock);
+-
+-      if (!skb) {
+-              fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+-              if (!fpl)
+-                      return -ENOMEM;
+-
+-              skb = alloc_skb(0, GFP_KERNEL);
+-              if (!skb) {
+-                      kfree(fpl);
+-                      return -ENOMEM;
+-              }
+-
+-              fpl->user = get_uid(current_user());
+-              fpl->max = SCM_MAX_FD;
+-              fpl->count = 0;
+-
+-              UNIXCB(skb).fp = fpl;
+-              skb->sk = sk;
+-              skb->scm_io_uring = 1;
+-              skb->destructor = unix_destruct_scm;
+-              refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+-      }
+-
+-      fpl = UNIXCB(skb).fp;
+-      fpl->fp[fpl->count++] = get_file(file);
+-      unix_inflight(fpl->user, file);
+-      skb_queue_head(head, skb);
+-      fput(file);
+-#endif
+-      return 0;
+-}
+-
+ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+ {
+       struct file *file = prsrc->file;
+-#if defined(CONFIG_UNIX)
+-      struct sock *sock = ctx->ring_sock->sk;
+-      struct sk_buff_head list, *head = &sock->sk_receive_queue;
+-      struct sk_buff *skb;
+-      int i;
+-
+-      if (!io_file_need_scm(file)) {
+-              fput(file);
+-              return;
+-      }
+-
+-      __skb_queue_head_init(&list);
+-
+-      /*
+-       * Find the skb that holds this file in its SCM_RIGHTS. When found,
+-       * remove this entry and rearrange the file array.
+-       */
+-      skb = skb_dequeue(head);
+-      while (skb) {
+-              struct scm_fp_list *fp;
+-              fp = UNIXCB(skb).fp;
+-              for (i = 0; i < fp->count; i++) {
+-                      int left;
+-
+-                      if (fp->fp[i] != file)
+-                              continue;
+-
+-                      unix_notinflight(fp->user, fp->fp[i]);
+-                      left = fp->count - 1 - i;
+-                      if (left) {
+-                              memmove(&fp->fp[i], &fp->fp[i + 1],
+-                                              left * sizeof(struct file *));
+-                      }
+-                      fp->count--;
+-                      if (!fp->count) {
+-                              kfree_skb(skb);
+-                              skb = NULL;
+-                      } else {
+-                              __skb_queue_tail(&list, skb);
+-                      }
+-                      fput(file);
+-                      file = NULL;
+-                      break;
+-              }
+-
+-              if (!file)
+-                      break;
+-
+-              __skb_queue_tail(&list, skb);
+-
+-              skb = skb_dequeue(head);
+-      }
+-
+-      if (skb_peek(&list)) {
+-              spin_lock_irq(&head->lock);
+-              while ((skb = __skb_dequeue(&list)) != NULL)
+-                      __skb_queue_tail(head, skb);
+-              spin_unlock_irq(&head->lock);
+-      }
+-#else
+       fput(file);
+-#endif
+ }
+ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+@@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+                       goto fail;
+               /*
+-               * Don't allow io_uring instances to be registered. If UNIX
+-               * isn't enabled, then this causes a reference cycle and this
+-               * instance can never get freed. If UNIX is enabled we'll
+-               * handle it just fine, but there's still no point in allowing
+-               * a ring fd as it doesn't support regular read/write anyway.
++               * Don't allow io_uring instances to be registered.
+                */
+               if (io_is_uring_fops(file)) {
+                       fput(file);
+                       goto fail;
+               }
+-              ret = io_scm_file_account(ctx, file);
+-              if (ret) {
+-                      fput(file);
+-                      goto fail;
+-              }
+               file_slot = io_fixed_file_slot(&ctx->file_table, i);
+               io_fixed_file_set(file_slot, file);
+               io_file_bitmap_set(&ctx->file_table, i);
+diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
+index acaf8dad05401..85f145607c620 100644
+--- a/io_uring/rsrc.h
++++ b/io_uring/rsrc.h
+@@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx);
+ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+                         unsigned nr_args, u64 __user *tags);
+-int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file);
+-
+-static inline bool io_file_need_scm(struct file *filp)
+-{
+-      return false;
+-}
+-
+-static inline int io_scm_file_account(struct io_ring_ctx *ctx,
+-                                    struct file *file)
+-{
+-      if (likely(!io_file_need_scm(file)))
+-              return 0;
+-      return __io_scm_file_account(ctx, file);
+-}
+-
+ int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
+                            unsigned nr_args);
+ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
+-- 
+2.43.0
+
diff --git a/queue-6.1/io_uring-unix-drop-usage-of-io_uring-socket.patch b/queue-6.1/io_uring-unix-drop-usage-of-io_uring-socket.patch
new file mode 100644 (file)
index 0000000..48477b2
--- /dev/null
@@ -0,0 +1,134 @@
+From dc638a609ff754e234ca3900338628b94617e4a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Mar 2024 18:10:12 -0600
+Subject: io_uring/unix: drop usage of io_uring socket
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit a4104821ad651d8a0b374f0b2474c345bbb42f82 upstream.
+
+Since we no longer allow sending io_uring fds over SCM_RIGHTS, move to
+using io_is_uring_fops() to detect whether this is an io_uring fd or not.
+With that done, kill off io_uring_get_socket() as nobody calls it
+anymore.
+
+This is in preparation for yanking out the rest of the core related to
+unix gc with io_uring.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/io_uring.h | 10 +++++-----
+ io_uring/io_uring.c      | 13 -------------
+ io_uring/io_uring.h      |  1 -
+ net/core/scm.c           |  2 +-
+ net/unix/scm.c           |  4 +---
+ 5 files changed, 7 insertions(+), 23 deletions(-)
+
+diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
+index a1484cdb3158e..a8f3058448eaa 100644
+--- a/include/linux/io_uring.h
++++ b/include/linux/io_uring.h
+@@ -42,11 +42,11 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
+                       unsigned issue_flags);
+ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *, unsigned));
+-struct sock *io_uring_get_socket(struct file *file);
+ void __io_uring_cancel(bool cancel_all);
+ void __io_uring_free(struct task_struct *tsk);
+ void io_uring_unreg_ringfd(void);
+ const char *io_uring_get_opcode(u8 opcode);
++bool io_is_uring_fops(struct file *file);
+ static inline void io_uring_files_cancel(void)
+ {
+@@ -71,6 +71,10 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+ {
+       return -EOPNOTSUPP;
+ }
++static inline bool io_is_uring_fops(struct file *file)
++{
++      return false;
++}
+ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+               ssize_t ret2, unsigned issue_flags)
+ {
+@@ -79,10 +83,6 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+ {
+ }
+-static inline struct sock *io_uring_get_socket(struct file *file)
+-{
+-      return NULL;
+-}
+ static inline void io_uring_task_cancel(void)
+ {
+ }
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 35894955b4549..cf7dd62da0e37 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -153,19 +153,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
+ static struct kmem_cache *req_cachep;
+-struct sock *io_uring_get_socket(struct file *file)
+-{
+-#if defined(CONFIG_UNIX)
+-      if (io_is_uring_fops(file)) {
+-              struct io_ring_ctx *ctx = file->private_data;
+-
+-              return ctx->ring_sock->sk;
+-      }
+-#endif
+-      return NULL;
+-}
+-EXPORT_SYMBOL(io_uring_get_socket);
+-
+ static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
+ {
+       if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
+index 019600570ee49..59e6f755f12c6 100644
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
+ }
+ void __io_req_task_work_add(struct io_kiocb *req, bool allow_local);
+-bool io_is_uring_fops(struct file *file);
+ bool io_alloc_async_data(struct io_kiocb *req);
+ void io_req_task_queue(struct io_kiocb *req);
+ void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
+diff --git a/net/core/scm.c b/net/core/scm.c
+index e762a4b8a1d22..a877c4ef4c256 100644
+--- a/net/core/scm.c
++++ b/net/core/scm.c
+@@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+               if (fd < 0 || !(file = fget_raw(fd)))
+                       return -EBADF;
+               /* don't allow io_uring files */
+-              if (io_uring_get_socket(file)) {
++              if (io_is_uring_fops(file)) {
+                       fput(file);
+                       return -EINVAL;
+               }
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index e8e2a00bb0f58..d1048b4c2baaf 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp)
+               /* PF_UNIX ? */
+               if (s && sock->ops && sock->ops->family == PF_UNIX)
+                       u_sock = s;
+-      } else {
+-              /* Could be an io_uring instance */
+-              u_sock = io_uring_get_socket(filp);
+       }
++
+       return u_sock;
+ }
+ EXPORT_SYMBOL(unix_get_socket);
+-- 
+2.43.0
+
diff --git a/queue-6.1/md-fix-data-corruption-for-raid456-when-reshape-rest.patch b/queue-6.1/md-fix-data-corruption-for-raid456-when-reshape-rest.patch
new file mode 100644 (file)
index 0000000..a408267
--- /dev/null
@@ -0,0 +1,60 @@
+From 90a9cc9319044b9183d226527684e42547c5703c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 09:56:07 +0800
+Subject: md: fix data corruption for raid456 when reshape restart while grow
+ up
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ]
+
+Currently, if reshape is interrupted, echoing "reshape" to sync_action
+will restart reshape from scratch, for example:
+
+echo frozen > sync_action
+echo reshape > sync_action
+
+This will corrupt data before reshape_position if the array is growing.
+Fix the problem by continuing the reshape from reshape_position.
+
+Reported-by: Peter Neuwirth <reddunur@online.de>
+Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/md.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 846bdee4daa0e..1c87f3e708094 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4903,11 +4903,21 @@ action_store(struct mddev *mddev, const char *page, size_t len)
+                       return -EINVAL;
+               err = mddev_lock(mddev);
+               if (!err) {
+-                      if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
++                      if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+                               err =  -EBUSY;
+-                      else {
++                      } else if (mddev->reshape_position == MaxSector ||
++                                 mddev->pers->check_reshape == NULL ||
++                                 mddev->pers->check_reshape(mddev)) {
+                               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                               err = mddev->pers->start_reshape(mddev);
++                      } else {
++                              /*
++                               * If reshape is still in progress, and
++                               * md_check_recovery() can continue to reshape,
++                               * don't restart reshape because data can be
++                               * corrupted for raid456.
++                               */
++                              clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                       }
+                       mddev_unlock(mddev);
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.1/md-raid10-prevent-soft-lockup-while-flush-writes.patch b/queue-6.1/md-raid10-prevent-soft-lockup-while-flush-writes.patch
new file mode 100644 (file)
index 0000000..28085ff
--- /dev/null
@@ -0,0 +1,79 @@
+From 5036866ba258e5dd80caf93c4ce92bf13eef7d82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 May 2023 21:11:00 +0800
+Subject: md/raid10: prevent soft lockup while flush writes
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ]
+
+Currently, there is no limit for raid1/raid10 plugged bio. While flushing
+writes, raid1 has cond_resched() while raid10 doesn't, and too many
+writes can cause soft lockup.
+
+The following soft lockup can be triggered easily with a writeback test
+for raid10 with ramdisks:
+
+watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293]
+Call Trace:
+ <TASK>
+ call_rcu+0x16/0x20
+ put_object+0x41/0x80
+ __delete_object+0x50/0x90
+ delete_object_full+0x2b/0x40
+ kmemleak_free+0x46/0xa0
+ slab_free_freelist_hook.constprop.0+0xed/0x1a0
+ kmem_cache_free+0xfd/0x300
+ mempool_free_slab+0x1f/0x30
+ mempool_free+0x3a/0x100
+ bio_free+0x59/0x80
+ bio_put+0xcf/0x2c0
+ free_r10bio+0xbf/0xf0
+ raid_end_bio_io+0x78/0xb0
+ one_write_done+0x8a/0xa0
+ raid10_end_write_request+0x1b4/0x430
+ bio_endio+0x175/0x320
+ brd_submit_bio+0x3b9/0x9b7 [brd]
+ __submit_bio+0x69/0xe0
+ submit_bio_noacct_nocheck+0x1e6/0x5a0
+ submit_bio_noacct+0x38c/0x7e0
+ flush_pending_writes+0xf0/0x240
+ raid10d+0xac/0x1ed0
+
+Fix the problem by adding cond_resched() to raid10 like what raid1 did.
+
+Note that unlimited plugged bios still need to be optimized: for example,
+when lots of dirty pages are written back, this takes lots of memory and
+io spends a long time in the plug, hence io latency is bad.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/raid10.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 7b318e7e8d459..009f7ffe4e10c 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -920,6 +920,7 @@ static void flush_pending_writes(struct r10conf *conf)
+                       raid1_submit_write(bio);
+                       bio = next;
++                      cond_resched();
+               }
+               blk_finish_plug(&plug);
+       } else
+@@ -1130,6 +1131,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+               raid1_submit_write(bio);
+               bio = next;
++              cond_resched();
+       }
+       kfree(plug);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-add-an-nfsd4_encode_nfstime4-helper.patch b/queue-6.1/nfsd-add-an-nfsd4_encode_nfstime4-helper.patch
new file mode 100644 (file)
index 0000000..b896b59
--- /dev/null
@@ -0,0 +1,97 @@
+From c64e15be4c1424636df5a25108b846072809d0d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Jun 2023 10:13:39 -0400
+Subject: NFSD: Add an nfsd4_encode_nfstime4() helper
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 262176798b18b12fd8ab84c94cfece0a6a652476 ]
+
+Clean up: de-duplicate some common code.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Tom Talpey <tom@talpey.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 26 insertions(+), 20 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 597f14a80512f..514f4456cf5c6 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+       return p;
+ }
++static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
++                                  struct timespec64 *tv)
++{
++      __be32 *p;
++
++      p = xdr_reserve_space(xdr, XDR_UNIT * 3);
++      if (!p)
++              return nfserr_resource;
++
++      p = xdr_encode_hyper(p, (s64)tv->tv_sec);
++      *p = cpu_to_be32(tv->tv_nsec);
++      return nfs_ok;
++}
++
+ /*
+  * ctime (in NFSv4, time_metadata) is not writeable, and the client
+  * doesn't really care what resolution could theoretically be stored by
+@@ -3346,11 +3360,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+               p = xdr_encode_hyper(p, dummy64);
+       }
+       if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+-              p = xdr_reserve_space(xdr, 12);
+-              if (!p)
+-                      goto out_resource;
+-              p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
+-              *p++ = cpu_to_be32(stat.atime.tv_nsec);
++              status = nfsd4_encode_nfstime4(xdr, &stat.atime);
++              if (status)
++                      goto out;
+       }
+       if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+               p = xdr_reserve_space(xdr, 12);
+@@ -3359,25 +3371,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+               p = encode_time_delta(p, d_inode(dentry));
+       }
+       if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+-              p = xdr_reserve_space(xdr, 12);
+-              if (!p)
+-                      goto out_resource;
+-              p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
+-              *p++ = cpu_to_be32(stat.ctime.tv_nsec);
++              status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
++              if (status)
++                      goto out;
+       }
+       if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+-              p = xdr_reserve_space(xdr, 12);
+-              if (!p)
+-                      goto out_resource;
+-              p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+-              *p++ = cpu_to_be32(stat.mtime.tv_nsec);
++              status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
++              if (status)
++                      goto out;
+       }
+       if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+-              p = xdr_reserve_space(xdr, 12);
+-              if (!p)
+-                      goto out_resource;
+-              p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
+-              *p++ = cpu_to_be32(stat.btime.tv_nsec);
++              status = nfsd4_encode_nfstime4(xdr, &stat.btime);
++              if (status)
++                      goto out;
+       }
+       if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+               u64 ino = stat.ino;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-add-some-comments-to-nfsd_file_do_acquire.patch b/queue-6.1/nfsd-add-some-comments-to-nfsd_file_do_acquire.patch
new file mode 100644 (file)
index 0000000..6d3946d
--- /dev/null
@@ -0,0 +1,39 @@
+From fd22589985ab174696c30d86be0cdea35b00176b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:12 -0500
+Subject: nfsd: add some comments to nfsd_file_do_acquire
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b680cb9b737331aad271feebbedafb865504e234 ]
+
+David Howells mentioned that he found this bit of code confusing, so
+sprinkle in some comments to clarify.
+
+Reported-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index ab37b85b72077..50349449a4e52 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1094,6 +1094,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       rcu_read_unlock();
+       if (nf) {
++              /*
++               * If the nf is on the LRU then it holds an extra reference
++               * that must be put if it's removed. It had better not be
++               * the last one however, since we should hold another.
++               */
+               if (nfsd_file_lru_remove(nf))
+                       WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+               goto wait_for_construction;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch b/queue-6.1/nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch
new file mode 100644 (file)
index 0000000..308030e
--- /dev/null
@@ -0,0 +1,61 @@
+From 84897a6adc1d73b3d75791c1336ca59ca96bded8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:33:47 -0500
+Subject: nfsd: allow nfsd_file_get to sanely handle a NULL pointer
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 70f62231cdfd52357836733dd31db787e0412ab2 ]
+
+...and remove some now-useless NULL pointer checks in its callers.
+
+Suggested-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 5 ++---
+ fs/nfsd/nfs4state.c | 4 +---
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 697acf5c3c681..6e8712bd7c998 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -431,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf)
+ struct nfsd_file *
+ nfsd_file_get(struct nfsd_file *nf)
+ {
+-      if (likely(refcount_inc_not_zero(&nf->nf_ref)))
++      if (nf && refcount_inc_not_zero(&nf->nf_ref))
+               return nf;
+       return NULL;
+ }
+@@ -1086,8 +1086,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       rcu_read_lock();
+       nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+                              nfsd_file_rhash_params);
+-      if (nf)
+-              nf = nfsd_file_get(nf);
++      nf = nfsd_file_get(nf);
+       rcu_read_unlock();
+       if (nf) {
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index b9d694ec25d19..e4522e86e984e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ static struct nfsd_file *
+ __nfs4_get_fd(struct nfs4_file *f, int oflag)
+ {
+-      if (f->fi_fds[oflag])
+-              return nfsd_file_get(f->fi_fds[oflag]);
+-      return NULL;
++      return nfsd_file_get(f->fi_fds[oflag]);
+ }
+ static struct nfsd_file *
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-allow-reaping-files-still-under-writeback.patch b/queue-6.1/nfsd-allow-reaping-files-still-under-writeback.patch
new file mode 100644 (file)
index 0000000..1f919d3
--- /dev/null
@@ -0,0 +1,95 @@
+From ac9a9f41a15c31910d32eee697d7fab55053c493 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 06:53:54 -0500
+Subject: nfsd: allow reaping files still under writeback
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit dcb779fcd4ed5984ad15991d574943d12a8693d1 ]
+
+On most filesystems, there is no reason to delay reaping an nfsd_file
+just because its underlying inode is still under writeback. nfsd just
+relies on client activity or the local flusher threads to do writeback.
+
+The main exception is NFS, which flushes all of its dirty data on last
+close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to
+signal that they do this, and only skip closing files under writeback on
+such filesystems.
+
+Also, remove a redundant NULL file pointer check in
+nfsd_file_check_writeback, and clean up nfs's export op flag
+definitions.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+[ cel: adjusted to apply to v6.1.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/export.c          |  9 ++++++---
+ fs/nfsd/filecache.c      | 12 +++++++++++-
+ include/linux/exportfs.h |  1 +
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfs/export.c b/fs/nfs/export.c
+index 01596f2d0a1ed..9fe9586a51b71 100644
+--- a/fs/nfs/export.c
++++ b/fs/nfs/export.c
+@@ -156,7 +156,10 @@ const struct export_operations nfs_export_ops = {
+       .fh_to_dentry = nfs_fh_to_dentry,
+       .get_parent = nfs_get_parent,
+       .fetch_iversion = nfs_fetch_iversion,
+-      .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
+-              EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
+-              EXPORT_OP_NOATOMIC_ATTR,
++      .flags = EXPORT_OP_NOWCC                |
++               EXPORT_OP_NOSUBTREECHK         |
++               EXPORT_OP_CLOSE_BEFORE_UNLINK  |
++               EXPORT_OP_REMOTE_FS            |
++               EXPORT_OP_NOATOMIC_ATTR        |
++               EXPORT_OP_FLUSH_ON_CLOSE,
+ };
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 9b7082fdd2115..a6fa6e9802772 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -402,13 +402,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
+       struct file *file = nf->nf_file;
+       struct address_space *mapping;
+-      if (!file || !(file->f_mode & FMODE_WRITE))
++      /* File not open for write? */
++      if (!(file->f_mode & FMODE_WRITE))
+               return false;
++
++      /*
++       * Some filesystems (e.g. NFS) flush all dirty data on close.
++       * On others, there is no need to wait for writeback.
++       */
++      if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
++              return false;
++
+       mapping = file->f_mapping;
+       return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+               mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+ }
++
+ static bool nfsd_file_lru_add(struct nfsd_file *nf)
+ {
+       set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
+index fe848901fcc3a..218fc5c54e901 100644
+--- a/include/linux/exportfs.h
++++ b/include/linux/exportfs.h
+@@ -221,6 +221,7 @@ struct export_operations {
+ #define EXPORT_OP_NOATOMIC_ATTR               (0x10) /* Filesystem cannot supply
+                                                 atomic attribute updates
+                                               */
++#define EXPORT_OP_FLUSH_ON_CLOSE      (0x20) /* fs flushes file data on close */
+       unsigned long   flags;
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-convert-filecache-to-rhltable.patch b/queue-6.1/nfsd-convert-filecache-to-rhltable.patch
new file mode 100644 (file)
index 0000000..5cbc9a8
--- /dev/null
@@ -0,0 +1,578 @@
+From e0179f3964a7b79117b258fcdf461385cf2f7d62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 15:09:04 -0500
+Subject: NFSD: Convert filecache to rhltable
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c4c649ab413ba6a785b25f0edbb12f617c87db2a ]
+
+While we were converting the nfs4_file hashtable to use the kernel's
+resizable hashtable data structure, Neil Brown observed that the
+list variant (rhltable) would be better for managing nfsd_file items
+as well. The nfsd_file hash table will contain multiple entries for
+the same inode -- these should be kept together on a list. And, it
+could be possible for exotic or malicious client behavior to cause
+the hash table to resize itself on every insertion.
+
+A nice simplification is that rhltable_lookup() can return a list
+that contains only nfsd_file items that match a given inode, which
+enables us to eliminate specialized hash table helper functions and
+use the default functions provided by the rhashtable implementation.
+
+Since we are now storing nfsd_file items for the same inode on a
+single list, that effectively reduces the number of hash entries
+that have to be tracked in the hash table. The minimum bucket count
+is therefore lowered.
+
+Light testing with fstests generic/531 shows no regressions.
+
+Suggested-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 311 ++++++++++++++++++--------------------------
+ fs/nfsd/filecache.h |   9 +-
+ 2 files changed, 133 insertions(+), 187 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index a6fa6e9802772..2f0b2d964cbb1 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -74,70 +74,9 @@ static struct list_lru                      nfsd_file_lru;
+ static unsigned long                  nfsd_file_flags;
+ static struct fsnotify_group          *nfsd_file_fsnotify_group;
+ static struct delayed_work            nfsd_filecache_laundrette;
+-static struct rhashtable              nfsd_file_rhash_tbl
++static struct rhltable                        nfsd_file_rhltable
+                                               ____cacheline_aligned_in_smp;
+-enum nfsd_file_lookup_type {
+-      NFSD_FILE_KEY_INODE,
+-      NFSD_FILE_KEY_FULL,
+-};
+-
+-struct nfsd_file_lookup_key {
+-      struct inode                    *inode;
+-      struct net                      *net;
+-      const struct cred               *cred;
+-      unsigned char                   need;
+-      bool                            gc;
+-      enum nfsd_file_lookup_type      type;
+-};
+-
+-/*
+- * The returned hash value is based solely on the address of an in-code
+- * inode, a pointer to a slab-allocated object. The entropy in such a
+- * pointer is concentrated in its middle bits.
+- */
+-static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
+-{
+-      unsigned long ptr = (unsigned long)inode;
+-      u32 k;
+-
+-      k = ptr >> L1_CACHE_SHIFT;
+-      k &= 0x00ffffff;
+-      return jhash2(&k, 1, seed);
+-}
+-
+-/**
+- * nfsd_file_key_hashfn - Compute the hash value of a lookup key
+- * @data: key on which to compute the hash value
+- * @len: rhash table's key_len parameter (unused)
+- * @seed: rhash table's random seed of the day
+- *
+- * Return value:
+- *   Computed 32-bit hash value
+- */
+-static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
+-{
+-      const struct nfsd_file_lookup_key *key = data;
+-
+-      return nfsd_file_inode_hash(key->inode, seed);
+-}
+-
+-/**
+- * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
+- * @data: object on which to compute the hash value
+- * @len: rhash table's key_len parameter (unused)
+- * @seed: rhash table's random seed of the day
+- *
+- * Return value:
+- *   Computed 32-bit hash value
+- */
+-static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
+-{
+-      const struct nfsd_file *nf = data;
+-
+-      return nfsd_file_inode_hash(nf->nf_inode, seed);
+-}
+-
+ static bool
+ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+ {
+@@ -158,55 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+       return true;
+ }
+-/**
+- * nfsd_file_obj_cmpfn - Match a cache item against search criteria
+- * @arg: search criteria
+- * @ptr: cache item to check
+- *
+- * Return values:
+- *   %0 - Item matches search criteria
+- *   %1 - Item does not match search criteria
+- */
+-static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+-                             const void *ptr)
+-{
+-      const struct nfsd_file_lookup_key *key = arg->key;
+-      const struct nfsd_file *nf = ptr;
+-
+-      switch (key->type) {
+-      case NFSD_FILE_KEY_INODE:
+-              if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+-                      return 1;
+-              if (nf->nf_inode != key->inode)
+-                      return 1;
+-              break;
+-      case NFSD_FILE_KEY_FULL:
+-              if (nf->nf_inode != key->inode)
+-                      return 1;
+-              if (nf->nf_may != key->need)
+-                      return 1;
+-              if (nf->nf_net != key->net)
+-                      return 1;
+-              if (!nfsd_match_cred(nf->nf_cred, key->cred))
+-                      return 1;
+-              if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+-                      return 1;
+-              if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+-                      return 1;
+-              break;
+-      }
+-      return 0;
+-}
+-
+ static const struct rhashtable_params nfsd_file_rhash_params = {
+       .key_len                = sizeof_field(struct nfsd_file, nf_inode),
+       .key_offset             = offsetof(struct nfsd_file, nf_inode),
+-      .head_offset            = offsetof(struct nfsd_file, nf_rhash),
+-      .hashfn                 = nfsd_file_key_hashfn,
+-      .obj_hashfn             = nfsd_file_obj_hashfn,
+-      .obj_cmpfn              = nfsd_file_obj_cmpfn,
+-      /* Reduce resizing churn on light workloads */
+-      .min_size               = 512,          /* buckets */
++      .head_offset            = offsetof(struct nfsd_file, nf_rlist),
++
++      /*
++       * Start with a single page hash table to reduce resizing churn
++       * on light workloads.
++       */
++      .min_size               = 256,
+       .automatic_shrinking    = true,
+ };
+@@ -309,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+ }
+ static struct nfsd_file *
+-nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
++nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
++              bool want_gc)
+ {
+       struct nfsd_file *nf;
+       nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+-      if (nf) {
+-              INIT_LIST_HEAD(&nf->nf_lru);
+-              nf->nf_birthtime = ktime_get();
+-              nf->nf_file = NULL;
+-              nf->nf_cred = get_current_cred();
+-              nf->nf_net = key->net;
+-              nf->nf_flags = 0;
+-              __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+-              __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+-              if (key->gc)
+-                      __set_bit(NFSD_FILE_GC, &nf->nf_flags);
+-              nf->nf_inode = key->inode;
+-              refcount_set(&nf->nf_ref, 1);
+-              nf->nf_may = key->need;
+-              nf->nf_mark = NULL;
+-      }
++      if (unlikely(!nf))
++              return NULL;
++
++      INIT_LIST_HEAD(&nf->nf_lru);
++      nf->nf_birthtime = ktime_get();
++      nf->nf_file = NULL;
++      nf->nf_cred = get_current_cred();
++      nf->nf_net = net;
++      nf->nf_flags = want_gc ?
++              BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
++              BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
++      nf->nf_inode = inode;
++      refcount_set(&nf->nf_ref, 1);
++      nf->nf_may = need;
++      nf->nf_mark = NULL;
+       return nf;
+ }
+@@ -354,8 +254,8 @@ static void
+ nfsd_file_hash_remove(struct nfsd_file *nf)
+ {
+       trace_nfsd_file_unhash(nf);
+-      rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+-                             nfsd_file_rhash_params);
++      rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
++                      nfsd_file_rhash_params);
+ }
+ static bool
+@@ -688,8 +588,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+  * @inode:   inode on which to close out nfsd_files
+  * @dispose: list on which to gather nfsd_files to close out
+  *
+- * An nfsd_file represents a struct file being held open on behalf of nfsd. An
+- * open file however can block other activity (such as leases), or cause
++ * An nfsd_file represents a struct file being held open on behalf of nfsd.
++ * An open file however can block other activity (such as leases), or cause
+  * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
+  *
+  * This function is intended to find open nfsd_files when this sort of
+@@ -702,21 +602,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ static void
+ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+ {
+-      struct nfsd_file_lookup_key key = {
+-              .type   = NFSD_FILE_KEY_INODE,
+-              .inode  = inode,
+-              .gc     = true,
+-      };
++      struct rhlist_head *tmp, *list;
+       struct nfsd_file *nf;
+       rcu_read_lock();
+-      do {
+-              nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+-                                     nfsd_file_rhash_params);
+-              if (!nf)
+-                      break;
++      list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++                             nfsd_file_rhash_params);
++      rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
++              if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
++                      continue;
+               nfsd_file_cond_queue(nf, dispose);
+-      } while (1);
++      }
+       rcu_read_unlock();
+ }
+@@ -840,7 +736,7 @@ nfsd_file_cache_init(void)
+       if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+               return 0;
+-      ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
++      ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
+       if (ret)
+               return ret;
+@@ -908,7 +804,7 @@ nfsd_file_cache_init(void)
+       nfsd_file_mark_slab = NULL;
+       destroy_workqueue(nfsd_filecache_wq);
+       nfsd_filecache_wq = NULL;
+-      rhashtable_destroy(&nfsd_file_rhash_tbl);
++      rhltable_destroy(&nfsd_file_rhltable);
+       goto out;
+ }
+@@ -927,7 +823,7 @@ __nfsd_file_cache_purge(struct net *net)
+       struct nfsd_file *nf;
+       LIST_HEAD(dispose);
+-      rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
++      rhltable_walk_enter(&nfsd_file_rhltable, &iter);
+       do {
+               rhashtable_walk_start(&iter);
+@@ -1033,7 +929,7 @@ nfsd_file_cache_shutdown(void)
+       nfsd_file_mark_slab = NULL;
+       destroy_workqueue(nfsd_filecache_wq);
+       nfsd_filecache_wq = NULL;
+-      rhashtable_destroy(&nfsd_file_rhash_tbl);
++      rhltable_destroy(&nfsd_file_rhltable);
+       for_each_possible_cpu(i) {
+               per_cpu(nfsd_file_cache_hits, i) = 0;
+@@ -1044,6 +940,35 @@ nfsd_file_cache_shutdown(void)
+       }
+ }
++static struct nfsd_file *
++nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
++                      struct inode *inode, unsigned char need,
++                      bool want_gc)
++{
++      struct rhlist_head *tmp, *list;
++      struct nfsd_file *nf;
++
++      list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++                             nfsd_file_rhash_params);
++      rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
++              if (nf->nf_may != need)
++                      continue;
++              if (nf->nf_net != net)
++                      continue;
++              if (!nfsd_match_cred(nf->nf_cred, cred))
++                      continue;
++              if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
++                      continue;
++              if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
++                      continue;
++
++              if (!nfsd_file_get(nf))
++                      continue;
++              return nf;
++      }
++      return NULL;
++}
++
+ /**
+  * nfsd_file_is_cached - are there any cached open files for this inode?
+  * @inode: inode to check
+@@ -1058,16 +983,20 @@ nfsd_file_cache_shutdown(void)
+ bool
+ nfsd_file_is_cached(struct inode *inode)
+ {
+-      struct nfsd_file_lookup_key key = {
+-              .type   = NFSD_FILE_KEY_INODE,
+-              .inode  = inode,
+-              .gc     = true,
+-      };
++      struct rhlist_head *tmp, *list;
++      struct nfsd_file *nf;
+       bool ret = false;
+-      if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
+-                                 nfsd_file_rhash_params) != NULL)
+-              ret = true;
++      rcu_read_lock();
++      list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++                             nfsd_file_rhash_params);
++      rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
++              if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
++                      ret = true;
++                      break;
++              }
++      rcu_read_unlock();
++
+       trace_nfsd_file_is_cached(inode, (int)ret);
+       return ret;
+ }
+@@ -1077,14 +1006,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                    unsigned int may_flags, struct file *file,
+                    struct nfsd_file **pnf, bool want_gc)
+ {
+-      struct nfsd_file_lookup_key key = {
+-              .type   = NFSD_FILE_KEY_FULL,
+-              .need   = may_flags & NFSD_FILE_MAY_MASK,
+-              .net    = SVC_NET(rqstp),
+-              .gc     = want_gc,
+-      };
++      unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
++      struct net *net = SVC_NET(rqstp);
++      struct nfsd_file *new, *nf;
++      const struct cred *cred;
+       bool open_retry = true;
+-      struct nfsd_file *nf;
++      struct inode *inode;
+       __be32 status;
+       int ret;
+@@ -1092,14 +1019,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+                               may_flags|NFSD_MAY_OWNER_OVERRIDE);
+       if (status != nfs_ok)
+               return status;
+-      key.inode = d_inode(fhp->fh_dentry);
+-      key.cred = get_current_cred();
++      inode = d_inode(fhp->fh_dentry);
++      cred = get_current_cred();
+ retry:
+       rcu_read_lock();
+-      nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+-                             nfsd_file_rhash_params);
+-      nf = nfsd_file_get(nf);
++      nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
+       rcu_read_unlock();
+       if (nf) {
+@@ -1113,21 +1038,32 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               goto wait_for_construction;
+       }
+-      nf = nfsd_file_alloc(&key, may_flags);
+-      if (!nf) {
++      new = nfsd_file_alloc(net, inode, need, want_gc);
++      if (!new) {
+               status = nfserr_jukebox;
+               goto out;
+       }
+-      ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+-                                         &key, &nf->nf_rhash,
+-                                         nfsd_file_rhash_params);
++      rcu_read_lock();
++      spin_lock(&inode->i_lock);
++      nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
++      if (unlikely(nf)) {
++              spin_unlock(&inode->i_lock);
++              rcu_read_unlock();
++              nfsd_file_slab_free(&new->nf_rcu);
++              goto wait_for_construction;
++      }
++      nf = new;
++      ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
++                            nfsd_file_rhash_params);
++      spin_unlock(&inode->i_lock);
++      rcu_read_unlock();
+       if (likely(ret == 0))
+               goto open_file;
+       if (ret == -EEXIST)
+               goto retry;
+-      trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
++      trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
+       status = nfserr_jukebox;
+       goto construction_err;
+@@ -1136,7 +1072,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       /* Did construction of this file fail? */
+       if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+-              trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
++              trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
+               if (!open_retry) {
+                       status = nfserr_jukebox;
+                       goto construction_err;
+@@ -1158,13 +1094,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               nfsd_file_check_write_error(nf);
+               *pnf = nf;
+       }
+-      put_cred(key.cred);
+-      trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
++      put_cred(cred);
++      trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
+       return status;
+ open_file:
+       trace_nfsd_file_alloc(nf);
+-      nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
++      nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
+       if (nf->nf_mark) {
+               if (file) {
+                       get_file(file);
+@@ -1182,7 +1118,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+        * If construction failed, or we raced with a call to unlink()
+        * then unhash.
+        */
+-      if (status == nfs_ok && key.inode->i_nlink == 0)
++      if (status != nfs_ok || inode->i_nlink == 0)
+               status = nfserr_jukebox;
+       if (status != nfs_ok)
+               nfsd_file_unhash(nf);
+@@ -1209,8 +1145,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+  * seconds after the final nfsd_file_put() in case the caller
+  * wants to re-use it.
+  *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ *   %nfs_ok - @pnf points to an nfsd_file with its reference
++ *   count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+  */
+ __be32
+ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1230,8 +1169,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+  * but not garbage-collected. The object is unhashed after the
+  * final nfsd_file_put().
+  *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ *   %nfs_ok - @pnf points to an nfsd_file with its reference
++ *   count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+  */
+ __be32
+ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1252,8 +1194,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+  * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+  * opening a new one.
+  *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ *   %nfs_ok - @pnf points to an nfsd_file with its reference
++ *   count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+  */
+ __be32
+ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1284,7 +1229,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+               lru = list_lru_count(&nfsd_file_lru);
+               rcu_read_lock();
+-              ht = &nfsd_file_rhash_tbl;
++              ht = &nfsd_file_rhltable.ht;
+               count = atomic_read(&ht->nelems);
+               tbl = rht_dereference_rcu(ht->tbl, ht);
+               buckets = tbl->size;
+@@ -1300,7 +1245,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+               evictions += per_cpu(nfsd_file_evictions, i);
+       }
+-      seq_printf(m, "total entries: %u\n", count);
++      seq_printf(m, "total inodes:  %u\n", count);
+       seq_printf(m, "hash buckets:  %u\n", buckets);
+       seq_printf(m, "lru entries:   %lu\n", lru);
+       seq_printf(m, "cache hits:    %lu\n", hits);
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 41516a4263ea5..e54165a3224f0 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -29,9 +29,8 @@ struct nfsd_file_mark {
+  * never be dereferenced, only used for comparison.
+  */
+ struct nfsd_file {
+-      struct rhash_head       nf_rhash;
+-      struct list_head        nf_lru;
+-      struct rcu_head         nf_rcu;
++      struct rhlist_head      nf_rlist;
++      void                    *nf_inode;
+       struct file             *nf_file;
+       const struct cred       *nf_cred;
+       struct net              *nf_net;
+@@ -40,10 +39,12 @@ struct nfsd_file {
+ #define NFSD_FILE_REFERENCED  (2)
+ #define NFSD_FILE_GC          (3)
+       unsigned long           nf_flags;
+-      struct inode            *nf_inode;      /* don't deref */
+       refcount_t              nf_ref;
+       unsigned char           nf_may;
++
+       struct nfsd_file_mark   *nf_mark;
++      struct list_head        nf_lru;
++      struct rcu_head         nf_rcu;
+       ktime_t                 nf_birthtime;
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch b/queue-6.1/nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch
new file mode 100644 (file)
index 0000000..b8aef4e
--- /dev/null
@@ -0,0 +1,101 @@
+From 3ffb64192436277690d09cc004c73101b7734148 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:11 -0500
+Subject: nfsd: don't kill nfsd_files because of lease break error
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit c6593366c0bf222be9c7561354dfb921c611745e ]
+
+An error from break_lease is non-fatal, so we needn't destroy the
+nfsd_file in that case. Just put the reference like we normally would
+and return the error.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 568963b8a4777..ab37b85b72077 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1102,7 +1102,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       nf = nfsd_file_alloc(&key, may_flags);
+       if (!nf) {
+               status = nfserr_jukebox;
+-              goto out_status;
++              goto out;
+       }
+       ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+@@ -1111,13 +1111,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       if (likely(ret == 0))
+               goto open_file;
+-      nfsd_file_slab_free(&nf->nf_rcu);
+-      nf = NULL;
+       if (ret == -EEXIST)
+               goto retry;
+       trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
+       status = nfserr_jukebox;
+-      goto out_status;
++      goto construction_err;
+ wait_for_construction:
+       wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+@@ -1127,29 +1125,25 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
+               if (!open_retry) {
+                       status = nfserr_jukebox;
+-                      goto out;
++                      goto construction_err;
+               }
+               open_retry = false;
+-              if (refcount_dec_and_test(&nf->nf_ref))
+-                      nfsd_file_free(nf);
+               goto retry;
+       }
+-
+       this_cpu_inc(nfsd_file_cache_hits);
+       status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
++      if (status != nfs_ok) {
++              nfsd_file_put(nf);
++              nf = NULL;
++      }
++
+ out:
+       if (status == nfs_ok) {
+               this_cpu_inc(nfsd_file_acquisitions);
+               nfsd_file_check_write_error(nf);
+               *pnf = nf;
+-      } else {
+-              if (refcount_dec_and_test(&nf->nf_ref))
+-                      nfsd_file_free(nf);
+-              nf = NULL;
+       }
+-
+-out_status:
+       put_cred(key.cred);
+       trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+       return status;
+@@ -1179,6 +1173,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       if (status != nfs_ok)
+               nfsd_file_unhash(nf);
+       clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
++      if (status == nfs_ok)
++              goto out;
++
++construction_err:
++      if (refcount_dec_and_test(&nf->nf_ref))
++              nfsd_file_free(nf);
++      nf = NULL;
+       goto out;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-don-t-open-code-clear_and_wake_up_bit.patch b/queue-6.1/nfsd-don-t-open-code-clear_and_wake_up_bit.patch
new file mode 100644 (file)
index 0000000..bfb67f4
--- /dev/null
@@ -0,0 +1,34 @@
+From f9a7567d2186c97f9e03d5d7ed90c33431a472ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:09 -0500
+Subject: nfsd: don't open-code clear_and_wake_up_bit
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b8bea9f6cdd7236c7c2238d022145e9b2f8aac22 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6e8712bd7c998..5b5d39ec7b010 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1174,9 +1174,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               status = nfserr_jukebox;
+       if (status != nfs_ok)
+               nfsd_file_unhash(nf);
+-      clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+-      smp_mb__after_atomic();
+-      wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
++      clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+       goto out;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-don-t-take-put-an-extra-reference-when-putting-.patch b/queue-6.1/nfsd-don-t-take-put-an-extra-reference-when-putting-.patch
new file mode 100644 (file)
index 0000000..60bc534
--- /dev/null
@@ -0,0 +1,38 @@
+From 51d7b689d98814aec1c0dee5d591bb57eecb47d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jan 2023 12:31:37 -0500
+Subject: nfsd: don't take/put an extra reference when putting a file
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b2ff1bd71db2a1b193a6dde0845adcd69cbcf75e ]
+
+The last thing that filp_close does is an fput, so don't bother taking
+and putting the extra reference.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 50349449a4e52..51e2947c21a7d 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -382,10 +382,8 @@ nfsd_file_free(struct nfsd_file *nf)
+       if (nf->nf_mark)
+               nfsd_file_mark_put(nf->nf_mark);
+       if (nf->nf_file) {
+-              get_file(nf->nf_file);
+-              filp_close(nf->nf_file, NULL);
+               nfsd_file_check_write_error(nf);
+-              fput(nf->nf_file);
++              filp_close(nf->nf_file, NULL);
+       }
+       /*
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-fix-creation-time-serialization-order.patch b/queue-6.1/nfsd-fix-creation-time-serialization-order.patch
new file mode 100644 (file)
index 0000000..b20d993
--- /dev/null
@@ -0,0 +1,64 @@
+From a90a656e0be7aca2852443302021edca6b6c014a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Jun 2023 17:09:06 -0400
+Subject: nfsd: Fix creation time serialization order
+
+From: Tavian Barnes <tavianator@tavianator.com>
+
+In nfsd4_encode_fattr(), TIME_CREATE was being written out after all
+other times.  However, they should be written out in an order that
+matches the bit flags in bmval1, which in this case are
+
+    #define FATTR4_WORD1_TIME_ACCESS        (1UL << 15)
+    #define FATTR4_WORD1_TIME_CREATE        (1UL << 18)
+    #define FATTR4_WORD1_TIME_DELTA         (1UL << 19)
+    #define FATTR4_WORD1_TIME_METADATA      (1UL << 20)
+    #define FATTR4_WORD1_TIME_MODIFY        (1UL << 21)
+
+so TIME_CREATE should come second.
+
+I noticed this on a FreeBSD NFSv4.2 client, which supports creation
+times.  On this client, file times were weirdly permuted.  With this
+patch applied on the server, times looked normal on the client.
+
+Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute")
+Link: https://unix.stackexchange.com/q/749605/56202
+Signed-off-by: Tavian Barnes <tavianator@tavianator.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4xdr.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 514f4456cf5c6..4ed9fef14adc2 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3364,6 +3364,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+               if (status)
+                       goto out;
+       }
++      if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
++              status = nfsd4_encode_nfstime4(xdr, &stat.btime);
++              if (status)
++                      goto out;
++      }
+       if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+               p = xdr_reserve_space(xdr, 12);
+               if (!p)
+@@ -3380,11 +3385,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+               if (status)
+                       goto out;
+       }
+-      if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+-              status = nfsd4_encode_nfstime4(xdr, &stat.btime);
+-              if (status)
+-                      goto out;
+-      }
+       if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+               u64 ino = stat.ino;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch b/queue-6.1/nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch
new file mode 100644 (file)
index 0000000..023f733
--- /dev/null
@@ -0,0 +1,48 @@
+From a756d96c306e659f97ceacf72814028f0d6342fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Apr 2023 10:53:18 -0700
+Subject: NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 147abcacee33781e75588869e944ddb07528a897 ]
+
+The following request sequence to the same file causes the NFS client and
+server to get into an infinite loop with COMMIT and NFS4ERR_DELAY:
+
+OPEN
+REMOVE
+WRITE
+COMMIT
+
+Problem reported by recall11, recall12, recall14, recall20, recall22,
+recall40, recall42, recall48, recall50 of nfstest suite.
+
+This patch restores the handling of the race between nfsd_file_do_acquire
+and unlink to what it was prior to the regression.
+
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index f40d8f3b35a4c..ee9c923192e08 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1099,8 +1099,6 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+        * then unhash.
+        */
+       if (status != nfs_ok || inode->i_nlink == 0)
+-              status = nfserr_jukebox;
+-      if (status != nfs_ok)
+               nfsd_file_unhash(nf);
+       clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+       if (status == nfs_ok)
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch b/queue-6.1/nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch
new file mode 100644 (file)
index 0000000..0305b70
--- /dev/null
@@ -0,0 +1,55 @@
+From f494a5af1d58eb2bba67f9dfe869001e4b7f8ee9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:39:00 -0500
+Subject: nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 6c31e4c98853a4ba47355ea151b36a77c42b7734 ]
+
+Since v4 files are expected to be long-lived, there's little value in
+closing them out of the cache when there is conflicting access.
+
+Change the comparator to also match the gc value in the key. Change both
+of the current users of that key to set the gc value in the key to
+"true".
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 5b5d39ec7b010..c36e3032d4386 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -175,6 +175,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+       switch (key->type) {
+       case NFSD_FILE_KEY_INODE:
++              if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++                      return 1;
+               if (nf->nf_inode != key->inode)
+                       return 1;
+               break;
+@@ -695,6 +697,7 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+       struct nfsd_file_lookup_key key = {
+               .type   = NFSD_FILE_KEY_INODE,
+               .inode  = inode,
++              .gc     = true,
+       };
+       struct nfsd_file *nf;
+@@ -1049,6 +1052,7 @@ nfsd_file_is_cached(struct inode *inode)
+       struct nfsd_file_lookup_key key = {
+               .type   = NFSD_FILE_KEY_INODE,
+               .inode  = inode,
++              .gc     = true,
+       };
+       bool ret = false;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch b/queue-6.1/nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch
new file mode 100644 (file)
index 0000000..7e7f3b5
--- /dev/null
@@ -0,0 +1,35 @@
+From 8832f93342b22e70fa14c6a809aedfe115a37007 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:39:01 -0500
+Subject: nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit d69b8dbfd0866abc5ec84652cc1c10fc3d4d91ef ]
+
+test_bit returns bool, so we can just compare the result of that to the
+key->gc value without the "!!".
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index c36e3032d4386..568963b8a4777 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -189,7 +189,7 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+                       return 1;
+               if (!nfsd_match_cred(nf->nf_cred, key->cred))
+                       return 1;
+-              if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++              if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+                       return 1;
+               if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+                       return 1;
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-simplify-the-delayed-disposal-list-code.patch b/queue-6.1/nfsd-simplify-the-delayed-disposal-list-code.patch
new file mode 100644 (file)
index 0000000..820f019
--- /dev/null
@@ -0,0 +1,119 @@
+From b6ea58695aec8f2b36a931723f6630cdc3ac409f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Apr 2023 17:31:44 -0400
+Subject: nfsd: simplify the delayed disposal list code
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 92e4a6733f922f0fef1d0995f7b2d0eaff86c7ea ]
+
+When queueing a dispose list to the appropriate "freeme" lists, it
+pointlessly queues the objects one at a time to an intermediate list.
+
+Remove a few helpers and just open code a list_move to make it more
+clear and efficient. Better document the resulting functions with
+kerneldoc comments.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 64 ++++++++++++++++-----------------------------
+ 1 file changed, 22 insertions(+), 42 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 2f0b2d964cbb1..f40d8f3b35a4c 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -402,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose)
+       }
+ }
+-static void
+-nfsd_file_list_remove_disposal(struct list_head *dst,
+-              struct nfsd_fcache_disposal *l)
+-{
+-      spin_lock(&l->lock);
+-      list_splice_init(&l->freeme, dst);
+-      spin_unlock(&l->lock);
+-}
+-
+-static void
+-nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+-{
+-      struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+-      struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+-
+-      spin_lock(&l->lock);
+-      list_splice_tail_init(files, &l->freeme);
+-      spin_unlock(&l->lock);
+-      queue_work(nfsd_filecache_wq, &l->work);
+-}
+-
+-static void
+-nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+-              struct net *net)
+-{
+-      struct nfsd_file *nf, *tmp;
+-
+-      list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+-              if (nf->nf_net == net)
+-                      list_move_tail(&nf->nf_lru, dst);
+-      }
+-}
+-
++/**
++ * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
++ * @dispose: list of nfsd_files to be disposed
++ *
++ * Transfers each file to the "freeme" list for its nfsd_net, to eventually
++ * be disposed of by the per-net garbage collector.
++ */
+ static void
+ nfsd_file_dispose_list_delayed(struct list_head *dispose)
+ {
+-      LIST_HEAD(list);
+-      struct nfsd_file *nf;
+-
+       while(!list_empty(dispose)) {
+-              nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+-              nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+-              nfsd_file_list_add_disposal(&list, nf->nf_net);
++              struct nfsd_file *nf = list_first_entry(dispose,
++                                              struct nfsd_file, nf_lru);
++              struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
++              struct nfsd_fcache_disposal *l = nn->fcache_disposal;
++
++              spin_lock(&l->lock);
++              list_move_tail(&nf->nf_lru, &l->freeme);
++              spin_unlock(&l->lock);
++              queue_work(nfsd_filecache_wq, &l->work);
+       }
+ }
+@@ -665,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode)
+  * nfsd_file_delayed_close - close unused nfsd_files
+  * @work: dummy
+  *
+- * Walk the LRU list and destroy any entries that have not been used since
+- * the last scan.
++ * Scrape the freeme list for this nfsd_net, and then dispose of them
++ * all.
+  */
+ static void
+ nfsd_file_delayed_close(struct work_struct *work)
+@@ -675,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work)
+       struct nfsd_fcache_disposal *l = container_of(work,
+                       struct nfsd_fcache_disposal, work);
+-      nfsd_file_list_remove_disposal(&head, l);
++      spin_lock(&l->lock);
++      list_splice_init(&l->freeme, &head);
++      spin_unlock(&l->lock);
++
+       nfsd_file_dispose_list(&head);
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.1/nfsd-update-comment-over-__nfsd_file_cache_purge.patch b/queue-6.1/nfsd-update-comment-over-__nfsd_file_cache_purge.patch
new file mode 100644 (file)
index 0000000..9e2affa
--- /dev/null
@@ -0,0 +1,33 @@
+From 40fe2393e752ab49b382d502c2f9ae0d31e4ac19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Jan 2023 12:21:16 -0500
+Subject: nfsd: update comment over __nfsd_file_cache_purge
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 972cc0e0924598cb293b919d39c848dc038b2c28 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 51e2947c21a7d..9b7082fdd2115 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -907,7 +907,8 @@ nfsd_file_cache_init(void)
+  * @net: net-namespace to shut down the cache (may be NULL)
+  *
+  * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
+- * then close out everything. Called when an nfsd instance is being shut down.
++ * then close out everything. Called when an nfsd instance is being shut down,
++ * and when the exports table is flushed.
+  */
+ static void
+ __nfsd_file_cache_purge(struct net *net)
+-- 
+2.43.0
+
diff --git a/queue-6.1/series b/queue-6.1/series
new file mode 100644 (file)
index 0000000..109865b
--- /dev/null
@@ -0,0 +1,18 @@
+md-fix-data-corruption-for-raid456-when-reshape-rest.patch
+md-raid10-prevent-soft-lockup-while-flush-writes.patch
+io_uring-unix-drop-usage-of-io_uring-socket.patch
+io_uring-drop-any-code-related-to-scm_rights.patch
+nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch
+nfsd-don-t-open-code-clear_and_wake_up_bit.patch
+nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch
+nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch
+nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch
+nfsd-add-some-comments-to-nfsd_file_do_acquire.patch
+nfsd-don-t-take-put-an-extra-reference-when-putting-.patch
+nfsd-update-comment-over-__nfsd_file_cache_purge.patch
+nfsd-allow-reaping-files-still-under-writeback.patch
+nfsd-convert-filecache-to-rhltable.patch
+nfsd-simplify-the-delayed-disposal-list-code.patch
+nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch
+nfsd-add-an-nfsd4_encode_nfstime4-helper.patch
+nfsd-fix-creation-time-serialization-order.patch