--- /dev/null
+From 6e16ae9b86871d8b78fd8999d6b8c396ed42993a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Mar 2024 18:15:05 -0600
+Subject: io_uring: drop any code related to SCM_RIGHTS
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream.
+
+This is dead code after we dropped support for passing io_uring fds
+over SCM_RIGHTS, get rid of it.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/io_uring_types.h | 3 -
+ io_uring/filetable.c | 10 +--
+ io_uring/io_uring.c | 31 +------
+ io_uring/rsrc.c | 151 +--------------------------------
+ io_uring/rsrc.h | 15 ----
+ 5 files changed, 8 insertions(+), 202 deletions(-)
+
+diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
+index f5b687a787a34..37aeea266ebb3 100644
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -330,9 +330,6 @@ struct io_ring_ctx {
+
+ struct list_head io_buffers_pages;
+
+- #if defined(CONFIG_UNIX)
+- struct socket *ring_sock;
+- #endif
+ /* hashed buffered write serialization */
+ struct io_wq_hash *hash_map;
+
+diff --git a/io_uring/filetable.c b/io_uring/filetable.c
+index b80614e7d6051..4660cb89ea9f5 100644
+--- a/io_uring/filetable.c
++++ b/io_uring/filetable.c
+@@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
+ needs_switch = true;
+ }
+
+- ret = io_scm_file_account(ctx, file);
+- if (!ret) {
+- *io_get_tag_slot(ctx->file_data, slot_index) = 0;
+- io_fixed_file_set(file_slot, file);
+- io_file_bitmap_set(&ctx->file_table, slot_index);
+- }
++ *io_get_tag_slot(ctx->file_data, slot_index) = 0;
++ io_fixed_file_set(file_slot, file);
++ io_file_bitmap_set(&ctx->file_table, slot_index);
++ return 0;
+ err:
+ if (needs_switch)
+ io_rsrc_node_switch(ctx, ctx->file_data);
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index cf7dd62da0e37..415248c1f82c6 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -60,7 +60,6 @@
+ #include <linux/net.h>
+ #include <net/sock.h>
+ #include <net/af_unix.h>
+-#include <net/scm.h>
+ #include <linux/anon_inodes.h>
+ #include <linux/sched/mm.h>
+ #include <linux/uaccess.h>
+@@ -2628,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
+ WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list));
+ WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist));
+
+-#if defined(CONFIG_UNIX)
+- if (ctx->ring_sock) {
+- ctx->ring_sock->file = NULL; /* so that iput() is called */
+- sock_release(ctx->ring_sock);
+- }
+-#endif
+ WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
+
+ if (ctx->mm_account) {
+@@ -3438,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
+ /*
+ * Allocate an anonymous fd, this is what constitutes the application
+ * visible backing of an io_uring instance. The application mmaps this
+- * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
+- * we have to tie this fd to a socket for file garbage collection purposes.
++ * fd to gain access to the SQ/CQ ring details.
+ */
+ static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
+ {
+- struct file *file;
+-#if defined(CONFIG_UNIX)
+- int ret;
+-
+- ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
+- &ctx->ring_sock);
+- if (ret)
+- return ERR_PTR(ret);
+-#endif
+-
+- file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
++ return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx,
+ O_RDWR | O_CLOEXEC, NULL);
+-#if defined(CONFIG_UNIX)
+- if (IS_ERR(file)) {
+- sock_release(ctx->ring_sock);
+- ctx->ring_sock = NULL;
+- } else {
+- ctx->ring_sock->file = file;
+- }
+-#endif
+- return file;
+ }
+
+ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
+diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
+index 7ada0339b3870..ac658cfa89c63 100644
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
+ err = -EBADF;
+ break;
+ }
+- err = io_scm_file_account(ctx, file);
+- if (err) {
+- fput(file);
+- break;
+- }
+ *io_get_tag_slot(data, i) = tag;
+ io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
+@@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
+ for (i = 0; i < ctx->nr_user_files; i++) {
+ struct file *file = io_file_from_index(&ctx->file_table, i);
+
+- /* skip scm accounted files, they'll be freed by ->ring_sock */
+- if (!file || io_file_need_scm(file))
++ if (!file)
+ continue;
+ io_file_bitmap_clear(&ctx->file_table, i);
+ fput(file);
+ }
+
+-#if defined(CONFIG_UNIX)
+- if (ctx->ring_sock) {
+- struct sock *sock = ctx->ring_sock->sk;
+- struct sk_buff *skb;
+-
+- while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
+- kfree_skb(skb);
+- }
+-#endif
+ io_free_file_tables(&ctx->file_table);
+ io_file_table_set_alloc_range(ctx, 0, 0);
+ io_rsrc_data_free(ctx->file_data);
+@@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+ return ret;
+ }
+
+-/*
+- * Ensure the UNIX gc is aware of our file set, so we are certain that
+- * the io_uring can be safely unregistered on process exit, even if we have
+- * loops in the file referencing. We account only files that can hold other
+- * files because otherwise they can't form a loop and so are not interesting
+- * for GC.
+- */
+-int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
+-{
+-#if defined(CONFIG_UNIX)
+- struct sock *sk = ctx->ring_sock->sk;
+- struct sk_buff_head *head = &sk->sk_receive_queue;
+- struct scm_fp_list *fpl;
+- struct sk_buff *skb;
+-
+- if (likely(!io_file_need_scm(file)))
+- return 0;
+-
+- /*
+- * See if we can merge this file into an existing skb SCM_RIGHTS
+- * file set. If there's no room, fall back to allocating a new skb
+- * and filling it in.
+- */
+- spin_lock_irq(&head->lock);
+- skb = skb_peek(head);
+- if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
+- __skb_unlink(skb, head);
+- else
+- skb = NULL;
+- spin_unlock_irq(&head->lock);
+-
+- if (!skb) {
+- fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+- if (!fpl)
+- return -ENOMEM;
+-
+- skb = alloc_skb(0, GFP_KERNEL);
+- if (!skb) {
+- kfree(fpl);
+- return -ENOMEM;
+- }
+-
+- fpl->user = get_uid(current_user());
+- fpl->max = SCM_MAX_FD;
+- fpl->count = 0;
+-
+- UNIXCB(skb).fp = fpl;
+- skb->sk = sk;
+- skb->scm_io_uring = 1;
+- skb->destructor = unix_destruct_scm;
+- refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+- }
+-
+- fpl = UNIXCB(skb).fp;
+- fpl->fp[fpl->count++] = get_file(file);
+- unix_inflight(fpl->user, file);
+- skb_queue_head(head, skb);
+- fput(file);
+-#endif
+- return 0;
+-}
+-
+ static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
+ {
+ struct file *file = prsrc->file;
+-#if defined(CONFIG_UNIX)
+- struct sock *sock = ctx->ring_sock->sk;
+- struct sk_buff_head list, *head = &sock->sk_receive_queue;
+- struct sk_buff *skb;
+- int i;
+-
+- if (!io_file_need_scm(file)) {
+- fput(file);
+- return;
+- }
+-
+- __skb_queue_head_init(&list);
+-
+- /*
+- * Find the skb that holds this file in its SCM_RIGHTS. When found,
+- * remove this entry and rearrange the file array.
+- */
+- skb = skb_dequeue(head);
+- while (skb) {
+- struct scm_fp_list *fp;
+
+- fp = UNIXCB(skb).fp;
+- for (i = 0; i < fp->count; i++) {
+- int left;
+-
+- if (fp->fp[i] != file)
+- continue;
+-
+- unix_notinflight(fp->user, fp->fp[i]);
+- left = fp->count - 1 - i;
+- if (left) {
+- memmove(&fp->fp[i], &fp->fp[i + 1],
+- left * sizeof(struct file *));
+- }
+- fp->count--;
+- if (!fp->count) {
+- kfree_skb(skb);
+- skb = NULL;
+- } else {
+- __skb_queue_tail(&list, skb);
+- }
+- fput(file);
+- file = NULL;
+- break;
+- }
+-
+- if (!file)
+- break;
+-
+- __skb_queue_tail(&list, skb);
+-
+- skb = skb_dequeue(head);
+- }
+-
+- if (skb_peek(&list)) {
+- spin_lock_irq(&head->lock);
+- while ((skb = __skb_dequeue(&list)) != NULL)
+- __skb_queue_tail(head, skb);
+- spin_unlock_irq(&head->lock);
+- }
+-#else
+ fput(file);
+-#endif
+ }
+
+ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+@@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+ goto fail;
+
+ /*
+- * Don't allow io_uring instances to be registered. If UNIX
+- * isn't enabled, then this causes a reference cycle and this
+- * instance can never get freed. If UNIX is enabled we'll
+- * handle it just fine, but there's still no point in allowing
+- * a ring fd as it doesn't support regular read/write anyway.
++ * Don't allow io_uring instances to be registered.
+ */
+ if (io_is_uring_fops(file)) {
+ fput(file);
+ goto fail;
+ }
+- ret = io_scm_file_account(ctx, file);
+- if (ret) {
+- fput(file);
+- goto fail;
+- }
+ file_slot = io_fixed_file_slot(&ctx->file_table, i);
+ io_fixed_file_set(file_slot, file);
+ io_file_bitmap_set(&ctx->file_table, i);
+diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
+index acaf8dad05401..85f145607c620 100644
+--- a/io_uring/rsrc.h
++++ b/io_uring/rsrc.h
+@@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx);
+ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned nr_args, u64 __user *tags);
+
+-int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file);
+-
+-static inline bool io_file_need_scm(struct file *filp)
+-{
+- return false;
+-}
+-
+-static inline int io_scm_file_account(struct io_ring_ctx *ctx,
+- struct file *file)
+-{
+- if (likely(!io_file_need_scm(file)))
+- return 0;
+- return __io_scm_file_account(ctx, file);
+-}
+-
+ int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned nr_args);
+ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
+--
+2.43.0
+
--- /dev/null
+From dc638a609ff754e234ca3900338628b94617e4a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Mar 2024 18:10:12 -0600
+Subject: io_uring/unix: drop usage of io_uring socket
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit a4104821ad651d8a0b374f0b2474c345bbb42f82 upstream.
+
+Since we no longer allow sending io_uring fds over SCM_RIGHTS, move to
+using io_is_uring_fops() to detect whether this is an io_uring fd or not.
+With that done, kill off io_uring_get_socket() as nobody calls it
+anymore.
+
+This is in preparation to yanking out the rest of the core related to
+unix gc with io_uring.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/io_uring.h | 10 +++++-----
+ io_uring/io_uring.c | 13 -------------
+ io_uring/io_uring.h | 1 -
+ net/core/scm.c | 2 +-
+ net/unix/scm.c | 4 +---
+ 5 files changed, 7 insertions(+), 23 deletions(-)
+
+diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
+index a1484cdb3158e..a8f3058448eaa 100644
+--- a/include/linux/io_uring.h
++++ b/include/linux/io_uring.h
+@@ -42,11 +42,11 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
+ unsigned issue_flags);
+ void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned));
+-struct sock *io_uring_get_socket(struct file *file);
+ void __io_uring_cancel(bool cancel_all);
+ void __io_uring_free(struct task_struct *tsk);
+ void io_uring_unreg_ringfd(void);
+ const char *io_uring_get_opcode(u8 opcode);
++bool io_is_uring_fops(struct file *file);
+
+ static inline void io_uring_files_cancel(void)
+ {
+@@ -71,6 +71,10 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+ {
+ return -EOPNOTSUPP;
+ }
++static inline bool io_is_uring_fops(struct file *file)
++{
++ return false;
++}
+ static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+ ssize_t ret2, unsigned issue_flags)
+ {
+@@ -79,10 +83,6 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+ void (*task_work_cb)(struct io_uring_cmd *, unsigned))
+ {
+ }
+-static inline struct sock *io_uring_get_socket(struct file *file)
+-{
+- return NULL;
+-}
+ static inline void io_uring_task_cancel(void)
+ {
+ }
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 35894955b4549..cf7dd62da0e37 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -153,19 +153,6 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx);
+
+ static struct kmem_cache *req_cachep;
+
+-struct sock *io_uring_get_socket(struct file *file)
+-{
+-#if defined(CONFIG_UNIX)
+- if (io_is_uring_fops(file)) {
+- struct io_ring_ctx *ctx = file->private_data;
+-
+- return ctx->ring_sock->sk;
+- }
+-#endif
+- return NULL;
+-}
+-EXPORT_SYMBOL(io_uring_get_socket);
+-
+ static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
+ {
+ if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
+index 019600570ee49..59e6f755f12c6 100644
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
+ }
+
+ void __io_req_task_work_add(struct io_kiocb *req, bool allow_local);
+-bool io_is_uring_fops(struct file *file);
+ bool io_alloc_async_data(struct io_kiocb *req);
+ void io_req_task_queue(struct io_kiocb *req);
+ void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
+diff --git a/net/core/scm.c b/net/core/scm.c
+index e762a4b8a1d22..a877c4ef4c256 100644
+--- a/net/core/scm.c
++++ b/net/core/scm.c
+@@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
+ if (fd < 0 || !(file = fget_raw(fd)))
+ return -EBADF;
+ /* don't allow io_uring files */
+- if (io_uring_get_socket(file)) {
++ if (io_is_uring_fops(file)) {
+ fput(file);
+ return -EINVAL;
+ }
+diff --git a/net/unix/scm.c b/net/unix/scm.c
+index e8e2a00bb0f58..d1048b4c2baaf 100644
+--- a/net/unix/scm.c
++++ b/net/unix/scm.c
+@@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp)
+ /* PF_UNIX ? */
+ if (s && sock->ops && sock->ops->family == PF_UNIX)
+ u_sock = s;
+- } else {
+- /* Could be an io_uring instance */
+- u_sock = io_uring_get_socket(filp);
+ }
++
+ return u_sock;
+ }
+ EXPORT_SYMBOL(unix_get_socket);
+--
+2.43.0
+
--- /dev/null
+From 90a9cc9319044b9183d226527684e42547c5703c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 09:56:07 +0800
+Subject: md: fix data corruption for raid456 when reshape restart while grow
+ up
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ]
+
+Currently, if a reshape is interrupted, echoing "reshape" to sync_action
+will restart the reshape from scratch, for example:
+
+echo frozen > sync_action
+echo reshape > sync_action
+
+This will corrupt data before reshape_position if the array is growing.
+Fix the problem by continuing the reshape from reshape_position.
+
+Reported-by: Peter Neuwirth <reddunur@online.de>
+Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/md.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index 846bdee4daa0e..1c87f3e708094 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -4903,11 +4903,21 @@ action_store(struct mddev *mddev, const char *page, size_t len)
+ return -EINVAL;
+ err = mddev_lock(mddev);
+ if (!err) {
+- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
++ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
+ err = -EBUSY;
+- else {
++ } else if (mddev->reshape_position == MaxSector ||
++ mddev->pers->check_reshape == NULL ||
++ mddev->pers->check_reshape(mddev)) {
+ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ err = mddev->pers->start_reshape(mddev);
++ } else {
++ /*
++ * If reshape is still in progress, and
++ * md_check_recovery() can continue to reshape,
++ * don't restart reshape because data can be
++ * corrupted for raid456.
++ */
++ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ }
+ mddev_unlock(mddev);
+ }
+--
+2.43.0
+
--- /dev/null
+From 5036866ba258e5dd80caf93c4ce92bf13eef7d82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 May 2023 21:11:00 +0800
+Subject: md/raid10: prevent soft lockup while flush writes
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ]
+
+Currently, there is no limit for raid1/raid10 plugged bio. While flushing
+writes, raid1 has cond_resched() while raid10 doesn't, and too many
+writes can cause soft lockup.
+
+The following soft lockup can be triggered easily with a writeback test for
+raid10 with ramdisks:
+
+watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293]
+Call Trace:
+ <TASK>
+ call_rcu+0x16/0x20
+ put_object+0x41/0x80
+ __delete_object+0x50/0x90
+ delete_object_full+0x2b/0x40
+ kmemleak_free+0x46/0xa0
+ slab_free_freelist_hook.constprop.0+0xed/0x1a0
+ kmem_cache_free+0xfd/0x300
+ mempool_free_slab+0x1f/0x30
+ mempool_free+0x3a/0x100
+ bio_free+0x59/0x80
+ bio_put+0xcf/0x2c0
+ free_r10bio+0xbf/0xf0
+ raid_end_bio_io+0x78/0xb0
+ one_write_done+0x8a/0xa0
+ raid10_end_write_request+0x1b4/0x430
+ bio_endio+0x175/0x320
+ brd_submit_bio+0x3b9/0x9b7 [brd]
+ __submit_bio+0x69/0xe0
+ submit_bio_noacct_nocheck+0x1e6/0x5a0
+ submit_bio_noacct+0x38c/0x7e0
+ flush_pending_writes+0xf0/0x240
+ raid10d+0xac/0x1ed0
+
+Fix the problem by adding cond_resched() to raid10 like what raid1 did.
+
+Note that unlimited plugged bios still need to be optimized: for example,
+when lots of dirty pages are written back, this will take lots of memory
+and io will spend a long time in the plug, hence io latency is bad.
+
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/raid10.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 7b318e7e8d459..009f7ffe4e10c 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -920,6 +920,7 @@ static void flush_pending_writes(struct r10conf *conf)
+
+ raid1_submit_write(bio);
+ bio = next;
++ cond_resched();
+ }
+ blk_finish_plug(&plug);
+ } else
+@@ -1130,6 +1131,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
+
+ raid1_submit_write(bio);
+ bio = next;
++ cond_resched();
+ }
+ kfree(plug);
+ }
+--
+2.43.0
+
--- /dev/null
+From c64e15be4c1424636df5a25108b846072809d0d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Jun 2023 10:13:39 -0400
+Subject: NFSD: Add an nfsd4_encode_nfstime4() helper
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 262176798b18b12fd8ab84c94cfece0a6a652476 ]
+
+Clean up: de-duplicate some common code.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Tom Talpey <tom@talpey.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 26 insertions(+), 20 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 597f14a80512f..514f4456cf5c6 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+ return p;
+ }
+
++static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
++ struct timespec64 *tv)
++{
++ __be32 *p;
++
++ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
++ if (!p)
++ return nfserr_resource;
++
++ p = xdr_encode_hyper(p, (s64)tv->tv_sec);
++ *p = cpu_to_be32(tv->tv_nsec);
++ return nfs_ok;
++}
++
+ /*
+ * ctime (in NFSv4, time_metadata) is not writeable, and the client
+ * doesn't really care what resolution could theoretically be stored by
+@@ -3346,11 +3360,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ p = xdr_encode_hyper(p, dummy64);
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
+- *p++ = cpu_to_be32(stat.atime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.atime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+ p = xdr_reserve_space(xdr, 12);
+@@ -3359,25 +3371,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ p = encode_time_delta(p, d_inode(dentry));
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
+- *p++ = cpu_to_be32(stat.ctime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+- *p++ = cpu_to_be32(stat.mtime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
+- *p++ = cpu_to_be32(stat.btime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.btime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+ u64 ino = stat.ino;
+--
+2.43.0
+
--- /dev/null
+From fd22589985ab174696c30d86be0cdea35b00176b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:12 -0500
+Subject: nfsd: add some comments to nfsd_file_do_acquire
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b680cb9b737331aad271feebbedafb865504e234 ]
+
+David Howells mentioned that he found this bit of code confusing, so
+sprinkle in some comments to clarify.
+
+Reported-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index ab37b85b72077..50349449a4e52 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1094,6 +1094,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ rcu_read_unlock();
+
+ if (nf) {
++ /*
++ * If the nf is on the LRU then it holds an extra reference
++ * that must be put if it's removed. It had better not be
++ * the last one however, since we should hold another.
++ */
+ if (nfsd_file_lru_remove(nf))
+ WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+ goto wait_for_construction;
+--
+2.43.0
+
--- /dev/null
+From 84897a6adc1d73b3d75791c1336ca59ca96bded8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:33:47 -0500
+Subject: nfsd: allow nfsd_file_get to sanely handle a NULL pointer
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 70f62231cdfd52357836733dd31db787e0412ab2 ]
+
+...and remove some now-useless NULL pointer checks in its callers.
+
+Suggested-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 5 ++---
+ fs/nfsd/nfs4state.c | 4 +---
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 697acf5c3c681..6e8712bd7c998 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -431,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf)
+ struct nfsd_file *
+ nfsd_file_get(struct nfsd_file *nf)
+ {
+- if (likely(refcount_inc_not_zero(&nf->nf_ref)))
++ if (nf && refcount_inc_not_zero(&nf->nf_ref))
+ return nf;
+ return NULL;
+ }
+@@ -1086,8 +1086,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ rcu_read_lock();
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+- if (nf)
+- nf = nfsd_file_get(nf);
++ nf = nfsd_file_get(nf);
+ rcu_read_unlock();
+
+ if (nf) {
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index b9d694ec25d19..e4522e86e984e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ static struct nfsd_file *
+ __nfs4_get_fd(struct nfs4_file *f, int oflag)
+ {
+- if (f->fi_fds[oflag])
+- return nfsd_file_get(f->fi_fds[oflag]);
+- return NULL;
++ return nfsd_file_get(f->fi_fds[oflag]);
+ }
+
+ static struct nfsd_file *
+--
+2.43.0
+
--- /dev/null
+From ac9a9f41a15c31910d32eee697d7fab55053c493 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 06:53:54 -0500
+Subject: nfsd: allow reaping files still under writeback
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit dcb779fcd4ed5984ad15991d574943d12a8693d1 ]
+
+On most filesystems, there is no reason to delay reaping an nfsd_file
+just because its underlying inode is still under writeback. nfsd just
+relies on client activity or the local flusher threads to do writeback.
+
+The main exception is NFS, which flushes all of its dirty data on last
+close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to
+signal that they do this, and only skip closing files under writeback on
+such filesystems.
+
+Also, remove a redundant NULL file pointer check in
+nfsd_file_check_writeback, and clean up nfs's export op flag
+definitions.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+[ cel: adjusted to apply to v6.1.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/export.c | 9 ++++++---
+ fs/nfsd/filecache.c | 12 +++++++++++-
+ include/linux/exportfs.h | 1 +
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfs/export.c b/fs/nfs/export.c
+index 01596f2d0a1ed..9fe9586a51b71 100644
+--- a/fs/nfs/export.c
++++ b/fs/nfs/export.c
+@@ -156,7 +156,10 @@ const struct export_operations nfs_export_ops = {
+ .fh_to_dentry = nfs_fh_to_dentry,
+ .get_parent = nfs_get_parent,
+ .fetch_iversion = nfs_fetch_iversion,
+- .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
+- EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
+- EXPORT_OP_NOATOMIC_ATTR,
++ .flags = EXPORT_OP_NOWCC |
++ EXPORT_OP_NOSUBTREECHK |
++ EXPORT_OP_CLOSE_BEFORE_UNLINK |
++ EXPORT_OP_REMOTE_FS |
++ EXPORT_OP_NOATOMIC_ATTR |
++ EXPORT_OP_FLUSH_ON_CLOSE,
+ };
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 9b7082fdd2115..a6fa6e9802772 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -402,13 +402,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
+ struct file *file = nf->nf_file;
+ struct address_space *mapping;
+
+- if (!file || !(file->f_mode & FMODE_WRITE))
++ /* File not open for write? */
++ if (!(file->f_mode & FMODE_WRITE))
+ return false;
++
++ /*
++ * Some filesystems (e.g. NFS) flush all dirty data on close.
++ * On others, there is no need to wait for writeback.
++ */
++ if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
++ return false;
++
+ mapping = file->f_mapping;
+ return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+ }
+
++
+ static bool nfsd_file_lru_add(struct nfsd_file *nf)
+ {
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
+index fe848901fcc3a..218fc5c54e901 100644
+--- a/include/linux/exportfs.h
++++ b/include/linux/exportfs.h
+@@ -221,6 +221,7 @@ struct export_operations {
+ #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
+ atomic attribute updates
+ */
++#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
+ unsigned long flags;
+ };
+
+--
+2.43.0
+
--- /dev/null
+From e0179f3964a7b79117b258fcdf461385cf2f7d62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 15:09:04 -0500
+Subject: NFSD: Convert filecache to rhltable
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c4c649ab413ba6a785b25f0edbb12f617c87db2a ]
+
+While we were converting the nfs4_file hashtable to use the kernel's
+resizable hashtable data structure, Neil Brown observed that the
+list variant (rhltable) would be better for managing nfsd_file items
+as well. The nfsd_file hash table will contain multiple entries for
+the same inode -- these should be kept together on a list. And, it
+could be possible for exotic or malicious client behavior to cause
+the hash table to resize itself on every insertion.
+
+A nice simplification is that rhltable_lookup() can return a list
+that contains only nfsd_file items that match a given inode, which
+enables us to eliminate specialized hash table helper functions and
+use the default functions provided by the rhashtable implementation.
+
+Since we are now storing nfsd_file items for the same inode on a
+single list, that effectively reduces the number of hash entries
+that have to be tracked in the hash table. The minimum bucket count
+is therefore lowered.
+
+Light testing with fstests generic/531 show no regressions.
+
+Suggested-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 311 ++++++++++++++++++--------------------------
+ fs/nfsd/filecache.h | 9 +-
+ 2 files changed, 133 insertions(+), 187 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index a6fa6e9802772..2f0b2d964cbb1 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -74,70 +74,9 @@ static struct list_lru nfsd_file_lru;
+ static unsigned long nfsd_file_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+ static struct delayed_work nfsd_filecache_laundrette;
+-static struct rhashtable nfsd_file_rhash_tbl
++static struct rhltable nfsd_file_rhltable
+ ____cacheline_aligned_in_smp;
+
+-enum nfsd_file_lookup_type {
+- NFSD_FILE_KEY_INODE,
+- NFSD_FILE_KEY_FULL,
+-};
+-
+-struct nfsd_file_lookup_key {
+- struct inode *inode;
+- struct net *net;
+- const struct cred *cred;
+- unsigned char need;
+- bool gc;
+- enum nfsd_file_lookup_type type;
+-};
+-
+-/*
+- * The returned hash value is based solely on the address of an in-code
+- * inode, a pointer to a slab-allocated object. The entropy in such a
+- * pointer is concentrated in its middle bits.
+- */
+-static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
+-{
+- unsigned long ptr = (unsigned long)inode;
+- u32 k;
+-
+- k = ptr >> L1_CACHE_SHIFT;
+- k &= 0x00ffffff;
+- return jhash2(&k, 1, seed);
+-}
+-
+-/**
+- * nfsd_file_key_hashfn - Compute the hash value of a lookup key
+- * @data: key on which to compute the hash value
+- * @len: rhash table's key_len parameter (unused)
+- * @seed: rhash table's random seed of the day
+- *
+- * Return value:
+- * Computed 32-bit hash value
+- */
+-static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct nfsd_file_lookup_key *key = data;
+-
+- return nfsd_file_inode_hash(key->inode, seed);
+-}
+-
+-/**
+- * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
+- * @data: object on which to compute the hash value
+- * @len: rhash table's key_len parameter (unused)
+- * @seed: rhash table's random seed of the day
+- *
+- * Return value:
+- * Computed 32-bit hash value
+- */
+-static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct nfsd_file *nf = data;
+-
+- return nfsd_file_inode_hash(nf->nf_inode, seed);
+-}
+-
+ static bool
+ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+ {
+@@ -158,55 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+ return true;
+ }
+
+-/**
+- * nfsd_file_obj_cmpfn - Match a cache item against search criteria
+- * @arg: search criteria
+- * @ptr: cache item to check
+- *
+- * Return values:
+- * %0 - Item matches search criteria
+- * %1 - Item does not match search criteria
+- */
+-static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+- const void *ptr)
+-{
+- const struct nfsd_file_lookup_key *key = arg->key;
+- const struct nfsd_file *nf = ptr;
+-
+- switch (key->type) {
+- case NFSD_FILE_KEY_INODE:
+- if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+- return 1;
+- if (nf->nf_inode != key->inode)
+- return 1;
+- break;
+- case NFSD_FILE_KEY_FULL:
+- if (nf->nf_inode != key->inode)
+- return 1;
+- if (nf->nf_may != key->need)
+- return 1;
+- if (nf->nf_net != key->net)
+- return 1;
+- if (!nfsd_match_cred(nf->nf_cred, key->cred))
+- return 1;
+- if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+- return 1;
+- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+- return 1;
+- break;
+- }
+- return 0;
+-}
+-
+ static const struct rhashtable_params nfsd_file_rhash_params = {
+ .key_len = sizeof_field(struct nfsd_file, nf_inode),
+ .key_offset = offsetof(struct nfsd_file, nf_inode),
+- .head_offset = offsetof(struct nfsd_file, nf_rhash),
+- .hashfn = nfsd_file_key_hashfn,
+- .obj_hashfn = nfsd_file_obj_hashfn,
+- .obj_cmpfn = nfsd_file_obj_cmpfn,
+- /* Reduce resizing churn on light workloads */
+- .min_size = 512, /* buckets */
++ .head_offset = offsetof(struct nfsd_file, nf_rlist),
++
++ /*
++ * Start with a single page hash table to reduce resizing churn
++ * on light workloads.
++ */
++ .min_size = 256,
+ .automatic_shrinking = true,
+ };
+
+@@ -309,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+ }
+
+ static struct nfsd_file *
+-nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
++nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
++ bool want_gc)
+ {
+ struct nfsd_file *nf;
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+- if (nf) {
+- INIT_LIST_HEAD(&nf->nf_lru);
+- nf->nf_birthtime = ktime_get();
+- nf->nf_file = NULL;
+- nf->nf_cred = get_current_cred();
+- nf->nf_net = key->net;
+- nf->nf_flags = 0;
+- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+- __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+- if (key->gc)
+- __set_bit(NFSD_FILE_GC, &nf->nf_flags);
+- nf->nf_inode = key->inode;
+- refcount_set(&nf->nf_ref, 1);
+- nf->nf_may = key->need;
+- nf->nf_mark = NULL;
+- }
++ if (unlikely(!nf))
++ return NULL;
++
++ INIT_LIST_HEAD(&nf->nf_lru);
++ nf->nf_birthtime = ktime_get();
++ nf->nf_file = NULL;
++ nf->nf_cred = get_current_cred();
++ nf->nf_net = net;
++ nf->nf_flags = want_gc ?
++ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
++ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
++ nf->nf_inode = inode;
++ refcount_set(&nf->nf_ref, 1);
++ nf->nf_may = need;
++ nf->nf_mark = NULL;
+ return nf;
+ }
+
+@@ -354,8 +254,8 @@ static void
+ nfsd_file_hash_remove(struct nfsd_file *nf)
+ {
+ trace_nfsd_file_unhash(nf);
+- rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+- nfsd_file_rhash_params);
++ rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
++ nfsd_file_rhash_params);
+ }
+
+ static bool
+@@ -688,8 +588,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ * @inode: inode on which to close out nfsd_files
+ * @dispose: list on which to gather nfsd_files to close out
+ *
+- * An nfsd_file represents a struct file being held open on behalf of nfsd. An
+- * open file however can block other activity (such as leases), or cause
++ * An nfsd_file represents a struct file being held open on behalf of nfsd.
++ * An open file however can block other activity (such as leases), or cause
+ * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
+ *
+ * This function is intended to find open nfsd_files when this sort of
+@@ -702,21 +602,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ static void
+ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+ {
+- struct nfsd_file_lookup_key key = {
+- .type = NFSD_FILE_KEY_INODE,
+- .inode = inode,
+- .gc = true,
+- };
++ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
+
+ rcu_read_lock();
+- do {
+- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params);
+- if (!nf)
+- break;
++ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++ nfsd_file_rhash_params);
++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
++ if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
++ continue;
+ nfsd_file_cond_queue(nf, dispose);
+- } while (1);
++ }
+ rcu_read_unlock();
+ }
+
+@@ -840,7 +736,7 @@ nfsd_file_cache_init(void)
+ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+ return 0;
+
+- ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
++ ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
+ if (ret)
+ return ret;
+
+@@ -908,7 +804,7 @@ nfsd_file_cache_init(void)
+ nfsd_file_mark_slab = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+- rhashtable_destroy(&nfsd_file_rhash_tbl);
++ rhltable_destroy(&nfsd_file_rhltable);
+ goto out;
+ }
+
+@@ -927,7 +823,7 @@ __nfsd_file_cache_purge(struct net *net)
+ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+
+- rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
++ rhltable_walk_enter(&nfsd_file_rhltable, &iter);
+ do {
+ rhashtable_walk_start(&iter);
+
+@@ -1033,7 +929,7 @@ nfsd_file_cache_shutdown(void)
+ nfsd_file_mark_slab = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+- rhashtable_destroy(&nfsd_file_rhash_tbl);
++ rhltable_destroy(&nfsd_file_rhltable);
+
+ for_each_possible_cpu(i) {
+ per_cpu(nfsd_file_cache_hits, i) = 0;
+@@ -1044,6 +940,35 @@ nfsd_file_cache_shutdown(void)
+ }
+ }
+
++static struct nfsd_file *
++nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
++ struct inode *inode, unsigned char need,
++ bool want_gc)
++{
++ struct rhlist_head *tmp, *list;
++ struct nfsd_file *nf;
++
++ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++ nfsd_file_rhash_params);
++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
++ if (nf->nf_may != need)
++ continue;
++ if (nf->nf_net != net)
++ continue;
++ if (!nfsd_match_cred(nf->nf_cred, cred))
++ continue;
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
++ continue;
++ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
++ continue;
++
++ if (!nfsd_file_get(nf))
++ continue;
++ return nf;
++ }
++ return NULL;
++}
++
+ /**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode to check
+@@ -1058,16 +983,20 @@ nfsd_file_cache_shutdown(void)
+ bool
+ nfsd_file_is_cached(struct inode *inode)
+ {
+- struct nfsd_file_lookup_key key = {
+- .type = NFSD_FILE_KEY_INODE,
+- .inode = inode,
+- .gc = true,
+- };
++ struct rhlist_head *tmp, *list;
++ struct nfsd_file *nf;
+ bool ret = false;
+
+- if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params) != NULL)
+- ret = true;
++ rcu_read_lock();
++ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++ nfsd_file_rhash_params);
++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
++ ret = true;
++ break;
++ }
++ rcu_read_unlock();
++
+ trace_nfsd_file_is_cached(inode, (int)ret);
+ return ret;
+ }
+@@ -1077,14 +1006,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf, bool want_gc)
+ {
+- struct nfsd_file_lookup_key key = {
+- .type = NFSD_FILE_KEY_FULL,
+- .need = may_flags & NFSD_FILE_MAY_MASK,
+- .net = SVC_NET(rqstp),
+- .gc = want_gc,
+- };
++ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
++ struct net *net = SVC_NET(rqstp);
++ struct nfsd_file *new, *nf;
++ const struct cred *cred;
+ bool open_retry = true;
+- struct nfsd_file *nf;
++ struct inode *inode;
+ __be32 status;
+ int ret;
+
+@@ -1092,14 +1019,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (status != nfs_ok)
+ return status;
+- key.inode = d_inode(fhp->fh_dentry);
+- key.cred = get_current_cred();
++ inode = d_inode(fhp->fh_dentry);
++ cred = get_current_cred();
+
+ retry:
+ rcu_read_lock();
+- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params);
+- nf = nfsd_file_get(nf);
++ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
+ rcu_read_unlock();
+
+ if (nf) {
+@@ -1113,21 +1038,32 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto wait_for_construction;
+ }
+
+- nf = nfsd_file_alloc(&key, may_flags);
+- if (!nf) {
++ new = nfsd_file_alloc(net, inode, need, want_gc);
++ if (!new) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
+- ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+- &key, &nf->nf_rhash,
+- nfsd_file_rhash_params);
++ rcu_read_lock();
++ spin_lock(&inode->i_lock);
++ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
++ if (unlikely(nf)) {
++ spin_unlock(&inode->i_lock);
++ rcu_read_unlock();
++ nfsd_file_slab_free(&new->nf_rcu);
++ goto wait_for_construction;
++ }
++ nf = new;
++ ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
++ nfsd_file_rhash_params);
++ spin_unlock(&inode->i_lock);
++ rcu_read_unlock();
+ if (likely(ret == 0))
+ goto open_file;
+
+ if (ret == -EEXIST)
+ goto retry;
+- trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
++ trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
+ status = nfserr_jukebox;
+ goto construction_err;
+
+@@ -1136,7 +1072,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
++ trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
+ if (!open_retry) {
+ status = nfserr_jukebox;
+ goto construction_err;
+@@ -1158,13 +1094,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nfsd_file_check_write_error(nf);
+ *pnf = nf;
+ }
+- put_cred(key.cred);
+- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
++ put_cred(cred);
++ trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
+ return status;
+
+ open_file:
+ trace_nfsd_file_alloc(nf);
+- nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
++ nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
+ if (nf->nf_mark) {
+ if (file) {
+ get_file(file);
+@@ -1182,7 +1118,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+- if (status == nfs_ok && key.inode->i_nlink == 0)
++ if (status != nfs_ok || inode->i_nlink == 0)
+ status = nfserr_jukebox;
+ if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+@@ -1209,8 +1145,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * seconds after the final nfsd_file_put() in case the caller
+ * wants to re-use it.
+ *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ * %nfs_ok - @pnf points to an nfsd_file with its reference
++ * count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+ */
+ __be32
+ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1230,8 +1169,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put().
+ *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ * %nfs_ok - @pnf points to an nfsd_file with its reference
++ * count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+ */
+ __be32
+ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1252,8 +1194,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
+ *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ * %nfs_ok - @pnf points to an nfsd_file with its reference
++ * count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+ */
+ __be32
+ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1284,7 +1229,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ lru = list_lru_count(&nfsd_file_lru);
+
+ rcu_read_lock();
+- ht = &nfsd_file_rhash_tbl;
++ ht = &nfsd_file_rhltable.ht;
+ count = atomic_read(&ht->nelems);
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ buckets = tbl->size;
+@@ -1300,7 +1245,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ evictions += per_cpu(nfsd_file_evictions, i);
+ }
+
+- seq_printf(m, "total entries: %u\n", count);
++ seq_printf(m, "total inodes: %u\n", count);
+ seq_printf(m, "hash buckets: %u\n", buckets);
+ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 41516a4263ea5..e54165a3224f0 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -29,9 +29,8 @@ struct nfsd_file_mark {
+ * never be dereferenced, only used for comparison.
+ */
+ struct nfsd_file {
+- struct rhash_head nf_rhash;
+- struct list_head nf_lru;
+- struct rcu_head nf_rcu;
++ struct rhlist_head nf_rlist;
++ void *nf_inode;
+ struct file *nf_file;
+ const struct cred *nf_cred;
+ struct net *nf_net;
+@@ -40,10 +39,12 @@ struct nfsd_file {
+ #define NFSD_FILE_REFERENCED (2)
+ #define NFSD_FILE_GC (3)
+ unsigned long nf_flags;
+- struct inode *nf_inode; /* don't deref */
+ refcount_t nf_ref;
+ unsigned char nf_may;
++
+ struct nfsd_file_mark *nf_mark;
++ struct list_head nf_lru;
++ struct rcu_head nf_rcu;
+ ktime_t nf_birthtime;
+ };
+
+--
+2.43.0
+
--- /dev/null
+From 3ffb64192436277690d09cc004c73101b7734148 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:11 -0500
+Subject: nfsd: don't kill nfsd_files because of lease break error
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit c6593366c0bf222be9c7561354dfb921c611745e ]
+
+An error from break_lease is non-fatal, so we needn't destroy the
+nfsd_file in that case. Just put the reference like we normally would
+and return the error.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 568963b8a4777..ab37b85b72077 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1102,7 +1102,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nf = nfsd_file_alloc(&key, may_flags);
+ if (!nf) {
+ status = nfserr_jukebox;
+- goto out_status;
++ goto out;
+ }
+
+ ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+@@ -1111,13 +1111,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (likely(ret == 0))
+ goto open_file;
+
+- nfsd_file_slab_free(&nf->nf_rcu);
+- nf = NULL;
+ if (ret == -EEXIST)
+ goto retry;
+ trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
+ status = nfserr_jukebox;
+- goto out_status;
++ goto construction_err;
+
+ wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+@@ -1127,29 +1125,25 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
+ if (!open_retry) {
+ status = nfserr_jukebox;
+- goto out;
++ goto construction_err;
+ }
+ open_retry = false;
+- if (refcount_dec_and_test(&nf->nf_ref))
+- nfsd_file_free(nf);
+ goto retry;
+ }
+-
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
++ if (status != nfs_ok) {
++ nfsd_file_put(nf);
++ nf = NULL;
++ }
++
+ out:
+ if (status == nfs_ok) {
+ this_cpu_inc(nfsd_file_acquisitions);
+ nfsd_file_check_write_error(nf);
+ *pnf = nf;
+- } else {
+- if (refcount_dec_and_test(&nf->nf_ref))
+- nfsd_file_free(nf);
+- nf = NULL;
+ }
+-
+-out_status:
+ put_cred(key.cred);
+ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ return status;
+@@ -1179,6 +1173,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
++ if (status == nfs_ok)
++ goto out;
++
++construction_err:
++ if (refcount_dec_and_test(&nf->nf_ref))
++ nfsd_file_free(nf);
++ nf = NULL;
+ goto out;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From f9a7567d2186c97f9e03d5d7ed90c33431a472ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:09 -0500
+Subject: nfsd: don't open-code clear_and_wake_up_bit
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b8bea9f6cdd7236c7c2238d022145e9b2f8aac22 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6e8712bd7c998..5b5d39ec7b010 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1174,9 +1174,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ status = nfserr_jukebox;
+ if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+- clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+- smp_mb__after_atomic();
+- wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
++ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ goto out;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 51d7b689d98814aec1c0dee5d591bb57eecb47d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jan 2023 12:31:37 -0500
+Subject: nfsd: don't take/put an extra reference when putting a file
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b2ff1bd71db2a1b193a6dde0845adcd69cbcf75e ]
+
+The last thing that filp_close does is an fput, so don't bother taking
+and putting the extra reference.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 50349449a4e52..51e2947c21a7d 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -382,10 +382,8 @@ nfsd_file_free(struct nfsd_file *nf)
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+- get_file(nf->nf_file);
+- filp_close(nf->nf_file, NULL);
+ nfsd_file_check_write_error(nf);
+- fput(nf->nf_file);
++ filp_close(nf->nf_file, NULL);
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From a90a656e0be7aca2852443302021edca6b6c014a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Jun 2023 17:09:06 -0400
+Subject: nfsd: Fix creation time serialization order
+
+From: Tavian Barnes <tavianator@tavianator.com>
+
+In nfsd4_encode_fattr(), TIME_CREATE was being written out after all
+other times. However, they should be written out in an order that
+matches the bit flags in bmval1, which in this case are
+
+ #define FATTR4_WORD1_TIME_ACCESS (1UL << 15)
+ #define FATTR4_WORD1_TIME_CREATE (1UL << 18)
+ #define FATTR4_WORD1_TIME_DELTA (1UL << 19)
+ #define FATTR4_WORD1_TIME_METADATA (1UL << 20)
+ #define FATTR4_WORD1_TIME_MODIFY (1UL << 21)
+
+so TIME_CREATE should come second.
+
+I noticed this on a FreeBSD NFSv4.2 client, which supports creation
+times. On this client, file times were weirdly permuted. With this
+patch applied on the server, times looked normal on the client.
+
+Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute")
+Link: https://unix.stackexchange.com/q/749605/56202
+Signed-off-by: Tavian Barnes <tavianator@tavianator.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfs4xdr.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 514f4456cf5c6..4ed9fef14adc2 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3364,6 +3364,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ if (status)
+ goto out;
+ }
++ if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
++ status = nfsd4_encode_nfstime4(xdr, &stat.btime);
++ if (status)
++ goto out;
++ }
+ if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
+@@ -3380,11 +3385,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ if (status)
+ goto out;
+ }
+- if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+- status = nfsd4_encode_nfstime4(xdr, &stat.btime);
+- if (status)
+- goto out;
+- }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+ u64 ino = stat.ino;
+
+--
+2.43.0
+
--- /dev/null
+From a756d96c306e659f97ceacf72814028f0d6342fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Apr 2023 10:53:18 -0700
+Subject: NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 147abcacee33781e75588869e944ddb07528a897 ]
+
+The following request sequence to the same file causes the NFS client and
+server to get into an infinite loop with COMMIT and NFS4ERR_DELAY:
+
+OPEN
+REMOVE
+WRITE
+COMMIT
+
+Problem reported by recall11, recall12, recall14, recall20, recall22,
+recall40, recall42, recall48, recall50 of nfstest suite.
+
+This patch restores the handling of the race condition with unlink in
+nfsd_file_do_acquire to what it was prior to the regression.
+
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index f40d8f3b35a4c..ee9c923192e08 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1099,8 +1099,6 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * then unhash.
+ */
+ if (status != nfs_ok || inode->i_nlink == 0)
+- status = nfserr_jukebox;
+- if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ if (status == nfs_ok)
+--
+2.43.0
+
--- /dev/null
+From f494a5af1d58eb2bba67f9dfe869001e4b7f8ee9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:39:00 -0500
+Subject: nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 6c31e4c98853a4ba47355ea151b36a77c42b7734 ]
+
+Since v4 files are expected to be long-lived, there's little value in
+closing them out of the cache when there is conflicting access.
+
+Change the comparator to also match the gc value in the key. Change both
+of the current users of that key to set the gc value in the key to
+"true".
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 5b5d39ec7b010..c36e3032d4386 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -175,6 +175,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+
+ switch (key->type) {
+ case NFSD_FILE_KEY_INODE:
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++ return 1;
+ if (nf->nf_inode != key->inode)
+ return 1;
+ break;
+@@ -695,6 +697,7 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
++ .gc = true,
+ };
+ struct nfsd_file *nf;
+
+@@ -1049,6 +1052,7 @@ nfsd_file_is_cached(struct inode *inode)
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
++ .gc = true,
+ };
+ bool ret = false;
+
+--
+2.43.0
+
--- /dev/null
+From 8832f93342b22e70fa14c6a809aedfe115a37007 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:39:01 -0500
+Subject: nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit d69b8dbfd0866abc5ec84652cc1c10fc3d4d91ef ]
+
+test_bit returns bool, so we can just compare the result of that to the
+key->gc value without the "!!".
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index c36e3032d4386..568963b8a4777 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -189,7 +189,7 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ return 1;
+ if (!nfsd_match_cred(nf->nf_cred, key->cred))
+ return 1;
+- if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+ return 1;
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+ return 1;
+--
+2.43.0
+
--- /dev/null
+From b6ea58695aec8f2b36a931723f6630cdc3ac409f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Apr 2023 17:31:44 -0400
+Subject: nfsd: simplify the delayed disposal list code
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 92e4a6733f922f0fef1d0995f7b2d0eaff86c7ea ]
+
+When queueing a dispose list to the appropriate "freeme" lists, the code
+pointlessly queues the objects one at a time to an intermediate list.
+
+Remove a few helpers and just open code a list_move to make it more
+clear and efficient. Better document the resulting functions with
+kerneldoc comments.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 64 ++++++++++++++++-----------------------------
+ 1 file changed, 22 insertions(+), 42 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 2f0b2d964cbb1..f40d8f3b35a4c 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -402,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose)
+ }
+ }
+
+-static void
+-nfsd_file_list_remove_disposal(struct list_head *dst,
+- struct nfsd_fcache_disposal *l)
+-{
+- spin_lock(&l->lock);
+- list_splice_init(&l->freeme, dst);
+- spin_unlock(&l->lock);
+-}
+-
+-static void
+-nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+-{
+- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+- struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+-
+- spin_lock(&l->lock);
+- list_splice_tail_init(files, &l->freeme);
+- spin_unlock(&l->lock);
+- queue_work(nfsd_filecache_wq, &l->work);
+-}
+-
+-static void
+-nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+- struct net *net)
+-{
+- struct nfsd_file *nf, *tmp;
+-
+- list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+- if (nf->nf_net == net)
+- list_move_tail(&nf->nf_lru, dst);
+- }
+-}
+-
++/**
++ * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
++ * @dispose: list of nfsd_files to be disposed
++ *
++ * Transfers each file to the "freeme" list for its nfsd_net, to eventually
++ * be disposed of by the per-net garbage collector.
++ */
+ static void
+ nfsd_file_dispose_list_delayed(struct list_head *dispose)
+ {
+- LIST_HEAD(list);
+- struct nfsd_file *nf;
+-
+ while(!list_empty(dispose)) {
+- nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+- nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+- nfsd_file_list_add_disposal(&list, nf->nf_net);
++ struct nfsd_file *nf = list_first_entry(dispose,
++ struct nfsd_file, nf_lru);
++ struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
++ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
++
++ spin_lock(&l->lock);
++ list_move_tail(&nf->nf_lru, &l->freeme);
++ spin_unlock(&l->lock);
++ queue_work(nfsd_filecache_wq, &l->work);
+ }
+ }
+
+@@ -665,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode)
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+- * Walk the LRU list and destroy any entries that have not been used since
+- * the last scan.
++ * Scrape the freeme list for this nfsd_net, and then dispose of them
++ * all.
+ */
+ static void
+ nfsd_file_delayed_close(struct work_struct *work)
+@@ -675,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work)
+ struct nfsd_fcache_disposal *l = container_of(work,
+ struct nfsd_fcache_disposal, work);
+
+- nfsd_file_list_remove_disposal(&head, l);
++ spin_lock(&l->lock);
++ list_splice_init(&l->freeme, &head);
++ spin_unlock(&l->lock);
++
+ nfsd_file_dispose_list(&head);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 40fe2393e752ab49b382d502c2f9ae0d31e4ac19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Jan 2023 12:21:16 -0500
+Subject: nfsd: update comment over __nfsd_file_cache_purge
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 972cc0e0924598cb293b919d39c848dc038b2c28 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/filecache.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 51e2947c21a7d..9b7082fdd2115 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -907,7 +907,8 @@ nfsd_file_cache_init(void)
+ * @net: net-namespace to shut down the cache (may be NULL)
+ *
+ * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
+- * then close out everything. Called when an nfsd instance is being shut down.
++ * then close out everything. Called when an nfsd instance is being shut down,
++ * and when the exports table is flushed.
+ */
+ static void
+ __nfsd_file_cache_purge(struct net *net)
+--
+2.43.0
+
--- /dev/null
+md-fix-data-corruption-for-raid456-when-reshape-rest.patch
+md-raid10-prevent-soft-lockup-while-flush-writes.patch
+io_uring-unix-drop-usage-of-io_uring-socket.patch
+io_uring-drop-any-code-related-to-scm_rights.patch
+nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch
+nfsd-don-t-open-code-clear_and_wake_up_bit.patch
+nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch
+nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch
+nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch
+nfsd-add-some-comments-to-nfsd_file_do_acquire.patch
+nfsd-don-t-take-put-an-extra-reference-when-putting-.patch
+nfsd-update-comment-over-__nfsd_file_cache_purge.patch
+nfsd-allow-reaping-files-still-under-writeback.patch
+nfsd-convert-filecache-to-rhltable.patch
+nfsd-simplify-the-delayed-disposal-list-code.patch
+nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch
+nfsd-add-an-nfsd4_encode_nfstime4-helper.patch
+nfsd-fix-creation-time-serialization-order.patch