From: Sasha Levin Date: Mon, 18 Mar 2024 00:20:44 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v6.8.2~104 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=82879666bcbae5233cebe247aa1568f0c437a5a6;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/io_uring-drop-any-code-related-to-scm_rights.patch b/queue-6.1/io_uring-drop-any-code-related-to-scm_rights.patch new file mode 100644 index 00000000000..74e12cd58e3 --- /dev/null +++ b/queue-6.1/io_uring-drop-any-code-related-to-scm_rights.patch @@ -0,0 +1,344 @@ +From 6e16ae9b86871d8b78fd8999d6b8c396ed42993a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Mar 2024 18:15:05 -0600 +Subject: io_uring: drop any code related to SCM_RIGHTS + +From: Jens Axboe + +Commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream. + +This is dead code after we dropped support for passing io_uring fds +over SCM_RIGHTS, get rid of it. + +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + include/linux/io_uring_types.h | 3 - + io_uring/filetable.c | 10 +-- + io_uring/io_uring.c | 31 +------ + io_uring/rsrc.c | 151 +-------------------------------- + io_uring/rsrc.h | 15 ---- + 5 files changed, 8 insertions(+), 202 deletions(-) + +diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h +index f5b687a787a34..37aeea266ebb3 100644 +--- a/include/linux/io_uring_types.h ++++ b/include/linux/io_uring_types.h +@@ -330,9 +330,6 @@ struct io_ring_ctx { + + struct list_head io_buffers_pages; + +- #if defined(CONFIG_UNIX) +- struct socket *ring_sock; +- #endif + /* hashed buffered write serialization */ + struct io_wq_hash *hash_map; + +diff --git a/io_uring/filetable.c b/io_uring/filetable.c +index b80614e7d6051..4660cb89ea9f5 100644 +--- a/io_uring/filetable.c ++++ b/io_uring/filetable.c +@@ -95,12 +95,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, + needs_switch = true; + } + +- ret = 
io_scm_file_account(ctx, file); +- if (!ret) { +- *io_get_tag_slot(ctx->file_data, slot_index) = 0; +- io_fixed_file_set(file_slot, file); +- io_file_bitmap_set(&ctx->file_table, slot_index); +- } ++ *io_get_tag_slot(ctx->file_data, slot_index) = 0; ++ io_fixed_file_set(file_slot, file); ++ io_file_bitmap_set(&ctx->file_table, slot_index); ++ return 0; + err: + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->file_data); +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index cf7dd62da0e37..415248c1f82c6 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -60,7 +60,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -2628,12 +2627,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) + WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); + WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); + +-#if defined(CONFIG_UNIX) +- if (ctx->ring_sock) { +- ctx->ring_sock->file = NULL; /* so that iput() is called */ +- sock_release(ctx->ring_sock); +- } +-#endif + WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); + + if (ctx->mm_account) { +@@ -3438,32 +3431,12 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) + /* + * Allocate an anonymous fd, this is what constitutes the application + * visible backing of an io_uring instance. The application mmaps this +- * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, +- * we have to tie this fd to a socket for file garbage collection purposes. ++ * fd to gain access to the SQ/CQ ring details. 
+ */ + static struct file *io_uring_get_file(struct io_ring_ctx *ctx) + { +- struct file *file; +-#if defined(CONFIG_UNIX) +- int ret; +- +- ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, +- &ctx->ring_sock); +- if (ret) +- return ERR_PTR(ret); +-#endif +- +- file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, ++ return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + O_RDWR | O_CLOEXEC, NULL); +-#if defined(CONFIG_UNIX) +- if (IS_ERR(file)) { +- sock_release(ctx->ring_sock); +- ctx->ring_sock = NULL; +- } else { +- ctx->ring_sock->file = file; +- } +-#endif +- return file; + } + + static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, +diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c +index 7ada0339b3870..ac658cfa89c63 100644 +--- a/io_uring/rsrc.c ++++ b/io_uring/rsrc.c +@@ -494,11 +494,6 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, + err = -EBADF; + break; + } +- err = io_scm_file_account(ctx, file); +- if (err) { +- fput(file); +- break; +- } + *io_get_tag_slot(data, i) = tag; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, i); +@@ -762,22 +757,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) + for (i = 0; i < ctx->nr_user_files; i++) { + struct file *file = io_file_from_index(&ctx->file_table, i); + +- /* skip scm accounted files, they'll be freed by ->ring_sock */ +- if (!file || io_file_need_scm(file)) ++ if (!file) + continue; + io_file_bitmap_clear(&ctx->file_table, i); + fput(file); + } + +-#if defined(CONFIG_UNIX) +- if (ctx->ring_sock) { +- struct sock *sock = ctx->ring_sock->sk; +- struct sk_buff *skb; +- +- while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) +- kfree_skb(skb); +- } +-#endif + io_free_file_tables(&ctx->file_table); + io_file_table_set_alloc_range(ctx, 0, 0); + io_rsrc_data_free(ctx->file_data); +@@ -805,134 +790,11 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) + return ret; + } + +-/* +- * 
Ensure the UNIX gc is aware of our file set, so we are certain that +- * the io_uring can be safely unregistered on process exit, even if we have +- * loops in the file referencing. We account only files that can hold other +- * files because otherwise they can't form a loop and so are not interesting +- * for GC. +- */ +-int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) +-{ +-#if defined(CONFIG_UNIX) +- struct sock *sk = ctx->ring_sock->sk; +- struct sk_buff_head *head = &sk->sk_receive_queue; +- struct scm_fp_list *fpl; +- struct sk_buff *skb; +- +- if (likely(!io_file_need_scm(file))) +- return 0; +- +- /* +- * See if we can merge this file into an existing skb SCM_RIGHTS +- * file set. If there's no room, fall back to allocating a new skb +- * and filling it in. +- */ +- spin_lock_irq(&head->lock); +- skb = skb_peek(head); +- if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) +- __skb_unlink(skb, head); +- else +- skb = NULL; +- spin_unlock_irq(&head->lock); +- +- if (!skb) { +- fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); +- if (!fpl) +- return -ENOMEM; +- +- skb = alloc_skb(0, GFP_KERNEL); +- if (!skb) { +- kfree(fpl); +- return -ENOMEM; +- } +- +- fpl->user = get_uid(current_user()); +- fpl->max = SCM_MAX_FD; +- fpl->count = 0; +- +- UNIXCB(skb).fp = fpl; +- skb->sk = sk; +- skb->scm_io_uring = 1; +- skb->destructor = unix_destruct_scm; +- refcount_add(skb->truesize, &sk->sk_wmem_alloc); +- } +- +- fpl = UNIXCB(skb).fp; +- fpl->fp[fpl->count++] = get_file(file); +- unix_inflight(fpl->user, file); +- skb_queue_head(head, skb); +- fput(file); +-#endif +- return 0; +-} +- + static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) + { + struct file *file = prsrc->file; +-#if defined(CONFIG_UNIX) +- struct sock *sock = ctx->ring_sock->sk; +- struct sk_buff_head list, *head = &sock->sk_receive_queue; +- struct sk_buff *skb; +- int i; +- +- if (!io_file_need_scm(file)) { +- fput(file); +- return; +- } +- +- 
__skb_queue_head_init(&list); +- +- /* +- * Find the skb that holds this file in its SCM_RIGHTS. When found, +- * remove this entry and rearrange the file array. +- */ +- skb = skb_dequeue(head); +- while (skb) { +- struct scm_fp_list *fp; + +- fp = UNIXCB(skb).fp; +- for (i = 0; i < fp->count; i++) { +- int left; +- +- if (fp->fp[i] != file) +- continue; +- +- unix_notinflight(fp->user, fp->fp[i]); +- left = fp->count - 1 - i; +- if (left) { +- memmove(&fp->fp[i], &fp->fp[i + 1], +- left * sizeof(struct file *)); +- } +- fp->count--; +- if (!fp->count) { +- kfree_skb(skb); +- skb = NULL; +- } else { +- __skb_queue_tail(&list, skb); +- } +- fput(file); +- file = NULL; +- break; +- } +- +- if (!file) +- break; +- +- __skb_queue_tail(&list, skb); +- +- skb = skb_dequeue(head); +- } +- +- if (skb_peek(&list)) { +- spin_lock_irq(&head->lock); +- while ((skb = __skb_dequeue(&list)) != NULL) +- __skb_queue_tail(head, skb); +- spin_unlock_irq(&head->lock); +- } +-#else + fput(file); +-#endif + } + + int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, +@@ -986,21 +848,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, + goto fail; + + /* +- * Don't allow io_uring instances to be registered. If UNIX +- * isn't enabled, then this causes a reference cycle and this +- * instance can never get freed. If UNIX is enabled we'll +- * handle it just fine, but there's still no point in allowing +- * a ring fd as it doesn't support regular read/write anyway. ++ * Don't allow io_uring instances to be registered. 
+ */ + if (io_is_uring_fops(file)) { + fput(file); + goto fail; + } +- ret = io_scm_file_account(ctx, file); +- if (ret) { +- fput(file); +- goto fail; +- } + file_slot = io_fixed_file_slot(&ctx->file_table, i); + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, i); +diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h +index acaf8dad05401..85f145607c620 100644 +--- a/io_uring/rsrc.h ++++ b/io_uring/rsrc.h +@@ -77,21 +77,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); + int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args, u64 __user *tags); + +-int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); +- +-static inline bool io_file_need_scm(struct file *filp) +-{ +- return false; +-} +- +-static inline int io_scm_file_account(struct io_ring_ctx *ctx, +- struct file *file) +-{ +- if (likely(!io_file_need_scm(file))) +- return 0; +- return __io_scm_file_account(ctx, file); +-} +- + int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args); + int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, +-- +2.43.0 + diff --git a/queue-6.1/io_uring-unix-drop-usage-of-io_uring-socket.patch b/queue-6.1/io_uring-unix-drop-usage-of-io_uring-socket.patch new file mode 100644 index 00000000000..48477b2ad8f --- /dev/null +++ b/queue-6.1/io_uring-unix-drop-usage-of-io_uring-socket.patch @@ -0,0 +1,134 @@ +From dc638a609ff754e234ca3900338628b94617e4a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Mar 2024 18:10:12 -0600 +Subject: io_uring/unix: drop usage of io_uring socket + +From: Jens Axboe + +Commit a4104821ad651d8a0b374f0b2474c345bbb42f82 upstream. + +Since we no longer allow sending io_uring fds over SCM_RIGHTS, move to +using io_is_uring_fops() to detect whether this is a io_uring fd or not. +With that done, kill off io_uring_get_socket() as nobody calls it +anymore. 
+ +This is in preparation to yanking out the rest of the core related to +unix gc with io_uring. + +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + include/linux/io_uring.h | 10 +++++----- + io_uring/io_uring.c | 13 ------------- + io_uring/io_uring.h | 1 - + net/core/scm.c | 2 +- + net/unix/scm.c | 4 +--- + 5 files changed, 7 insertions(+), 23 deletions(-) + +diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h +index a1484cdb3158e..a8f3058448eaa 100644 +--- a/include/linux/io_uring.h ++++ b/include/linux/io_uring.h +@@ -42,11 +42,11 @@ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); + void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned)); +-struct sock *io_uring_get_socket(struct file *file); + void __io_uring_cancel(bool cancel_all); + void __io_uring_free(struct task_struct *tsk); + void io_uring_unreg_ringfd(void); + const char *io_uring_get_opcode(u8 opcode); ++bool io_is_uring_fops(struct file *file); + + static inline void io_uring_files_cancel(void) + { +@@ -71,6 +71,10 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + { + return -EOPNOTSUPP; + } ++static inline bool io_is_uring_fops(struct file *file) ++{ ++ return false; ++} + static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, + ssize_t ret2, unsigned issue_flags) + { +@@ -79,10 +83,6 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) + { + } +-static inline struct sock *io_uring_get_socket(struct file *file) +-{ +- return NULL; +-} + static inline void io_uring_task_cancel(void) + { + } +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 35894955b4549..cf7dd62da0e37 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -153,19 +153,6 @@ static void __io_submit_flush_completions(struct 
io_ring_ctx *ctx); + + static struct kmem_cache *req_cachep; + +-struct sock *io_uring_get_socket(struct file *file) +-{ +-#if defined(CONFIG_UNIX) +- if (io_is_uring_fops(file)) { +- struct io_ring_ctx *ctx = file->private_data; +- +- return ctx->ring_sock->sk; +- } +-#endif +- return NULL; +-} +-EXPORT_SYMBOL(io_uring_get_socket); +- + static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) + { + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) +diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h +index 019600570ee49..59e6f755f12c6 100644 +--- a/io_uring/io_uring.h ++++ b/io_uring/io_uring.h +@@ -52,7 +52,6 @@ static inline bool io_req_ffs_set(struct io_kiocb *req) + } + + void __io_req_task_work_add(struct io_kiocb *req, bool allow_local); +-bool io_is_uring_fops(struct file *file); + bool io_alloc_async_data(struct io_kiocb *req); + void io_req_task_queue(struct io_kiocb *req); + void io_queue_iowq(struct io_kiocb *req, bool *dont_use); +diff --git a/net/core/scm.c b/net/core/scm.c +index e762a4b8a1d22..a877c4ef4c256 100644 +--- a/net/core/scm.c ++++ b/net/core/scm.c +@@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) + if (fd < 0 || !(file = fget_raw(fd))) + return -EBADF; + /* don't allow io_uring files */ +- if (io_uring_get_socket(file)) { ++ if (io_is_uring_fops(file)) { + fput(file); + return -EINVAL; + } +diff --git a/net/unix/scm.c b/net/unix/scm.c +index e8e2a00bb0f58..d1048b4c2baaf 100644 +--- a/net/unix/scm.c ++++ b/net/unix/scm.c +@@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp) + /* PF_UNIX ? 
*/ + if (s && sock->ops && sock->ops->family == PF_UNIX) + u_sock = s; +- } else { +- /* Could be an io_uring instance */ +- u_sock = io_uring_get_socket(filp); + } ++ + return u_sock; + } + EXPORT_SYMBOL(unix_get_socket); +-- +2.43.0 + diff --git a/queue-6.1/md-fix-data-corruption-for-raid456-when-reshape-rest.patch b/queue-6.1/md-fix-data-corruption-for-raid456-when-reshape-rest.patch new file mode 100644 index 00000000000..a408267d733 --- /dev/null +++ b/queue-6.1/md-fix-data-corruption-for-raid456-when-reshape-rest.patch @@ -0,0 +1,60 @@ +From 90a9cc9319044b9183d226527684e42547c5703c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 12 May 2023 09:56:07 +0800 +Subject: md: fix data corruption for raid456 when reshape restart while grow + up + +From: Yu Kuai + +[ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ] + +Currently, if reshape is interrupted, echo "reshape" to sync_action will +restart reshape from scratch, for example: + +echo frozen > sync_action +echo reshape > sync_action + +This will corrupt data before reshape_position if the array is growing, +fix the problem by continue reshape from reshape_position. 
+ +Reported-by: Peter Neuwirth +Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/ +Signed-off-by: Yu Kuai +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com +Signed-off-by: Sasha Levin +--- + drivers/md/md.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 846bdee4daa0e..1c87f3e708094 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -4903,11 +4903,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) + return -EINVAL; + err = mddev_lock(mddev); + if (!err) { +- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) ++ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { + err = -EBUSY; +- else { ++ } else if (mddev->reshape_position == MaxSector || ++ mddev->pers->check_reshape == NULL || ++ mddev->pers->check_reshape(mddev)) { + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + err = mddev->pers->start_reshape(mddev); ++ } else { ++ /* ++ * If reshape is still in progress, and ++ * md_check_recovery() can continue to reshape, ++ * don't restart reshape because data can be ++ * corrupted for raid456. ++ */ ++ clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + } + mddev_unlock(mddev); + } +-- +2.43.0 + diff --git a/queue-6.1/md-raid10-prevent-soft-lockup-while-flush-writes.patch b/queue-6.1/md-raid10-prevent-soft-lockup-while-flush-writes.patch new file mode 100644 index 00000000000..28085ff5a3e --- /dev/null +++ b/queue-6.1/md-raid10-prevent-soft-lockup-while-flush-writes.patch @@ -0,0 +1,79 @@ +From 5036866ba258e5dd80caf93c4ce92bf13eef7d82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 May 2023 21:11:00 +0800 +Subject: md/raid10: prevent soft lockup while flush writes + +From: Yu Kuai + +[ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ] + +Currently, there is no limit for raid1/raid10 plugged bio. 
While flushing +writes, raid1 has cond_resched() while raid10 doesn't, and too many +writes can cause soft lockup. + +The following soft lockup can be triggered easily with a writeback test for +raid10 with ramdisks: + +watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293] +Call Trace: + + call_rcu+0x16/0x20 + put_object+0x41/0x80 + __delete_object+0x50/0x90 + delete_object_full+0x2b/0x40 + kmemleak_free+0x46/0xa0 + slab_free_freelist_hook.constprop.0+0xed/0x1a0 + kmem_cache_free+0xfd/0x300 + mempool_free_slab+0x1f/0x30 + mempool_free+0x3a/0x100 + bio_free+0x59/0x80 + bio_put+0xcf/0x2c0 + free_r10bio+0xbf/0xf0 + raid_end_bio_io+0x78/0xb0 + one_write_done+0x8a/0xa0 + raid10_end_write_request+0x1b4/0x430 + bio_endio+0x175/0x320 + brd_submit_bio+0x3b9/0x9b7 [brd] + __submit_bio+0x69/0xe0 + submit_bio_noacct_nocheck+0x1e6/0x5a0 + submit_bio_noacct+0x38c/0x7e0 + flush_pending_writes+0xf0/0x240 + raid10d+0xac/0x1ed0 + +Fix the problem by adding cond_resched() to raid10, as raid1 already does. + +Note that unlimited plugged bio still need to be optimized, for example, +in the case of lots of dirty pages writeback, this will take lots of +memory and io will spend a long time in plug, hence io latency is bad. 
+ +Signed-off-by: Yu Kuai +Signed-off-by: Song Liu +Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com +Signed-off-by: Sasha Levin +--- + drivers/md/raid10.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c +index 7b318e7e8d459..009f7ffe4e10c 100644 +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -920,6 +920,7 @@ static void flush_pending_writes(struct r10conf *conf) + + raid1_submit_write(bio); + bio = next; ++ cond_resched(); + } + blk_finish_plug(&plug); + } else +@@ -1130,6 +1131,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) + + raid1_submit_write(bio); + bio = next; ++ cond_resched(); + } + kfree(plug); + } +-- +2.43.0 + diff --git a/queue-6.1/nfsd-add-an-nfsd4_encode_nfstime4-helper.patch b/queue-6.1/nfsd-add-an-nfsd4_encode_nfstime4-helper.patch new file mode 100644 index 00000000000..b896b59bdf1 --- /dev/null +++ b/queue-6.1/nfsd-add-an-nfsd4_encode_nfstime4-helper.patch @@ -0,0 +1,97 @@ +From c64e15be4c1424636df5a25108b846072809d0d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Jun 2023 10:13:39 -0400 +Subject: NFSD: Add an nfsd4_encode_nfstime4() helper + +From: Chuck Lever + +[ Upstream commit 262176798b18b12fd8ab84c94cfece0a6a652476 ] + +Clean up: de-duplicate some common code. 
+ +Reviewed-by: Jeff Layton +Acked-by: Tom Talpey +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++-------------------- + 1 file changed, 26 insertions(+), 20 deletions(-) + +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 597f14a80512f..514f4456cf5c6 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -2541,6 +2541,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, + return p; + } + ++static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr, ++ struct timespec64 *tv) ++{ ++ __be32 *p; ++ ++ p = xdr_reserve_space(xdr, XDR_UNIT * 3); ++ if (!p) ++ return nfserr_resource; ++ ++ p = xdr_encode_hyper(p, (s64)tv->tv_sec); ++ *p = cpu_to_be32(tv->tv_nsec); ++ return nfs_ok; ++} ++ + /* + * ctime (in NFSv4, time_metadata) is not writeable, and the client + * doesn't really care what resolution could theoretically be stored by +@@ -3346,11 +3360,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + p = xdr_encode_hyper(p, dummy64); + } + if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { +- p = xdr_reserve_space(xdr, 12); +- if (!p) +- goto out_resource; +- p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec); +- *p++ = cpu_to_be32(stat.atime.tv_nsec); ++ status = nfsd4_encode_nfstime4(xdr, &stat.atime); ++ if (status) ++ goto out; + } + if (bmval1 & FATTR4_WORD1_TIME_DELTA) { + p = xdr_reserve_space(xdr, 12); +@@ -3359,25 +3371,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + p = encode_time_delta(p, d_inode(dentry)); + } + if (bmval1 & FATTR4_WORD1_TIME_METADATA) { +- p = xdr_reserve_space(xdr, 12); +- if (!p) +- goto out_resource; +- p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec); +- *p++ = cpu_to_be32(stat.ctime.tv_nsec); ++ status = nfsd4_encode_nfstime4(xdr, &stat.ctime); ++ if (status) ++ goto out; + } + if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { +- p = xdr_reserve_space(xdr, 12); +- if (!p) +- goto out_resource; +- p = 
xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); +- *p++ = cpu_to_be32(stat.mtime.tv_nsec); ++ status = nfsd4_encode_nfstime4(xdr, &stat.mtime); ++ if (status) ++ goto out; + } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { +- p = xdr_reserve_space(xdr, 12); +- if (!p) +- goto out_resource; +- p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); +- *p++ = cpu_to_be32(stat.btime.tv_nsec); ++ status = nfsd4_encode_nfstime4(xdr, &stat.btime); ++ if (status) ++ goto out; + } + if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { + u64 ino = stat.ino; +-- +2.43.0 + diff --git a/queue-6.1/nfsd-add-some-comments-to-nfsd_file_do_acquire.patch b/queue-6.1/nfsd-add-some-comments-to-nfsd_file_do_acquire.patch new file mode 100644 index 00000000000..6d3946dfdf6 --- /dev/null +++ b/queue-6.1/nfsd-add-some-comments-to-nfsd_file_do_acquire.patch @@ -0,0 +1,39 @@ +From fd22589985ab174696c30d86be0cdea35b00176b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Jan 2023 07:15:12 -0500 +Subject: nfsd: add some comments to nfsd_file_do_acquire + +From: Jeff Layton + +[ Upstream commit b680cb9b737331aad271feebbedafb865504e234 ] + +David Howells mentioned that he found this bit of code confusing, so +sprinkle in some comments to clarify. + +Reported-by: David Howells +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index ab37b85b72077..50349449a4e52 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -1094,6 +1094,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + rcu_read_unlock(); + + if (nf) { ++ /* ++ * If the nf is on the LRU then it holds an extra reference ++ * that must be put if it's removed. It had better not be ++ * the last one however, since we should hold another. 
++ */ + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); + goto wait_for_construction; +-- +2.43.0 + diff --git a/queue-6.1/nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch b/queue-6.1/nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch new file mode 100644 index 00000000000..308030e65de --- /dev/null +++ b/queue-6.1/nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch @@ -0,0 +1,61 @@ +From 84897a6adc1d73b3d75791c1336ca59ca96bded8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Jan 2023 10:33:47 -0500 +Subject: nfsd: allow nfsd_file_get to sanely handle a NULL pointer + +From: Jeff Layton + +[ Upstream commit 70f62231cdfd52357836733dd31db787e0412ab2 ] + +...and remove some now-useless NULL pointer checks in its callers. + +Suggested-by: NeilBrown +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 5 ++--- + fs/nfsd/nfs4state.c | 4 +--- + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 697acf5c3c681..6e8712bd7c998 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -431,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf) + struct nfsd_file * + nfsd_file_get(struct nfsd_file *nf) + { +- if (likely(refcount_inc_not_zero(&nf->nf_ref))) ++ if (nf && refcount_inc_not_zero(&nf->nf_ref)) + return nf; + return NULL; + } +@@ -1086,8 +1086,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + rcu_read_lock(); + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); +- if (nf) +- nf = nfsd_file_get(nf); ++ nf = nfsd_file_get(nf); + rcu_read_unlock(); + + if (nf) { +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index b9d694ec25d19..e4522e86e984e 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi) + static struct nfsd_file * + 
__nfs4_get_fd(struct nfs4_file *f, int oflag) + { +- if (f->fi_fds[oflag]) +- return nfsd_file_get(f->fi_fds[oflag]); +- return NULL; ++ return nfsd_file_get(f->fi_fds[oflag]); + } + + static struct nfsd_file * +-- +2.43.0 + diff --git a/queue-6.1/nfsd-allow-reaping-files-still-under-writeback.patch b/queue-6.1/nfsd-allow-reaping-files-still-under-writeback.patch new file mode 100644 index 00000000000..1f919d3e825 --- /dev/null +++ b/queue-6.1/nfsd-allow-reaping-files-still-under-writeback.patch @@ -0,0 +1,95 @@ +From ac9a9f41a15c31910d32eee697d7fab55053c493 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 06:53:54 -0500 +Subject: nfsd: allow reaping files still under writeback + +From: Jeff Layton + +[ Upstream commit dcb779fcd4ed5984ad15991d574943d12a8693d1 ] + +On most filesystems, there is no reason to delay reaping an nfsd_file +just because its underlying inode is still under writeback. nfsd just +relies on client activity or the local flusher threads to do writeback. + +The main exception is NFS, which flushes all of its dirty data on last +close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to +signal that they do this, and only skip closing files under writeback on +such filesystems. + +Also, remove a redundant NULL file pointer check in +nfsd_file_check_writeback, and clean up nfs's export op flag +definitions. 
+ +Signed-off-by: Jeff Layton +Acked-by: Anna Schumaker +[ cel: adjusted to apply to v6.1.y ] +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfs/export.c | 9 ++++++--- + fs/nfsd/filecache.c | 12 +++++++++++- + include/linux/exportfs.h | 1 + + 3 files changed, 18 insertions(+), 4 deletions(-) + +diff --git a/fs/nfs/export.c b/fs/nfs/export.c +index 01596f2d0a1ed..9fe9586a51b71 100644 +--- a/fs/nfs/export.c ++++ b/fs/nfs/export.c +@@ -156,7 +156,10 @@ const struct export_operations nfs_export_ops = { + .fh_to_dentry = nfs_fh_to_dentry, + .get_parent = nfs_get_parent, + .fetch_iversion = nfs_fetch_iversion, +- .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| +- EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| +- EXPORT_OP_NOATOMIC_ATTR, ++ .flags = EXPORT_OP_NOWCC | ++ EXPORT_OP_NOSUBTREECHK | ++ EXPORT_OP_CLOSE_BEFORE_UNLINK | ++ EXPORT_OP_REMOTE_FS | ++ EXPORT_OP_NOATOMIC_ATTR | ++ EXPORT_OP_FLUSH_ON_CLOSE, + }; +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 9b7082fdd2115..a6fa6e9802772 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -402,13 +402,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) + struct file *file = nf->nf_file; + struct address_space *mapping; + +- if (!file || !(file->f_mode & FMODE_WRITE)) ++ /* File not open for write? */ ++ if (!(file->f_mode & FMODE_WRITE)) + return false; ++ ++ /* ++ * Some filesystems (e.g. NFS) flush all dirty data on close. ++ * On others, there is no need to wait for writeback. 
++ */ ++ if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE)) ++ return false; ++ + mapping = file->f_mapping; + return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); + } + ++ + static bool nfsd_file_lru_add(struct nfsd_file *nf) + { + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); +diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h +index fe848901fcc3a..218fc5c54e901 100644 +--- a/include/linux/exportfs.h ++++ b/include/linux/exportfs.h +@@ -221,6 +221,7 @@ struct export_operations { + #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply + atomic attribute updates + */ ++#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ + unsigned long flags; + }; + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-convert-filecache-to-rhltable.patch b/queue-6.1/nfsd-convert-filecache-to-rhltable.patch new file mode 100644 index 00000000000..5cbc9a80a68 --- /dev/null +++ b/queue-6.1/nfsd-convert-filecache-to-rhltable.patch @@ -0,0 +1,578 @@ +From e0179f3964a7b79117b258fcdf461385cf2f7d62 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Nov 2022 15:09:04 -0500 +Subject: NFSD: Convert filecache to rhltable + +From: Chuck Lever + +[ Upstream commit c4c649ab413ba6a785b25f0edbb12f617c87db2a ] + +While we were converting the nfs4_file hashtable to use the kernel's +resizable hashtable data structure, Neil Brown observed that the +list variant (rhltable) would be better for managing nfsd_file items +as well. The nfsd_file hash table will contain multiple entries for +the same inode -- these should be kept together on a list. And, it +could be possible for exotic or malicious client behavior to cause +the hash table to resize itself on every insertion. 
+ +A nice simplification is that rhltable_lookup() can return a list +that contains only nfsd_file items that match a given inode, which +enables us to eliminate specialized hash table helper functions and +use the default functions provided by the rhashtable implementation. + +Since we are now storing nfsd_file items for the same inode on a +single list, that effectively reduces the number of hash entries +that have to be tracked in the hash table. The minimum bucket count +is therefore lowered. + +Light testing with fstests generic/531 shows no regressions. + +Suggested-by: Neil Brown +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 311 ++++++++++++++++++-------------------------- + fs/nfsd/filecache.h | 9 +- + 2 files changed, 133 insertions(+), 187 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index a6fa6e9802772..2f0b2d964cbb1 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -74,70 +74,9 @@ static struct list_lru nfsd_file_lru; + static unsigned long nfsd_file_flags; + static struct fsnotify_group *nfsd_file_fsnotify_group; + static struct delayed_work nfsd_filecache_laundrette; +-static struct rhashtable nfsd_file_rhash_tbl ++static struct rhltable nfsd_file_rhltable + ____cacheline_aligned_in_smp; + +-enum nfsd_file_lookup_type { +- NFSD_FILE_KEY_INODE, +- NFSD_FILE_KEY_FULL, +-}; +- +-struct nfsd_file_lookup_key { +- struct inode *inode; +- struct net *net; +- const struct cred *cred; +- unsigned char need; +- bool gc; +- enum nfsd_file_lookup_type type; +-}; +- +-/* +- * The returned hash value is based solely on the address of an in-code +- * inode, a pointer to a slab-allocated object. The entropy in such a +- * pointer is concentrated in its middle bits. 
+- */ +-static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) +-{ +- unsigned long ptr = (unsigned long)inode; +- u32 k; +- +- k = ptr >> L1_CACHE_SHIFT; +- k &= 0x00ffffff; +- return jhash2(&k, 1, seed); +-} +- +-/** +- * nfsd_file_key_hashfn - Compute the hash value of a lookup key +- * @data: key on which to compute the hash value +- * @len: rhash table's key_len parameter (unused) +- * @seed: rhash table's random seed of the day +- * +- * Return value: +- * Computed 32-bit hash value +- */ +-static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) +-{ +- const struct nfsd_file_lookup_key *key = data; +- +- return nfsd_file_inode_hash(key->inode, seed); +-} +- +-/** +- * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file +- * @data: object on which to compute the hash value +- * @len: rhash table's key_len parameter (unused) +- * @seed: rhash table's random seed of the day +- * +- * Return value: +- * Computed 32-bit hash value +- */ +-static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) +-{ +- const struct nfsd_file *nf = data; +- +- return nfsd_file_inode_hash(nf->nf_inode, seed); +-} +- + static bool + nfsd_match_cred(const struct cred *c1, const struct cred *c2) + { +@@ -158,55 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2) + return true; + } + +-/** +- * nfsd_file_obj_cmpfn - Match a cache item against search criteria +- * @arg: search criteria +- * @ptr: cache item to check +- * +- * Return values: +- * %0 - Item matches search criteria +- * %1 - Item does not match search criteria +- */ +-static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, +- const void *ptr) +-{ +- const struct nfsd_file_lookup_key *key = arg->key; +- const struct nfsd_file *nf = ptr; +- +- switch (key->type) { +- case NFSD_FILE_KEY_INODE: +- if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) +- return 1; +- if (nf->nf_inode != key->inode) +- return 1; +- break; +- case NFSD_FILE_KEY_FULL: +- 
if (nf->nf_inode != key->inode) +- return 1; +- if (nf->nf_may != key->need) +- return 1; +- if (nf->nf_net != key->net) +- return 1; +- if (!nfsd_match_cred(nf->nf_cred, key->cred)) +- return 1; +- if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) +- return 1; +- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) +- return 1; +- break; +- } +- return 0; +-} +- + static const struct rhashtable_params nfsd_file_rhash_params = { + .key_len = sizeof_field(struct nfsd_file, nf_inode), + .key_offset = offsetof(struct nfsd_file, nf_inode), +- .head_offset = offsetof(struct nfsd_file, nf_rhash), +- .hashfn = nfsd_file_key_hashfn, +- .obj_hashfn = nfsd_file_obj_hashfn, +- .obj_cmpfn = nfsd_file_obj_cmpfn, +- /* Reduce resizing churn on light workloads */ +- .min_size = 512, /* buckets */ ++ .head_offset = offsetof(struct nfsd_file, nf_rlist), ++ ++ /* ++ * Start with a single page hash table to reduce resizing churn ++ * on light workloads. ++ */ ++ .min_size = 256, + .automatic_shrinking = true, + }; + +@@ -309,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) + } + + static struct nfsd_file * +-nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) ++nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, ++ bool want_gc) + { + struct nfsd_file *nf; + + nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); +- if (nf) { +- INIT_LIST_HEAD(&nf->nf_lru); +- nf->nf_birthtime = ktime_get(); +- nf->nf_file = NULL; +- nf->nf_cred = get_current_cred(); +- nf->nf_net = key->net; +- nf->nf_flags = 0; +- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); +- __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); +- if (key->gc) +- __set_bit(NFSD_FILE_GC, &nf->nf_flags); +- nf->nf_inode = key->inode; +- refcount_set(&nf->nf_ref, 1); +- nf->nf_may = key->need; +- nf->nf_mark = NULL; +- } ++ if (unlikely(!nf)) ++ return NULL; ++ ++ INIT_LIST_HEAD(&nf->nf_lru); ++ nf->nf_birthtime = ktime_get(); ++ nf->nf_file = NULL; ++ nf->nf_cred = 
get_current_cred(); ++ nf->nf_net = net; ++ nf->nf_flags = want_gc ? ++ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) : ++ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING); ++ nf->nf_inode = inode; ++ refcount_set(&nf->nf_ref, 1); ++ nf->nf_may = need; ++ nf->nf_mark = NULL; + return nf; + } + +@@ -354,8 +254,8 @@ static void + nfsd_file_hash_remove(struct nfsd_file *nf) + { + trace_nfsd_file_unhash(nf); +- rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, +- nfsd_file_rhash_params); ++ rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist, ++ nfsd_file_rhash_params); + } + + static bool +@@ -688,8 +588,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * +- * An nfsd_file represents a struct file being held open on behalf of nfsd. An +- * open file however can block other activity (such as leases), or cause ++ * An nfsd_file represents a struct file being held open on behalf of nfsd. ++ * An open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). 
+ * + * This function is intended to find open nfsd_files when this sort of +@@ -702,21 +602,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) + static void + nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) + { +- struct nfsd_file_lookup_key key = { +- .type = NFSD_FILE_KEY_INODE, +- .inode = inode, +- .gc = true, +- }; ++ struct rhlist_head *tmp, *list; + struct nfsd_file *nf; + + rcu_read_lock(); +- do { +- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, +- nfsd_file_rhash_params); +- if (!nf) +- break; ++ list = rhltable_lookup(&nfsd_file_rhltable, &inode, ++ nfsd_file_rhash_params); ++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { ++ if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) ++ continue; + nfsd_file_cond_queue(nf, dispose); +- } while (1); ++ } + rcu_read_unlock(); + } + +@@ -840,7 +736,7 @@ nfsd_file_cache_init(void) + if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) + return 0; + +- ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); ++ ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); + if (ret) + return ret; + +@@ -908,7 +804,7 @@ nfsd_file_cache_init(void) + nfsd_file_mark_slab = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; +- rhashtable_destroy(&nfsd_file_rhash_tbl); ++ rhltable_destroy(&nfsd_file_rhltable); + goto out; + } + +@@ -927,7 +823,7 @@ __nfsd_file_cache_purge(struct net *net) + struct nfsd_file *nf; + LIST_HEAD(dispose); + +- rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); ++ rhltable_walk_enter(&nfsd_file_rhltable, &iter); + do { + rhashtable_walk_start(&iter); + +@@ -1033,7 +929,7 @@ nfsd_file_cache_shutdown(void) + nfsd_file_mark_slab = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; +- rhashtable_destroy(&nfsd_file_rhash_tbl); ++ rhltable_destroy(&nfsd_file_rhltable); + + for_each_possible_cpu(i) { + per_cpu(nfsd_file_cache_hits, i) = 0; +@@ -1044,6 +940,35 @@ 
nfsd_file_cache_shutdown(void) + } + } + ++static struct nfsd_file * ++nfsd_file_lookup_locked(const struct net *net, const struct cred *cred, ++ struct inode *inode, unsigned char need, ++ bool want_gc) ++{ ++ struct rhlist_head *tmp, *list; ++ struct nfsd_file *nf; ++ ++ list = rhltable_lookup(&nfsd_file_rhltable, &inode, ++ nfsd_file_rhash_params); ++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { ++ if (nf->nf_may != need) ++ continue; ++ if (nf->nf_net != net) ++ continue; ++ if (!nfsd_match_cred(nf->nf_cred, cred)) ++ continue; ++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc) ++ continue; ++ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) ++ continue; ++ ++ if (!nfsd_file_get(nf)) ++ continue; ++ return nf; ++ } ++ return NULL; ++} ++ + /** + * nfsd_file_is_cached - are there any cached open files for this inode? + * @inode: inode to check +@@ -1058,16 +983,20 @@ nfsd_file_cache_shutdown(void) + bool + nfsd_file_is_cached(struct inode *inode) + { +- struct nfsd_file_lookup_key key = { +- .type = NFSD_FILE_KEY_INODE, +- .inode = inode, +- .gc = true, +- }; ++ struct rhlist_head *tmp, *list; ++ struct nfsd_file *nf; + bool ret = false; + +- if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, +- nfsd_file_rhash_params) != NULL) +- ret = true; ++ rcu_read_lock(); ++ list = rhltable_lookup(&nfsd_file_rhltable, &inode, ++ nfsd_file_rhash_params); ++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) ++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) { ++ ret = true; ++ break; ++ } ++ rcu_read_unlock(); ++ + trace_nfsd_file_is_cached(inode, (int)ret); + return ret; + } +@@ -1077,14 +1006,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) + { +- struct nfsd_file_lookup_key key = { +- .type = NFSD_FILE_KEY_FULL, +- .need = may_flags & NFSD_FILE_MAY_MASK, +- .net = SVC_NET(rqstp), +- .gc = want_gc, +- }; ++ unsigned char need = may_flags & 
NFSD_FILE_MAY_MASK; ++ struct net *net = SVC_NET(rqstp); ++ struct nfsd_file *new, *nf; ++ const struct cred *cred; + bool open_retry = true; +- struct nfsd_file *nf; ++ struct inode *inode; + __be32 status; + int ret; + +@@ -1092,14 +1019,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + if (status != nfs_ok) + return status; +- key.inode = d_inode(fhp->fh_dentry); +- key.cred = get_current_cred(); ++ inode = d_inode(fhp->fh_dentry); ++ cred = get_current_cred(); + + retry: + rcu_read_lock(); +- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, +- nfsd_file_rhash_params); +- nf = nfsd_file_get(nf); ++ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); + rcu_read_unlock(); + + if (nf) { +@@ -1113,21 +1038,32 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + goto wait_for_construction; + } + +- nf = nfsd_file_alloc(&key, may_flags); +- if (!nf) { ++ new = nfsd_file_alloc(net, inode, need, want_gc); ++ if (!new) { + status = nfserr_jukebox; + goto out; + } + +- ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, +- &key, &nf->nf_rhash, +- nfsd_file_rhash_params); ++ rcu_read_lock(); ++ spin_lock(&inode->i_lock); ++ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc); ++ if (unlikely(nf)) { ++ spin_unlock(&inode->i_lock); ++ rcu_read_unlock(); ++ nfsd_file_slab_free(&new->nf_rcu); ++ goto wait_for_construction; ++ } ++ nf = new; ++ ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist, ++ nfsd_file_rhash_params); ++ spin_unlock(&inode->i_lock); ++ rcu_read_unlock(); + if (likely(ret == 0)) + goto open_file; + + if (ret == -EEXIST) + goto retry; +- trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); ++ trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); + status = nfserr_jukebox; + goto construction_err; + +@@ -1136,7 +1072,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + + /* Did construction of this file fail? 
*/ + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { +- trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); ++ trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf); + if (!open_retry) { + status = nfserr_jukebox; + goto construction_err; +@@ -1158,13 +1094,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + nfsd_file_check_write_error(nf); + *pnf = nf; + } +- put_cred(key.cred); +- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); ++ put_cred(cred); ++ trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status); + return status; + + open_file: + trace_nfsd_file_alloc(nf); +- nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); ++ nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); + if (nf->nf_mark) { + if (file) { + get_file(file); +@@ -1182,7 +1118,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + * If construction failed, or we raced with a call to unlink() + * then unhash. + */ +- if (status == nfs_ok && key.inode->i_nlink == 0) ++ if (status != nfs_ok || inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); +@@ -1209,8 +1145,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + * seconds after the final nfsd_file_put() in case the caller + * wants to re-use it. + * +- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in +- * network byte order is returned. ++ * Return values: ++ * %nfs_ok - @pnf points to an nfsd_file with its reference ++ * count boosted. ++ * ++ * On error, an nfsstat value in network byte order is returned. + */ + __be32 + nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, +@@ -1230,8 +1169,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). + * +- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in +- * network byte order is returned. 
++ * Return values: ++ * %nfs_ok - @pnf points to an nfsd_file with its reference ++ * count boosted. ++ * ++ * On error, an nfsstat value in network byte order is returned. + */ + __be32 + nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +@@ -1252,8 +1194,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. + * +- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in +- * network byte order is returned. ++ * Return values: ++ * %nfs_ok - @pnf points to an nfsd_file with its reference ++ * count boosted. ++ * ++ * On error, an nfsstat value in network byte order is returned. + */ + __be32 + nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, +@@ -1284,7 +1229,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) + lru = list_lru_count(&nfsd_file_lru); + + rcu_read_lock(); +- ht = &nfsd_file_rhash_tbl; ++ ht = &nfsd_file_rhltable.ht; + count = atomic_read(&ht->nelems); + tbl = rht_dereference_rcu(ht->tbl, ht); + buckets = tbl->size; +@@ -1300,7 +1245,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) + evictions += per_cpu(nfsd_file_evictions, i); + } + +- seq_printf(m, "total entries: %u\n", count); ++ seq_printf(m, "total inodes: %u\n", count); + seq_printf(m, "hash buckets: %u\n", buckets); + seq_printf(m, "lru entries: %lu\n", lru); + seq_printf(m, "cache hits: %lu\n", hits); +diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h +index 41516a4263ea5..e54165a3224f0 100644 +--- a/fs/nfsd/filecache.h ++++ b/fs/nfsd/filecache.h +@@ -29,9 +29,8 @@ struct nfsd_file_mark { + * never be dereferenced, only used for comparison. 
+ */ + struct nfsd_file { +- struct rhash_head nf_rhash; +- struct list_head nf_lru; +- struct rcu_head nf_rcu; ++ struct rhlist_head nf_rlist; ++ void *nf_inode; + struct file *nf_file; + const struct cred *nf_cred; + struct net *nf_net; +@@ -40,10 +39,12 @@ struct nfsd_file { + #define NFSD_FILE_REFERENCED (2) + #define NFSD_FILE_GC (3) + unsigned long nf_flags; +- struct inode *nf_inode; /* don't deref */ + refcount_t nf_ref; + unsigned char nf_may; ++ + struct nfsd_file_mark *nf_mark; ++ struct list_head nf_lru; ++ struct rcu_head nf_rcu; + ktime_t nf_birthtime; + }; + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch b/queue-6.1/nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch new file mode 100644 index 00000000000..b8aef4ecb7b --- /dev/null +++ b/queue-6.1/nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch @@ -0,0 +1,101 @@ +From 3ffb64192436277690d09cc004c73101b7734148 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Jan 2023 07:15:11 -0500 +Subject: nfsd: don't kill nfsd_files because of lease break error + +From: Jeff Layton + +[ Upstream commit c6593366c0bf222be9c7561354dfb921c611745e ] + +An error from break_lease is non-fatal, so we needn't destroy the +nfsd_file in that case. Just put the reference like we normally would +and return the error. 
+ +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 29 +++++++++++++++-------------- + 1 file changed, 15 insertions(+), 14 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 568963b8a4777..ab37b85b72077 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -1102,7 +1102,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + nf = nfsd_file_alloc(&key, may_flags); + if (!nf) { + status = nfserr_jukebox; +- goto out_status; ++ goto out; + } + + ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, +@@ -1111,13 +1111,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + if (likely(ret == 0)) + goto open_file; + +- nfsd_file_slab_free(&nf->nf_rcu); +- nf = NULL; + if (ret == -EEXIST) + goto retry; + trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); + status = nfserr_jukebox; +- goto out_status; ++ goto construction_err; + + wait_for_construction: + wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); +@@ -1127,29 +1125,25 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); + if (!open_retry) { + status = nfserr_jukebox; +- goto out; ++ goto construction_err; + } + open_retry = false; +- if (refcount_dec_and_test(&nf->nf_ref)) +- nfsd_file_free(nf); + goto retry; + } +- + this_cpu_inc(nfsd_file_cache_hits); + + status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); ++ if (status != nfs_ok) { ++ nfsd_file_put(nf); ++ nf = NULL; ++ } ++ + out: + if (status == nfs_ok) { + this_cpu_inc(nfsd_file_acquisitions); + nfsd_file_check_write_error(nf); + *pnf = nf; +- } else { +- if (refcount_dec_and_test(&nf->nf_ref)) +- nfsd_file_free(nf); +- nf = NULL; + } +- +-out_status: + put_cred(key.cred); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + return status; +@@ -1179,6 +1173,13 @@ 
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + if (status != nfs_ok) + nfsd_file_unhash(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); ++ if (status == nfs_ok) ++ goto out; ++ ++construction_err: ++ if (refcount_dec_and_test(&nf->nf_ref)) ++ nfsd_file_free(nf); ++ nf = NULL; + goto out; + } + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-don-t-open-code-clear_and_wake_up_bit.patch b/queue-6.1/nfsd-don-t-open-code-clear_and_wake_up_bit.patch new file mode 100644 index 00000000000..bfb67f4ac32 --- /dev/null +++ b/queue-6.1/nfsd-don-t-open-code-clear_and_wake_up_bit.patch @@ -0,0 +1,34 @@ +From f9a7567d2186c97f9e03d5d7ed90c33431a472ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Jan 2023 07:15:09 -0500 +Subject: nfsd: don't open-code clear_and_wake_up_bit + +From: Jeff Layton + +[ Upstream commit b8bea9f6cdd7236c7c2238d022145e9b2f8aac22 ] + +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 6e8712bd7c998..5b5d39ec7b010 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -1174,9 +1174,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); +- clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); +- smp_mb__after_atomic(); +- wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); ++ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); + goto out; + } + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-don-t-take-put-an-extra-reference-when-putting-.patch b/queue-6.1/nfsd-don-t-take-put-an-extra-reference-when-putting-.patch new file mode 100644 index 00000000000..60bc534a0ac --- /dev/null +++ b/queue-6.1/nfsd-don-t-take-put-an-extra-reference-when-putting-.patch @@ -0,0 +1,38 @@ +From 51d7b689d98814aec1c0dee5d591bb57eecb47d0 Mon Sep 17 00:00:00 2001 +From: 
Sasha Levin +Date: Wed, 18 Jan 2023 12:31:37 -0500 +Subject: nfsd: don't take/put an extra reference when putting a file + +From: Jeff Layton + +[ Upstream commit b2ff1bd71db2a1b193a6dde0845adcd69cbcf75e ] + +The last thing that filp_close does is an fput, so don't bother taking +and putting the extra reference. + +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 50349449a4e52..51e2947c21a7d 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -382,10 +382,8 @@ nfsd_file_free(struct nfsd_file *nf) + if (nf->nf_mark) + nfsd_file_mark_put(nf->nf_mark); + if (nf->nf_file) { +- get_file(nf->nf_file); +- filp_close(nf->nf_file, NULL); + nfsd_file_check_write_error(nf); +- fput(nf->nf_file); ++ filp_close(nf->nf_file, NULL); + } + + /* +-- +2.43.0 + diff --git a/queue-6.1/nfsd-fix-creation-time-serialization-order.patch b/queue-6.1/nfsd-fix-creation-time-serialization-order.patch new file mode 100644 index 00000000000..b20d9934b39 --- /dev/null +++ b/queue-6.1/nfsd-fix-creation-time-serialization-order.patch @@ -0,0 +1,64 @@ +From a90a656e0be7aca2852443302021edca6b6c014a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Jun 2023 17:09:06 -0400 +Subject: nfsd: Fix creation time serialization order + +From: Tavian Barnes + +In nfsd4_encode_fattr(), TIME_CREATE was being written out after all +other times. However, they should be written out in an order that +matches the bit flags in bmval1, which in this case are + + #define FATTR4_WORD1_TIME_ACCESS (1UL << 15) + #define FATTR4_WORD1_TIME_CREATE (1UL << 18) + #define FATTR4_WORD1_TIME_DELTA (1UL << 19) + #define FATTR4_WORD1_TIME_METADATA (1UL << 20) + #define FATTR4_WORD1_TIME_MODIFY (1UL << 21) + +so TIME_CREATE should come second. + +I noticed this on a FreeBSD NFSv4.2 client, which supports creation +times. 
On this client, file times were weirdly permuted. With this +patch applied on the server, times looked normal on the client. + +Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute") +Link: https://unix.stackexchange.com/q/749605/56202 +Signed-off-by: Tavian Barnes +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4xdr.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c +index 514f4456cf5c6..4ed9fef14adc2 100644 +--- a/fs/nfsd/nfs4xdr.c ++++ b/fs/nfsd/nfs4xdr.c +@@ -3364,6 +3364,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + if (status) + goto out; + } ++ if (bmval1 & FATTR4_WORD1_TIME_CREATE) { ++ status = nfsd4_encode_nfstime4(xdr, &stat.btime); ++ if (status) ++ goto out; ++ } + if (bmval1 & FATTR4_WORD1_TIME_DELTA) { + p = xdr_reserve_space(xdr, 12); + if (!p) +@@ -3380,11 +3385,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, + if (status) + goto out; + } +- if (bmval1 & FATTR4_WORD1_TIME_CREATE) { +- status = nfsd4_encode_nfstime4(xdr, &stat.btime); +- if (status) +- goto out; +- } + if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { + u64 ino = stat.ino; + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch b/queue-6.1/nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch new file mode 100644 index 00000000000..023f7334deb --- /dev/null +++ b/queue-6.1/nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch @@ -0,0 +1,48 @@ +From a756d96c306e659f97ceacf72814028f0d6342fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Apr 2023 10:53:18 -0700 +Subject: NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop + +From: Dai Ngo + +[ Upstream commit 147abcacee33781e75588869e944ddb07528a897 ] + +The following request sequence to the same file causes the NFS client and +server getting into an infinite loop with COMMIT and 
NFS4ERR_DELAY: + +OPEN +REMOVE +WRITE +COMMIT + +Problem reported by recall11, recall12, recall14, recall20, recall22, +recall40, recall42, recall48, recall50 of the nfstest suite. + +This patch restores the handling of the race condition in nfsd_file_do_acquire +with unlink to what it was prior to the regression. + +Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache") +Signed-off-by: Dai Ngo +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index f40d8f3b35a4c..ee9c923192e08 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -1099,8 +1099,6 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + * then unhash. + */ + if (status != nfs_ok || inode->i_nlink == 0) +- status = nfserr_jukebox; +- if (status != nfs_ok) + nfsd_file_unhash(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (status == nfs_ok) +-- +2.43.0 + diff --git a/queue-6.1/nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch b/queue-6.1/nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch new file mode 100644 index 00000000000..0305b70daaa --- /dev/null +++ b/queue-6.1/nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch @@ -0,0 +1,55 @@ +From f494a5af1d58eb2bba67f9dfe869001e4b7f8ee9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Jan 2023 10:39:00 -0500 +Subject: nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries + +From: Jeff Layton + +[ Upstream commit 6c31e4c98853a4ba47355ea151b36a77c42b7734 ] + +Since v4 files are expected to be long-lived, there's little value in +closing them out of the cache when there is conflicting access. + +Change the comparator to also match the gc value in the key. Change both +of the current users of that key to set the gc value in the key to +"true". 
+ +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 5b5d39ec7b010..c36e3032d4386 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -175,6 +175,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, + + switch (key->type) { + case NFSD_FILE_KEY_INODE: ++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) ++ return 1; + if (nf->nf_inode != key->inode) + return 1; + break; +@@ -695,6 +697,7 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, ++ .gc = true, + }; + struct nfsd_file *nf; + +@@ -1049,6 +1052,7 @@ nfsd_file_is_cached(struct inode *inode) + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, ++ .gc = true, + }; + bool ret = false; + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch b/queue-6.1/nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch new file mode 100644 index 00000000000..7e7f3b50775 --- /dev/null +++ b/queue-6.1/nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch @@ -0,0 +1,35 @@ +From 8832f93342b22e70fa14c6a809aedfe115a37007 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 Jan 2023 10:39:01 -0500 +Subject: nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator + +From: Jeff Layton + +[ Upstream commit d69b8dbfd0866abc5ec84652cc1c10fc3d4d91ef ] + +test_bit returns bool, so we can just compare the result of that to the +key->gc value without the "!!". 
+ +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index c36e3032d4386..568963b8a4777 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -189,7 +189,7 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, + return 1; + if (!nfsd_match_cred(nf->nf_cred, key->cred)) + return 1; +- if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) ++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + return 1; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + return 1; +-- +2.43.0 + diff --git a/queue-6.1/nfsd-simplify-the-delayed-disposal-list-code.patch b/queue-6.1/nfsd-simplify-the-delayed-disposal-list-code.patch new file mode 100644 index 00000000000..820f019a982 --- /dev/null +++ b/queue-6.1/nfsd-simplify-the-delayed-disposal-list-code.patch @@ -0,0 +1,119 @@ +From b6ea58695aec8f2b36a931723f6630cdc3ac409f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Apr 2023 17:31:44 -0400 +Subject: nfsd: simplify the delayed disposal list code + +From: Jeff Layton + +[ Upstream commit 92e4a6733f922f0fef1d0995f7b2d0eaff86c7ea ] + +When queueing a dispose list to the appropriate "freeme" lists, it +pointlessly queues the objects one at a time to an intermediate list. + +Remove a few helpers and just open code a list_move to make it more +clear and efficient. Better document the resulting functions with +kerneldoc comments. 
+ +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 64 ++++++++++++++++----------------------------- + 1 file changed, 22 insertions(+), 42 deletions(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 2f0b2d964cbb1..f40d8f3b35a4c 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -402,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose) + } + } + +-static void +-nfsd_file_list_remove_disposal(struct list_head *dst, +- struct nfsd_fcache_disposal *l) +-{ +- spin_lock(&l->lock); +- list_splice_init(&l->freeme, dst); +- spin_unlock(&l->lock); +-} +- +-static void +-nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +-{ +- struct nfsd_net *nn = net_generic(net, nfsd_net_id); +- struct nfsd_fcache_disposal *l = nn->fcache_disposal; +- +- spin_lock(&l->lock); +- list_splice_tail_init(files, &l->freeme); +- spin_unlock(&l->lock); +- queue_work(nfsd_filecache_wq, &l->work); +-} +- +-static void +-nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, +- struct net *net) +-{ +- struct nfsd_file *nf, *tmp; +- +- list_for_each_entry_safe(nf, tmp, src, nf_lru) { +- if (nf->nf_net == net) +- list_move_tail(&nf->nf_lru, dst); +- } +-} +- ++/** ++ * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list ++ * @dispose: list of nfsd_files to be disposed ++ * ++ * Transfers each file to the "freeme" list for its nfsd_net, to eventually ++ * be disposed of by the per-net garbage collector. 
++ */ + static void + nfsd_file_dispose_list_delayed(struct list_head *dispose) + { +- LIST_HEAD(list); +- struct nfsd_file *nf; +- + while(!list_empty(dispose)) { +- nf = list_first_entry(dispose, struct nfsd_file, nf_lru); +- nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); +- nfsd_file_list_add_disposal(&list, nf->nf_net); ++ struct nfsd_file *nf = list_first_entry(dispose, ++ struct nfsd_file, nf_lru); ++ struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); ++ struct nfsd_fcache_disposal *l = nn->fcache_disposal; ++ ++ spin_lock(&l->lock); ++ list_move_tail(&nf->nf_lru, &l->freeme); ++ spin_unlock(&l->lock); ++ queue_work(nfsd_filecache_wq, &l->work); + } + } + +@@ -665,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode) + * nfsd_file_delayed_close - close unused nfsd_files + * @work: dummy + * +- * Walk the LRU list and destroy any entries that have not been used since +- * the last scan. ++ * Scrape the freeme list for this nfsd_net, and then dispose of them ++ * all. 
+ */ + static void + nfsd_file_delayed_close(struct work_struct *work) +@@ -675,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work) + struct nfsd_fcache_disposal *l = container_of(work, + struct nfsd_fcache_disposal, work); + +- nfsd_file_list_remove_disposal(&head, l); ++ spin_lock(&l->lock); ++ list_splice_init(&l->freeme, &head); ++ spin_unlock(&l->lock); ++ + nfsd_file_dispose_list(&head); + } + +-- +2.43.0 + diff --git a/queue-6.1/nfsd-update-comment-over-__nfsd_file_cache_purge.patch b/queue-6.1/nfsd-update-comment-over-__nfsd_file_cache_purge.patch new file mode 100644 index 00000000000..9e2affaf6fd --- /dev/null +++ b/queue-6.1/nfsd-update-comment-over-__nfsd_file_cache_purge.patch @@ -0,0 +1,33 @@ +From 40fe2393e752ab49b382d502c2f9ae0d31e4ac19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Jan 2023 12:21:16 -0500 +Subject: nfsd: update comment over __nfsd_file_cache_purge + +From: Jeff Layton + +[ Upstream commit 972cc0e0924598cb293b919d39c848dc038b2c28 ] + +Signed-off-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/filecache.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c +index 51e2947c21a7d..9b7082fdd2115 100644 +--- a/fs/nfsd/filecache.c ++++ b/fs/nfsd/filecache.c +@@ -907,7 +907,8 @@ nfsd_file_cache_init(void) + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, +- * then close out everything. Called when an nfsd instance is being shut down. ++ * then close out everything. Called when an nfsd instance is being shut down, ++ * and when the exports table is flushed. 
+ */ + static void + __nfsd_file_cache_purge(struct net *net) +-- +2.43.0 + diff --git a/queue-6.1/series b/queue-6.1/series new file mode 100644 index 00000000000..109865ba4d5 --- /dev/null +++ b/queue-6.1/series @@ -0,0 +1,18 @@ +md-fix-data-corruption-for-raid456-when-reshape-rest.patch +md-raid10-prevent-soft-lockup-while-flush-writes.patch +io_uring-unix-drop-usage-of-io_uring-socket.patch +io_uring-drop-any-code-related-to-scm_rights.patch +nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch +nfsd-don-t-open-code-clear_and_wake_up_bit.patch +nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch +nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch +nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch +nfsd-add-some-comments-to-nfsd_file_do_acquire.patch +nfsd-don-t-take-put-an-extra-reference-when-putting-.patch +nfsd-update-comment-over-__nfsd_file_cache_purge.patch +nfsd-allow-reaping-files-still-under-writeback.patch +nfsd-convert-filecache-to-rhltable.patch +nfsd-simplify-the-delayed-disposal-list-code.patch +nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch +nfsd-add-an-nfsd4_encode_nfstime4-helper.patch +nfsd-fix-creation-time-serialization-order.patch