--- /dev/null
+From eca9cede7c963937bf47f86b0c4cd5e4c22f1cf7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:21 +0300
+Subject: dnotify: use fsnotify group lock helpers
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit aabb45fdcb31f00f1e7cae2bce83e83474a87c03 ]
+
+Before commit 9542e6a643fc6 ("nfsd: Containerise filecache laundrette")
+nfsd would close open files in direct reclaim context. There is no
+guarantee that other memory shrinkers don't do the same and no
+guarantee that future shrinkers won't do that.
+
+For example, if overlayfs implemented an inode cache, or if fscache
+kept open files to cached objects, inode shrinkers could end up closing
+open files to the underlying fs.
+
+Direct reclaim from dnotify mark allocation context may try to close
+open files that have dnotify marks of the same group and hit a deadlock
+on mark_mutex.
+
+Set the FSNOTIFY_GROUP_NOFS flag to prevent going into direct reclaim
+from allocations under dnotify group lock and use the safe group lock
+helpers.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-11-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220321112310.vpr7oxro2xkz5llh@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/dnotify/dnotify.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
+index 6c586802c50e6..fa81c59a2ad41 100644
+--- a/fs/notify/dnotify/dnotify.c
++++ b/fs/notify/dnotify/dnotify.c
+@@ -150,7 +150,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
+ return;
+ dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
+
+- mutex_lock(&dnotify_group->mark_mutex);
++ fsnotify_group_lock(dnotify_group);
+
+ spin_lock(&fsn_mark->lock);
+ prev = &dn_mark->dn;
+@@ -173,7 +173,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
+ free = true;
+ }
+
+- mutex_unlock(&dnotify_group->mark_mutex);
++ fsnotify_group_unlock(dnotify_group);
+
+ if (free)
+ fsnotify_free_mark(fsn_mark);
+@@ -306,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+ new_dn_mark->dn = NULL;
+
+ /* this is needed to prevent the fcntl/close race described below */
+- mutex_lock(&dnotify_group->mark_mutex);
++ fsnotify_group_lock(dnotify_group);
+
+ /* add the new_fsn_mark or find an old one. */
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
+@@ -316,7 +316,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+ } else {
+ error = fsnotify_add_inode_mark_locked(new_fsn_mark, inode, 0);
+ if (error) {
+- mutex_unlock(&dnotify_group->mark_mutex);
++ fsnotify_group_unlock(dnotify_group);
+ goto out_err;
+ }
+ spin_lock(&new_fsn_mark->lock);
+@@ -365,7 +365,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+
+ if (destroy)
+ fsnotify_detach_mark(fsn_mark);
+- mutex_unlock(&dnotify_group->mark_mutex);
++ fsnotify_group_unlock(dnotify_group);
+ if (destroy)
+ fsnotify_free_mark(fsn_mark);
+ fsnotify_put_mark(fsn_mark);
+@@ -383,7 +383,8 @@ static int __init dnotify_init(void)
+ SLAB_PANIC|SLAB_ACCOUNT);
+ dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
+
+- dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0);
++ dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops,
++ FSNOTIFY_GROUP_NOFS);
+ if (IS_ERR(dnotify_group))
+ panic("unable to allocate fsnotify group for dnotify\n");
+ return 0;
+--
+2.43.0
+
--- /dev/null
+From dc2504b41717fb8701bd18e01b8c9b1c25f1a31c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:46 -0300
+Subject: docs: Document the FAN_FS_ERROR event
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit c0baf9ac0b05d53dfe0436661dbdc5e43c01c5e0 ]
+
+Document the FAN_FS_ERROR event for user administrators and user space
+developers.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-32-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ .../admin-guide/filesystem-monitoring.rst | 74 +++++++++++++++++++
+ Documentation/admin-guide/index.rst | 1 +
+ 2 files changed, 75 insertions(+)
+ create mode 100644 Documentation/admin-guide/filesystem-monitoring.rst
+
+diff --git a/Documentation/admin-guide/filesystem-monitoring.rst b/Documentation/admin-guide/filesystem-monitoring.rst
+new file mode 100644
+index 0000000000000..5a3c84e60095f
+--- /dev/null
++++ b/Documentation/admin-guide/filesystem-monitoring.rst
+@@ -0,0 +1,74 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++====================================
++File system Monitoring with fanotify
++====================================
++
++File system Error Reporting
++===========================
++
++Fanotify supports the FAN_FS_ERROR event type for file system-wide error
++reporting. It is meant to be used by file system health monitoring
++daemons, which listen for these events and take actions (notify
++sysadmin, start recovery) when a file system problem is detected.
++
++By design, a FAN_FS_ERROR notification exposes sufficient information
++for a monitoring tool to know a problem in the file system has happened.
++It doesn't necessarily provide a user space application with semantics
++to verify an IO operation was successfully executed. That is out of
++scope for this feature. Instead, it is only meant as a framework for
++early file system problem detection and reporting recovery tools.
++
++When a file system operation fails, it is common for dozens of kernel
++errors to cascade after the initial failure, hiding the original failure
++log, which is usually the most useful debug data to troubleshoot the
++problem. For this reason, FAN_FS_ERROR tries to report only the first
++error that occurred for a file system since the last notification, and
++it simply counts additional errors. This ensures that the most
++important pieces of information are never lost.
++
++FAN_FS_ERROR requires the fanotify group to be set up with the
++FAN_REPORT_FID flag.
++
++At the time of this writing, the only file system that emits FAN_FS_ERROR
++notifications is Ext4.
++
++A FAN_FS_ERROR Notification has the following format::
++
++ [ Notification Metadata (Mandatory) ]
++ [ Generic Error Record (Mandatory) ]
++ [ FID record (Mandatory) ]
++
++The order of records is not guaranteed, and new records might be added
++in the future. Therefore, applications must not rely on the order and
++must be prepared to skip over unknown records. Please refer to
++``samples/fanotify/fs-monitor.c`` for an example parser.
++
++Generic error record
++--------------------
++
++The generic error record provides enough information for a file system
++agnostic tool to learn about a problem in the file system, without
++providing any additional details about the problem. This record is
++identified by ``struct fanotify_event_info_header.info_type`` being set
++to FAN_EVENT_INFO_TYPE_ERROR::
++
++ struct fanotify_event_info_error {
++ struct fanotify_event_info_header hdr;
++ __s32 error;
++ __u32 error_count;
++ };
++
++The `error` field identifies the type of error using errno values.
++`error_count` tracks the number of errors that occurred and were
++suppressed to preserve the original error information, since the last
++notification.
++
++FID record
++----------
++
++The FID record can be used to uniquely identify the inode that triggered
++the error through the combination of fsid and file handle. A file system
++specific application can use that information to attempt a recovery
++procedure. Errors that are not related to an inode are reported with an
++empty file handle of type FILEID_INVALID.
+diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
+index dc00afcabb95f..1bedab498104a 100644
+--- a/Documentation/admin-guide/index.rst
++++ b/Documentation/admin-guide/index.rst
+@@ -82,6 +82,7 @@ configure specific aspects of kernel behavior to your liking.
+ edid
+ efi-stub
+ ext4
++ filesystem-monitoring
+ nfs/index
+ gpio/index
+ highuid
+--
+2.43.0
+
--- /dev/null
+From bb21d3e2cdf7d108ce611c039aede97ed2f14c93 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Aug 2023 15:04:23 -0400
+Subject: Documentation: Add missing documentation for EXPORT_OP flags
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit b38a6023da6a12b561f0421c6a5a1f7624a1529c ]
+
+The commits that introduced these flags neglected to update the
+Documentation/filesystems/nfs/exporting.rst file.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ Documentation/filesystems/nfs/exporting.rst | 26 +++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
+index 0e98edd353b5f..6f59a364f84cd 100644
+--- a/Documentation/filesystems/nfs/exporting.rst
++++ b/Documentation/filesystems/nfs/exporting.rst
+@@ -215,3 +215,29 @@ following flags are defined:
+ This flag causes nfsd to close any open files for this inode _before_
+ calling into the vfs to do an unlink or a rename that would replace
+ an existing file.
++
++ EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote
++ PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to
++ write to one bdi (the final bdi) in order to free up writes queued
++ to another bdi (the client bdi). Such threads get a private balance
++ of dirty pages so that dirty pages for the client bdi do not impact
++ the daemon writing to the final bdi. For filesystems whose durable
++ storage is not local (such as exported NFS filesystems), this
++ constraint has negative consequences. EXPORT_OP_REMOTE_FS enables
++ an export to disable writeback throttling.
++
++ EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically
++ EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem
++ cannot provide the semantics required by the "atomic" boolean in
++ NFSv4's change_info4. This boolean indicates to a client whether the
++ returned before and after change attributes were obtained atomically
++ with respect to the requested metadata operation (UNLINK,
++ OPEN/CREATE, MKDIR, etc).
++
++ EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2)
++ On most filesystems, inodes can remain under writeback after the
++ file is closed. NFSD relies on client activity or local flusher
++ threads to handle writeback. Certain filesystems, such as NFS, flush
++ all of an inode's dirty data on last close. Exports that behave this
++ way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
++ waiting for writeback when closing such files.
+--
+2.43.0
+
--- /dev/null
+From d6253314d943ecb9c27feb969e17b27ba64ce234 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Nov 2021 10:27:36 -0600
+Subject: exit: Implement kthread_exit
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+[ Upstream commit bbda86e988d4c124e4cfa816291cbd583ae8bfb1 ]
+
+The way the per task_struct exit_code is used by kernel threads is not
+quite compatible with how it is used by userspace applications. The low
+byte of the userspace exit_code value encodes the exit signal. While
+kthreads just use the value as an int holding ordinary kernel function
+exit status like -EPERM.
+
+Add kthread_exit to clearly separate the two kinds of uses.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Stable-dep-of: ca3574bd653a ("exit: Rename module_put_and_exit to module_put_and_kthread_exit")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/kthread.h | 1 +
+ kernel/kthread.c | 23 +++++++++++++++++++----
+ tools/objtool/check.c | 1 +
+ 3 files changed, 21 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/kthread.h b/include/linux/kthread.h
+index db47aae7c481b..8e21bd13c36dd 100644
+--- a/include/linux/kthread.h
++++ b/include/linux/kthread.h
+@@ -95,6 +95,7 @@ void *kthread_probe_data(struct task_struct *k);
+ int kthread_park(struct task_struct *k);
+ void kthread_unpark(struct task_struct *k);
+ void kthread_parkme(void);
++void kthread_exit(long result) __noreturn;
+
+ int kthreadd(void *unused);
+ extern struct task_struct *kthreadd_task;
+diff --git a/kernel/kthread.c b/kernel/kthread.c
+index e319a1b62586e..4cc6897b7ca40 100644
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -268,6 +268,21 @@ void kthread_parkme(void)
+ }
+ EXPORT_SYMBOL_GPL(kthread_parkme);
+
++/**
++ * kthread_exit - Cause the current kthread to return @result to kthread_stop().
++ * @result: The integer value to return to kthread_stop().
++ *
++ * While kthread_exit can be called directly, it exists so that
++ * functions which do some additional work in non-modular code such as
++ * module_put_and_kthread_exit can be implemented.
++ *
++ * Does not return.
++ */
++void __noreturn kthread_exit(long result)
++{
++ do_exit(result);
++}
++
+ static int kthread(void *_create)
+ {
+ /* Copy data: it's on kthread's stack */
+@@ -285,13 +300,13 @@ static int kthread(void *_create)
+ done = xchg(&create->done, NULL);
+ if (!done) {
+ kfree(create);
+- do_exit(-EINTR);
++ kthread_exit(-EINTR);
+ }
+
+ if (!self) {
+ create->result = ERR_PTR(-ENOMEM);
+ complete(done);
+- do_exit(-ENOMEM);
++ kthread_exit(-ENOMEM);
+ }
+
+ self->threadfn = threadfn;
+@@ -318,7 +333,7 @@ static int kthread(void *_create)
+ __kthread_parkme(self);
+ ret = threadfn(data);
+ }
+- do_exit(ret);
++ kthread_exit(ret);
+ }
+
+ /* called from kernel_clone() to get node information for about to be created task */
+@@ -628,7 +643,7 @@ EXPORT_SYMBOL_GPL(kthread_park);
+ * instead of calling wake_up_process(): the thread will exit without
+ * calling threadfn().
+ *
+- * If threadfn() may call do_exit() itself, the caller must ensure
++ * If threadfn() may call kthread_exit() itself, the caller must ensure
+ * task_struct can't go away.
+ *
+ * Returns the result of threadfn(), or %-EINTR if wake_up_process()
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index c3bb96e5bfa64..f066837d8e1aa 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -169,6 +169,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+ "panic",
+ "do_exit",
+ "do_task_dead",
++ "kthread_exit",
+ "make_task_dead",
+ "__module_put_and_exit",
+ "complete_and_exit",
+--
+2.43.0
+
--- /dev/null
+From 779a0ed86bb5e411a365ab158ab31ae1f0134341 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Dec 2021 11:00:19 -0600
+Subject: exit: Rename module_put_and_exit to module_put_and_kthread_exit
+
+From: Eric W. Biederman <ebiederm@xmission.com>
+
+[ Upstream commit ca3574bd653aba234a4b31955f2778947403be16 ]
+
+Update module_put_and_exit to call kthread_exit instead of do_exit.
+
+Change the name to reflect this change in functionality. All of the
+users of module_put_and_exit are causing the current kthread to exit
+so this change makes it clear what is happening. There is no
+functional change.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ crypto/algboss.c | 4 ++--
+ fs/cifs/connect.c | 2 +-
+ fs/nfs/callback.c | 4 ++--
+ fs/nfs/nfs4state.c | 2 +-
+ fs/nfsd/nfssvc.c | 2 +-
+ include/linux/module.h | 6 +++---
+ kernel/module.c | 6 +++---
+ net/bluetooth/bnep/core.c | 2 +-
+ net/bluetooth/cmtp/core.c | 2 +-
+ net/bluetooth/hidp/core.c | 2 +-
+ tools/objtool/check.c | 2 +-
+ 11 files changed, 17 insertions(+), 17 deletions(-)
+
+diff --git a/crypto/algboss.c b/crypto/algboss.c
+index 1814d2c5188a3..eb5fe84efb83e 100644
+--- a/crypto/algboss.c
++++ b/crypto/algboss.c
+@@ -67,7 +67,7 @@ static int cryptomgr_probe(void *data)
+ complete_all(¶m->larval->completion);
+ crypto_alg_put(¶m->larval->alg);
+ kfree(param);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ }
+
+ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
+@@ -190,7 +190,7 @@ static int cryptomgr_test(void *data)
+ crypto_alg_tested(param->driver, err);
+
+ kfree(param);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ }
+
+ static int cryptomgr_schedule_test(struct crypto_alg *alg)
+diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
+index a3e4811b7871e..1cbfb74c53804 100644
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -1145,7 +1145,7 @@ cifs_demultiplex_thread(void *p)
+ }
+
+ memalloc_noreclaim_restore(noreclaim_flag);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ }
+
+ /*
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 86d856de1389b..3c86a559a321a 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -93,7 +93,7 @@ nfs4_callback_svc(void *vrqstp)
+ svc_process(rqstp);
+ }
+ svc_exit_thread(rqstp);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+@@ -137,7 +137,7 @@ nfs41_callback_svc(void *vrqstp)
+ }
+ }
+ svc_exit_thread(rqstp);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index 258e6b167285c..d7868cc527805 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -2766,6 +2766,6 @@ static int nfs4_run_state_manager(void *ptr)
+ goto again;
+
+ nfs_put_client(clp);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 408cff8fe32d3..0f84151011088 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -986,7 +986,7 @@ nfsd(void *vrqstp)
+
+ /* Release module */
+ mutex_unlock(&nfsd_mutex);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+diff --git a/include/linux/module.h b/include/linux/module.h
+index 701c150485b2f..fb9762e16f285 100644
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -595,9 +595,9 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ /* Look for this name: can be of form module:name. */
+ unsigned long module_kallsyms_lookup_name(const char *name);
+
+-extern void __noreturn __module_put_and_exit(struct module *mod,
++extern void __noreturn __module_put_and_kthread_exit(struct module *mod,
+ long code);
+-#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
++#define module_put_and_kthread_exit(code) __module_put_and_kthread_exit(THIS_MODULE, code)
+
+ #ifdef CONFIG_MODULE_UNLOAD
+ int module_refcount(struct module *mod);
+@@ -790,7 +790,7 @@ static inline int unregister_module_notifier(struct notifier_block *nb)
+ return 0;
+ }
+
+-#define module_put_and_exit(code) do_exit(code)
++#define module_put_and_kthread_exit(code) kthread_exit(code)
+
+ static inline void print_modules(void)
+ {
+diff --git a/kernel/module.c b/kernel/module.c
+index 4d49c32af570c..f2b8314546f17 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -337,12 +337,12 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
+ * A thread that wants to hold a reference to a module only while it
+ * is running can call this to safely exit. nfsd and lockd use this.
+ */
+-void __noreturn __module_put_and_exit(struct module *mod, long code)
++void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
+ {
+ module_put(mod);
+- do_exit(code);
++ kthread_exit(code);
+ }
+-EXPORT_SYMBOL(__module_put_and_exit);
++EXPORT_SYMBOL(__module_put_and_kthread_exit);
+
+ /* Find a module section: 0 means not found. */
+ static unsigned int find_sec(const struct load_info *info, const char *name)
+diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
+index 72f47b372705d..a796d72c7dbaa 100644
+--- a/net/bluetooth/bnep/core.c
++++ b/net/bluetooth/bnep/core.c
+@@ -535,7 +535,7 @@ static int bnep_session(void *arg)
+
+ up_write(&bnep_session_sem);
+ free_netdev(dev);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
+index 83eb84e8e688f..90d130588a3e5 100644
+--- a/net/bluetooth/cmtp/core.c
++++ b/net/bluetooth/cmtp/core.c
+@@ -323,7 +323,7 @@ static int cmtp_session(void *arg)
+ up_write(&cmtp_session_sem);
+
+ kfree(session);
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
+index 021ab957a5c4d..8ff45fb6f7007 100644
+--- a/net/bluetooth/hidp/core.c
++++ b/net/bluetooth/hidp/core.c
+@@ -1305,7 +1305,7 @@ static int hidp_session_thread(void *arg)
+ l2cap_unregister_user(session->conn, &session->user);
+ hidp_session_put(session);
+
+- module_put_and_exit(0);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c
+index f066837d8e1aa..fd6714de2260e 100644
+--- a/tools/objtool/check.c
++++ b/tools/objtool/check.c
+@@ -171,7 +171,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+ "do_task_dead",
+ "kthread_exit",
+ "make_task_dead",
+- "__module_put_and_exit",
++ "__module_put_and_kthread_exit",
+ "complete_and_exit",
+ "__reiserfs_panic",
+ "lbug_with_loc",
+--
+2.43.0
+
--- /dev/null
+From 40a63eb4ade7b15c74241fef62f6e1c414f21f70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Oct 2022 14:24:14 +0200
+Subject: exportfs: use pr_debug for unreachable debug statements
+
+From: David Disseldorp <ddiss@suse.de>
+
+[ Upstream commit 427505ffeaa464f683faba945a88d3e3248f6979 ]
+
+expfs.c has a bunch of dprintk statements which are unusable due to:
+ #define dprintk(fmt, args...) do{}while(0)
+Use pr_debug so that they can be enabled dynamically.
+Also make some minor changes to the debug statements to fix some
+incorrect types, and remove __func__ which can be handled by dynamic
+debug separately.
+
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/exportfs/expfs.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
+index 3ef80d000e13d..584bdd912cdd4 100644
+--- a/fs/exportfs/expfs.c
++++ b/fs/exportfs/expfs.c
+@@ -18,7 +18,7 @@
+ #include <linux/sched.h>
+ #include <linux/cred.h>
+
+-#define dprintk(fmt, args...) do{}while(0)
++#define dprintk(fmt, args...) pr_debug(fmt, ##args)
+
+
+ static int get_name(const struct path *path, char *name, struct dentry *child);
+@@ -132,8 +132,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
+ inode_unlock(dentry->d_inode);
+
+ if (IS_ERR(parent)) {
+- dprintk("%s: get_parent of %ld failed, err %d\n",
+- __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
++ dprintk("get_parent of %lu failed, err %ld\n",
++ dentry->d_inode->i_ino, PTR_ERR(parent));
+ return parent;
+ }
+
+@@ -147,7 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
+ dprintk("%s: found name: %s\n", __func__, nbuf);
+ tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf));
+ if (IS_ERR(tmp)) {
+- dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
++ dprintk("lookup failed: %ld\n", PTR_ERR(tmp));
+ err = PTR_ERR(tmp);
+ goto out_err;
+ }
+--
+2.43.0
+
--- /dev/null
+From ec606e6f8b81c9158798988bc99e3ca7920e4f4c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Oct 2021 14:33:02 -0300
+Subject: ext4: fix error code saved on super block during file system abort
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 124e7c61deb27d758df5ec0521c36cf08d417f7a ]
+
+ext4_abort will eventually call ext4_errno_to_code, which translates the
+errno to an EXT4_ERR specific error. This means that ext4_abort expects
+an errno. By using EXT4_ERR_ here, it gets misinterpreted (as an errno),
+and ends up saving EXT4_ERR_EBUSY on the superblock during an abort,
+which makes no sense.
+
+ESHUTDOWN will get properly translated to EXT4_ERR_SHUTDOWN, so use that
+instead.
+
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Link: https://lore.kernel.org/r/20211026173302.84000-1-krisman@collabora.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/ext4/super.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index f69e7bf52c578..8b276b95a7904 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -5851,7 +5851,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
+ }
+
+ if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
+- ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
++ ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
+
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+--
+2.43.0
+
--- /dev/null
+From 2323b54c288bd22c9d9afb59ea25a1e3f5f77e6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:44 -0300
+Subject: ext4: Send notifications on error
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 9a089b21f79b47eed240d4da7ea0d049de7c9b4d ]
+
+Send a FS_ERROR message via fsnotify to a userspace monitoring tool
+whenever an ext4 error condition is triggered. This follows the existing
+error conditions in ext4, so it is hooked to the ext4_error* functions.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-30-krisman@collabora.com
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Acked-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/ext4/super.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 65716a17059d0..f69e7bf52c578 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -46,6 +46,7 @@
+ #include <linux/part_stat.h>
+ #include <linux/kthread.h>
+ #include <linux/freezer.h>
++#include <linux/fsnotify.h>
+
+ #include "ext4.h"
+ #include "ext4_extents.h" /* Needed for trace points definition */
+@@ -753,6 +754,8 @@ void __ext4_error(struct super_block *sb, const char *function,
+ sb->s_id, function, line, current->comm, &vaf);
+ va_end(args);
+ }
++ fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
++
+ ext4_handle_error(sb, force_ro, error, 0, block, function, line);
+ }
+
+@@ -783,6 +786,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
+ current->comm, &vaf);
+ va_end(args);
+ }
++ fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
++
+ ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
+ function, line);
+ }
+@@ -821,6 +826,8 @@ void __ext4_error_file(struct file *file, const char *function,
+ current->comm, path, &vaf);
+ va_end(args);
+ }
++ fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
++
+ ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
+ function, line);
+ }
+@@ -888,6 +895,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ }
++ fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
+
+ ext4_handle_error(sb, false, -errno, 0, 0, function, line);
+ }
+--
+2.43.0
+
--- /dev/null
+From 9864c2a9d17a03a82c9b8f27b3efbcae30ac16de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:38 -0300
+Subject: fanotify: Add helpers to decide whether to report FID/DFID
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 4bd5a5c8e6e5cd964e9738e6ef87f6c2cb453edf ]
+
+Now that there is an event that reports FID records even for a zeroed
+file handle, wrap the logic that decides whether to issue the records
+into helper functions. This shouldn't have any impact on the code, but
+simplifies further patches.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-24-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.h | 10 ++++++++++
+ fs/notify/fanotify/fanotify_user.c | 13 +++++++------
+ 2 files changed, 17 insertions(+), 6 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 3510d06654ed0..80af269eebb89 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -264,6 +264,16 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event)
+ return info ? fanotify_info_dir_fh_len(info) : 0;
+ }
+
++static inline bool fanotify_event_has_object_fh(struct fanotify_event *event)
++{
++ return fanotify_event_object_fh_len(event) > 0;
++}
++
++static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event)
++{
++ return fanotify_event_dir_fh_len(event) > 0;
++}
++
+ struct fanotify_path_event {
+ struct fanotify_event fae;
+ struct path path;
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index b3cbcb0e71c99..c053038e1cf3c 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -140,10 +140,9 @@ static size_t fanotify_event_len(unsigned int info_mode,
+ return event_len;
+
+ info = fanotify_event_info(event);
+- dir_fh_len = fanotify_event_dir_fh_len(event);
+- fh_len = fanotify_event_object_fh_len(event);
+
+- if (dir_fh_len) {
++ if (fanotify_event_has_dir_fh(event)) {
++ dir_fh_len = fanotify_event_dir_fh_len(event);
+ event_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
+ } else if ((info_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+@@ -157,8 +156,10 @@ static size_t fanotify_event_len(unsigned int info_mode,
+ if (info_mode & FAN_REPORT_PIDFD)
+ event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
+
+- if (fh_len)
++ if (fanotify_event_has_object_fh(event)) {
++ fh_len = fanotify_event_object_fh_len(event);
+ event_len += fanotify_fid_info_len(fh_len, dot_len);
++ }
+
+ return event_len;
+ }
+@@ -451,7 +452,7 @@ static int copy_info_records_to_user(struct fanotify_event *event,
+ /*
+ * Event info records order is as follows: dir fid + name, child fid.
+ */
+- if (fanotify_event_dir_fh_len(event)) {
++ if (fanotify_event_has_dir_fh(event)) {
+ info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+ FAN_EVENT_INFO_TYPE_DFID;
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+@@ -467,7 +468,7 @@ static int copy_info_records_to_user(struct fanotify_event *event,
+ total_bytes += ret;
+ }
+
+- if (fanotify_event_object_fh_len(event)) {
++ if (fanotify_event_has_object_fh(event)) {
+ const char *dot = NULL;
+ int dot_len = 0;
+
+--
+2.43.0
+
--- /dev/null
+From 2905be0626ad3c500ab68f0d63dd3c895983ba4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:29 -0300
+Subject: fanotify: Allow file handle encoding for unhashed events
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 74fe4734897a2da2ae2a665a5e622cd490d36eaf ]
+
+Allow passing a NULL hash to fanotify_encode_fh and avoid calculating
+the hash if not needed.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-15-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 397ee623ff1e8..ec84fee7ad01c 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -403,8 +403,12 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
+ fh->type = type;
+ fh->len = fh_len;
+
+- /* Mix fh into event merge key */
+- *hash ^= fanotify_hash_fh(fh);
++ /*
++ * Mix fh into event merge key. Hash might be NULL in case of
++ * unhashed FID events (i.e. FAN_FS_ERROR).
++ */
++ if (hash)
++ *hash ^= fanotify_hash_fh(fh);
+
+ return FANOTIFY_FH_HDR_LEN + fh_len;
+
+--
+2.43.0
+
--- /dev/null
+From 9359d51caa6e2c1c83d3865db56754a50c5e881c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:43 -0300
+Subject: fanotify: Allow users to request FAN_FS_ERROR events
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 9709bd548f11a092d124698118013f66e1740f9b ]
+
+Wire up the FAN_FS_ERROR event in the fanotify_mark syscall, allowing
+user space to request the monitoring of FAN_FS_ERROR events.
+
+These events are limited to filesystem marks, so check it is the
+case in the syscall handler.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-29-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 2 +-
+ fs/notify/fanotify/fanotify_user.c | 4 ++++
+ include/linux/fanotify.h | 6 +++++-
+ 3 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index af61425e6e3bf..b6091775aa6ef 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -822,7 +822,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
+ BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
+
+- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
++ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
+
+ mask = fanotify_group_event_mask(group, iter_info, mask, data,
+ data_type, dir);
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 133d9b5ffdb10..58b0a7fabd4a6 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1538,6 +1538,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ group->priority == FS_PRIO_0)
+ goto fput_and_out;
+
++ if (mask & FAN_FS_ERROR &&
++ mark_type != FAN_MARK_FILESYSTEM)
++ goto fput_and_out;
++
+ /*
+ * Events that do not carry enough information to report
+ * event->fd require a group that supports reporting fid. Those
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index 52d464802d99f..616af2ea20f30 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -91,9 +91,13 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \
+ FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
+
++/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
++#define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR)
++
+ /* Events that user can request to be notified on */
+ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
+- FANOTIFY_INODE_EVENTS)
++ FANOTIFY_INODE_EVENTS | \
++ FANOTIFY_ERROR_EVENTS)
+
+ /* Events that require a permission response from user */
+ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
+--
+2.43.0
+
--- /dev/null
+From aa45eba6ef21b0e74df76cd704645d5aa068555c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jun 2022 17:42:09 +0300
+Subject: fanotify: cleanups for fanotify_mark() input validations
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 8afd7215aa97f8868d033f6e1d01a276ab2d29c0 ]
+
+Create helper fanotify_may_update_existing_mark() for checking for
+conflicts between existing mark flags and fanotify_mark() flags.
+
+Use variable mark_cmd to make the checks for mark command bits
+cleaner.
+
+Link: https://lore.kernel.org/r/20220629144210.2983229-3-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 30 +++++++++++++++++++++---------
+ include/linux/fanotify.h | 9 +++++----
+ 2 files changed, 26 insertions(+), 13 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 9367216263cab..870db0f361f4c 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1183,6 +1183,19 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
+ sizeof(struct fanotify_error_event));
+ }
+
++static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
++ unsigned int fan_flags)
++{
++ /*
++ * Non evictable mark cannot be downgraded to evictable mark.
++ */
++ if (fan_flags & FAN_MARK_EVICTABLE &&
++ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
++ return -EEXIST;
++
++ return 0;
++}
++
+ static int fanotify_add_mark(struct fsnotify_group *group,
+ fsnotify_connp_t *connp, unsigned int obj_type,
+ __u32 mask, unsigned int fan_flags,
+@@ -1204,13 +1217,11 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ }
+
+ /*
+- * Non evictable mark cannot be downgraded to evictable mark.
++ * Check if requested mark flags conflict with an existing mark flags.
+ */
+- if (fan_flags & FAN_MARK_EVICTABLE &&
+- !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
+- ret = -EEXIST;
++ ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
++ if (ret)
+ goto out;
+- }
+
+ /*
+ * Error events are pre-allocated per group, only if strictly
+@@ -1567,6 +1578,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ __kernel_fsid_t __fsid, *fsid = NULL;
+ u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
++ unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
+ bool ignore = flags & FAN_MARK_IGNORED_MASK;
+ unsigned int obj_type, fid_mode;
+ u32 umask = 0;
+@@ -1596,7 +1608,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ return -EINVAL;
+ }
+
+- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
++ switch (mark_cmd) {
+ case FAN_MARK_ADD:
+ case FAN_MARK_REMOVE:
+ if (!mask)
+@@ -1685,7 +1697,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
+ goto fput_and_out;
+
+- if (flags & FAN_MARK_FLUSH) {
++ if (mark_cmd == FAN_MARK_FLUSH) {
+ ret = 0;
+ if (mark_type == FAN_MARK_MOUNT)
+ fsnotify_clear_vfsmount_marks_by_group(group);
+@@ -1701,7 +1713,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ if (ret)
+ goto fput_and_out;
+
+- if (flags & FAN_MARK_ADD) {
++ if (mark_cmd == FAN_MARK_ADD) {
+ ret = fanotify_events_supported(group, &path, mask, flags);
+ if (ret)
+ goto path_put_and_out;
+@@ -1739,7 +1751,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ }
+
+ /* create/update an inode mark */
+- switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
++ switch (mark_cmd) {
+ case FAN_MARK_ADD:
+ if (mark_type == FAN_MARK_MOUNT)
+ ret = fanotify_add_vfsmount_mark(group, mnt, mask,
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index 4f6cbe6c6e235..c9e185407ebcb 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -61,15 +61,16 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \
+ FAN_MARK_FILESYSTEM)
+
++#define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
++ FAN_MARK_FLUSH)
++
+ #define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \
+- FAN_MARK_ADD | \
+- FAN_MARK_REMOVE | \
++ FANOTIFY_MARK_CMD_BITS | \
+ FAN_MARK_DONT_FOLLOW | \
+ FAN_MARK_ONLYDIR | \
+ FAN_MARK_IGNORED_MASK | \
+ FAN_MARK_IGNORED_SURV_MODIFY | \
+- FAN_MARK_EVICTABLE | \
+- FAN_MARK_FLUSH)
++ FAN_MARK_EVICTABLE)
+
+ /*
+ * Events that can be reported with data type FSNOTIFY_EVENT_PATH.
+--
+2.43.0
+
--- /dev/null
+From 4c836fa45543a8e0389800b2c07732551bc85c5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:23 +0300
+Subject: fanotify: create helper fanotify_mark_user_flags()
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 4adce25ccfff215939ee465b8c0aa70526d5c352 ]
+
+To translate from fsnotify mark flags to user visible flags.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-13-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.h | 10 ++++++++++
+ fs/notify/fdinfo.c | 6 ++----
+ 2 files changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index a3d5b751cac5b..87142bc0131a4 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -490,3 +490,13 @@ static inline unsigned int fanotify_event_hash_bucket(
+ {
+ return event->hash & FANOTIFY_HTABLE_MASK;
+ }
++
++static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
++{
++ unsigned int mflags = 0;
++
++ if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
++ mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
++
++ return mflags;
++}
+diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
+index 1f34c5c29fdbd..59fb40abe33d3 100644
+--- a/fs/notify/fdinfo.c
++++ b/fs/notify/fdinfo.c
+@@ -14,6 +14,7 @@
+ #include <linux/exportfs.h>
+
+ #include "inotify/inotify.h"
++#include "fanotify/fanotify.h"
+ #include "fdinfo.h"
+ #include "fsnotify.h"
+
+@@ -103,12 +104,9 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f)
+
+ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+ {
+- unsigned int mflags = 0;
++ unsigned int mflags = fanotify_mark_user_flags(mark);
+ struct inode *inode;
+
+- if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+- mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
+-
+ if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) {
+ inode = igrab(fsnotify_conn_inode(mark->connector));
+ if (!inode)
+--
+2.43.0
+
--- /dev/null
+From a747c1e5dbcb7a3cf189c267d94c2552ec71a4e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 May 2022 11:00:28 +0300
+Subject: fanotify: do not allow setting dirent events in mask of non-dir
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit ceaf69f8eadcafb323392be88e7a5248c415d423 ]
+
+Dirent events (create/delete/move) are only reported on watched
+directory inodes, but in fanotify as well as in legacy inotify, it was
+always allowed to set them on non-dir inode, which does not result in
+any meaningful outcome.
+
+Until kernel v5.17, dirent events in fanotify also differed from events
+"on child" (e.g. FAN_OPEN) in the information provided in the event.
+For example, FAN_OPEN could be set in the mask of a non-dir or the mask
+of its parent and event would report the fid of the child regardless of
+the marked object.
+By contrast, FAN_DELETE is not reported if the child is marked and the
+child fid was not reported in the events.
+
+Since kernel v5.17, with fanotify group flag FAN_REPORT_TARGET_FID, the
+fid of the child is reported with dirent events, like events "on child",
+which may create confusion for users expecting the same behavior as
+events "on child" when setting events in the mask on a child.
+
+The desired semantics of setting dirent events in the mask of a child
+are not clear, so for now, deny this action for a group initialized
+with flag FAN_REPORT_TARGET_FID and for the new event FAN_RENAME.
+We may relax this restriction in the future if we decide on the
+semantics and implement them.
+
+Fixes: d61fd650e9d2 ("fanotify: introduce group flag FAN_REPORT_TARGET_FID")
+Fixes: 8cc3b1ccd930 ("fanotify: wire up FAN_RENAME event")
+Link: https://lore.kernel.org/linux-fsdevel/20220505133057.zm5t6vumc4xdcnsg@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220507080028.219826-1-amir73il@gmail.com
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 4f607fd793f3a..336ccec2abed3 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1671,6 +1671,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ else
+ mnt = path.mnt;
+
++ /*
++ * FAN_RENAME is not allowed on non-dir (for now).
++ * We shouldn't have allowed setting any dirent events in mask of
++ * non-dir, but because we always allowed it, error only if group
++ * was initialized with the new flag FAN_REPORT_TARGET_FID.
++ */
++ ret = -ENOTDIR;
++ if (inode && !S_ISDIR(inode->i_mode) &&
++ ((mask & FAN_RENAME) ||
++ ((mask & FANOTIFY_DIRENT_EVENTS) &&
++ FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
++ goto path_put_and_out;
++
+ /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
+ if (mnt || !S_ISDIR(inode->i_mode)) {
+ mask &= ~FAN_EVENT_ON_CHILD;
+--
+2.43.0
+
--- /dev/null
+From a44e1a032774859839af39721e9303968cbcfebb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:42 -0300
+Subject: fanotify: Emit generic error info for error event
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 130a3c742107acff985541c28360c8b40203559c ]
+
+The error info is a record sent to users on FAN_FS_ERROR events
+documenting the type of error. It also carries an error count,
+documenting how many errors were observed since the last reporting.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-28-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 1 +
+ fs/notify/fanotify/fanotify.h | 1 +
+ fs/notify/fanotify/fanotify_user.c | 36 ++++++++++++++++++++++++++++++
+ include/uapi/linux/fanotify.h | 7 ++++++
+ 4 files changed, 45 insertions(+)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 465f07e70e6dc..af61425e6e3bf 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -621,6 +621,7 @@ static struct fanotify_event *fanotify_alloc_error_event(
+ return NULL;
+
+ fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
++ fee->error = report->error;
+ fee->err_count = 1;
+ fee->fsid = *fsid;
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index edd7587adcc59..d25f500bf7e79 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -205,6 +205,7 @@ FANOTIFY_NE(struct fanotify_event *event)
+
+ struct fanotify_error_event {
+ struct fanotify_event fae;
++ s32 error; /* Error reported by the Filesystem. */
+ u32 err_count; /* Suppressed errors count */
+
+ __kernel_fsid_t fsid; /* FSID this error refers to. */
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index fa3dac9c59f69..133d9b5ffdb10 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -115,6 +115,8 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
+ (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
+ #define FANOTIFY_PIDFD_INFO_HDR_LEN \
+ sizeof(struct fanotify_event_info_pidfd)
++#define FANOTIFY_ERROR_INFO_LEN \
++ (sizeof(struct fanotify_event_info_error))
+
+ static int fanotify_fid_info_len(int fh_len, int name_len)
+ {
+@@ -139,6 +141,9 @@ static size_t fanotify_event_len(unsigned int info_mode,
+ if (!info_mode)
+ return event_len;
+
++ if (fanotify_is_error_event(event->mask))
++ event_len += FANOTIFY_ERROR_INFO_LEN;
++
+ info = fanotify_event_info(event);
+
+ if (fanotify_event_has_dir_fh(event)) {
+@@ -324,6 +329,28 @@ static int process_access_response(struct fsnotify_group *group,
+ return -ENOENT;
+ }
+
++static size_t copy_error_info_to_user(struct fanotify_event *event,
++ char __user *buf, int count)
++{
++ struct fanotify_event_info_error info;
++ struct fanotify_error_event *fee = FANOTIFY_EE(event);
++
++ info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
++ info.hdr.pad = 0;
++ info.hdr.len = FANOTIFY_ERROR_INFO_LEN;
++
++ if (WARN_ON(count < info.hdr.len))
++ return -EFAULT;
++
++ info.error = fee->error;
++ info.error_count = fee->err_count;
++
++ if (copy_to_user(buf, &info, sizeof(info)))
++ return -EFAULT;
++
++ return info.hdr.len;
++}
++
+ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ int info_type, const char *name,
+ size_t name_len,
+@@ -530,6 +557,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
+ total_bytes += ret;
+ }
+
++ if (fanotify_is_error_event(event->mask)) {
++ ret = copy_error_info_to_user(event, buf, count);
++ if (ret < 0)
++ return ret;
++ buf += ret;
++ count -= ret;
++ total_bytes += ret;
++ }
++
+ return total_bytes;
+ }
+
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index 2990731ddc8bc..bd1932c2074d5 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -126,6 +126,7 @@ struct fanotify_event_metadata {
+ #define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+ #define FAN_EVENT_INFO_TYPE_DFID 3
+ #define FAN_EVENT_INFO_TYPE_PIDFD 4
++#define FAN_EVENT_INFO_TYPE_ERROR 5
+
+ /* Variable length info record following event metadata */
+ struct fanotify_event_info_header {
+@@ -160,6 +161,12 @@ struct fanotify_event_info_pidfd {
+ __s32 pidfd;
+ };
+
++struct fanotify_event_info_error {
++ struct fanotify_event_info_header hdr;
++ __s32 error;
++ __u32 error_count;
++};
++
+ struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+--
+2.43.0
+
--- /dev/null
+From 4df08f398fd86a07d5cd34636846b9925779ee4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:27 +0300
+Subject: fanotify: enable "evictable" inode marks
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 5f9d3bd520261fd7a850818c71809fd580e0f30c ]
+
+Now that the direct reclaim path is handled we can enable evictable
+inode marks.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-17-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 2 +-
+ include/linux/fanotify.h | 1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index b4d16caa98d80..4471043955f87 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1814,7 +1814,7 @@ static int __init fanotify_user_setup(void)
+
+ BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
+- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
++ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
+
+ fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+ SLAB_PANIC|SLAB_ACCOUNT);
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index 3afdf339d53c9..81f45061c1b18 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -68,6 +68,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ FAN_MARK_ONLYDIR | \
+ FAN_MARK_IGNORED_MASK | \
+ FAN_MARK_IGNORED_SURV_MODIFY | \
++ FAN_MARK_EVICTABLE | \
+ FAN_MARK_FLUSH)
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From eb8f7522749dc78bc02c885ba4837c010ff96681 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:30 -0300
+Subject: fanotify: Encode empty file handle when no inode is provided
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 272531ac619b374ab474e989eb387162fded553f ]
+
+Instead of failing, encode an invalid file handle in fanotify_encode_fh
+if no inode is provided. This bogus file handle will be reported by
+FAN_FS_ERROR for non-inode errors.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-16-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index ec84fee7ad01c..c64d61b673caf 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -370,8 +370,14 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
+ fh->type = FILEID_ROOT;
+ fh->len = 0;
+ fh->flags = 0;
++
++ /*
++ * Invalid FHs are used by FAN_FS_ERROR for errors not
++ * linked to any inode. The f_handle won't be reported
++ * back to userspace.
++ */
+ if (!inode)
+- return 0;
++ goto out;
+
+ /*
+ * !gpf means preallocated variable size fh, but fh_len could
+@@ -403,6 +409,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
+ fh->type = type;
+ fh->len = fh_len;
+
++out:
+ /*
+ * Mix fh into event merge key. Hash might be NULL in case of
+ * unhashed FID events (i.e. FAN_FS_ERROR).
+--
+2.43.0
+
--- /dev/null
+From 364cd1470d1df0a78308c2b4f475d7c05aae2f85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:24 +0300
+Subject: fanotify: factor out helper fanotify_mark_update_flags()
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 8998d110835e3781ccd3f1ae061a590b4aaba911 ]
+
+Handle FAN_MARK_IGNORED_SURV_MODIFY flag change in a helper that
+is called after updating the mark mask.
+
+Replace the added and removed return values and help variables with
+bool recalc return values and help variable, which makes the code a
+bit easier to follow.
+
+Rename flags argument to fan_flags to emphasize the difference from
+mark->flags.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-14-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 47 ++++++++++++++++--------------
+ 1 file changed, 25 insertions(+), 22 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index f23326be0d371..0ea0047c6340a 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1077,42 +1077,45 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
+ flags, umask);
+ }
+
+-static void fanotify_mark_add_ignored_mask(struct fsnotify_mark *fsn_mark,
+- __u32 mask, unsigned int flags,
+- __u32 *removed)
++static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
++ unsigned int fan_flags)
+ {
+- fsn_mark->ignored_mask |= mask;
++ bool recalc = false;
+
+ /*
+ * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
+ * the removal of the FS_MODIFY bit in calculated mask if it was set
+ * because of an ignored mask that is now going to survive FS_MODIFY.
+ */
+- if ((flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
++ if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
++ (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
+ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
+ if (!(fsn_mark->mask & FS_MODIFY))
+- *removed = FS_MODIFY;
++ recalc = true;
+ }
++
++ return recalc;
+ }
+
+-static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+- __u32 mask, unsigned int flags,
+- __u32 *removed)
++static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
++ __u32 mask, unsigned int fan_flags)
+ {
+- __u32 oldmask, newmask;
++ bool recalc;
+
+ spin_lock(&fsn_mark->lock);
+- oldmask = fsnotify_calc_mask(fsn_mark);
+- if (!(flags & FAN_MARK_IGNORED_MASK)) {
++ if (!(fan_flags & FAN_MARK_IGNORED_MASK))
+ fsn_mark->mask |= mask;
+- } else {
+- fanotify_mark_add_ignored_mask(fsn_mark, mask, flags, removed);
+- }
+- newmask = fsnotify_calc_mask(fsn_mark);
++ else
++ fsn_mark->ignored_mask |= mask;
++
++ recalc = fsnotify_calc_mask(fsn_mark) &
++ ~fsnotify_conn_mask(fsn_mark->connector);
++
++ recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags);
+ spin_unlock(&fsn_mark->lock);
+
+- return newmask & ~oldmask;
++ return recalc;
+ }
+
+ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+@@ -1166,11 +1169,11 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
+
+ static int fanotify_add_mark(struct fsnotify_group *group,
+ fsnotify_connp_t *connp, unsigned int obj_type,
+- __u32 mask, unsigned int flags,
++ __u32 mask, unsigned int fan_flags,
+ __kernel_fsid_t *fsid)
+ {
+ struct fsnotify_mark *fsn_mark;
+- __u32 added, removed = 0;
++ bool recalc;
+ int ret = 0;
+
+ mutex_lock(&group->mark_mutex);
+@@ -1187,14 +1190,14 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ * Error events are pre-allocated per group, only if strictly
+ * needed (i.e. FAN_FS_ERROR was requested).
+ */
+- if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
++ if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
+ ret = fanotify_group_init_error_pool(group);
+ if (ret)
+ goto out;
+ }
+
+- added = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &removed);
+- if (removed || (added & ~fsnotify_conn_mask(fsn_mark->connector)))
++ recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags);
++ if (recalc)
+ fsnotify_recalc_mask(fsn_mark->connector);
+
+ out:
+--
+2.43.0
+
--- /dev/null
+From 7766bc9976d4eafd4553be61983fb04cd78c32d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 15:08:02 +0300
+Subject: fanotify: fix incorrect fmode_t casts
+
+From: Vasily Averin <vvs@openvz.org>
+
+[ Upstream commit dccd855771b37820b6d976a99729c88259549f85 ]
+
+Fixes sparse warnings:
+fs/notify/fanotify/fanotify_user.c:267:63: sparse:
+ warning: restricted fmode_t degrades to integer
+fs/notify/fanotify/fanotify_user.c:1351:28: sparse:
+ warning: restricted fmode_t degrades to integer
+
+FMODE_NONOTIFY has bitwise fmode_t type and requires the __force attribute
+for any casts.
+
+Signed-off-by: Vasily Averin <vvs@openvz.org>
+Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/9adfd6ac-1b89-791e-796b-49ada3293985@openvz.org
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 4471043955f87..6db5a0b03a78d 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -260,7 +260,7 @@ static int create_fd(struct fsnotify_group *group, struct path *path,
+ * originally opened O_WRONLY.
+ */
+ new_file = dentry_open(path,
+- group->fanotify_data.f_flags | FMODE_NONOTIFY,
++ group->fanotify_data.f_flags | __FMODE_NONOTIFY,
+ current_cred());
+ if (IS_ERR(new_file)) {
+ /*
+@@ -1373,7 +1373,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+ (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
+ return -EINVAL;
+
+- f_flags = O_RDWR | FMODE_NONOTIFY;
++ f_flags = O_RDWR | __FMODE_NONOTIFY;
+ if (flags & FAN_CLOEXEC)
+ f_flags |= O_CLOEXEC;
+ if (flags & FAN_NONBLOCK)
+--
+2.43.0
+
--- /dev/null
+From 067a49df6efb4f2329df5ebf64a3f253ff3bbd07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:20 -0300
+Subject: fanotify: Fold event size calculation to its own function
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit b9928e80dda84b349ba8de01780b9bef2fc36ffa ]
+
+Every time this function is invoked, it is immediately added to
+FAN_EVENT_METADATA_LEN, since there is no need to just calculate the
+length of info records. This minor clean up folds the rest of the
+calculation into the function, which now operates in terms of events,
+returning the size of the entire event, including metadata.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-6-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 35 +++++++++++++++++-------------
+ 1 file changed, 20 insertions(+), 15 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 0e2a0eb7cb9e0..20b743b05b997 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -126,17 +126,24 @@ static int fanotify_fid_info_len(int fh_len, int name_len)
+ FANOTIFY_EVENT_ALIGN);
+ }
+
+-static int fanotify_event_info_len(unsigned int info_mode,
+- struct fanotify_event *event)
++static size_t fanotify_event_len(unsigned int info_mode,
++ struct fanotify_event *event)
+ {
+- struct fanotify_info *info = fanotify_event_info(event);
+- int dir_fh_len = fanotify_event_dir_fh_len(event);
+- int fh_len = fanotify_event_object_fh_len(event);
+- int info_len = 0;
++ size_t event_len = FAN_EVENT_METADATA_LEN;
++ struct fanotify_info *info;
++ int dir_fh_len;
++ int fh_len;
+ int dot_len = 0;
+
++ if (!info_mode)
++ return event_len;
++
++ info = fanotify_event_info(event);
++ dir_fh_len = fanotify_event_dir_fh_len(event);
++ fh_len = fanotify_event_object_fh_len(event);
++
+ if (dir_fh_len) {
+- info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
++ event_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
+ } else if ((info_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+ /*
+@@ -147,12 +154,12 @@ static int fanotify_event_info_len(unsigned int info_mode,
+ }
+
+ if (info_mode & FAN_REPORT_PIDFD)
+- info_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
++ event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
+
+ if (fh_len)
+- info_len += fanotify_fid_info_len(fh_len, dot_len);
++ event_len += fanotify_fid_info_len(fh_len, dot_len);
+
+- return info_len;
++ return event_len;
+ }
+
+ /*
+@@ -181,7 +188,7 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
+ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
+ size_t count)
+ {
+- size_t event_size = FAN_EVENT_METADATA_LEN;
++ size_t event_size;
+ struct fanotify_event *event = NULL;
+ struct fsnotify_event *fsn_event;
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
+@@ -194,8 +201,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
+ goto out;
+
+ event = FANOTIFY_E(fsn_event);
+- if (info_mode)
+- event_size += fanotify_event_info_len(info_mode, event);
++ event_size = fanotify_event_len(info_mode, event);
+
+ if (event_size > count) {
+ event = ERR_PTR(-EINVAL);
+@@ -537,8 +543,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
+
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+- metadata.event_len = FAN_EVENT_METADATA_LEN +
+- fanotify_event_info_len(info_mode, event);
++ metadata.event_len = fanotify_event_len(info_mode, event);
+ metadata.metadata_len = FAN_EVENT_METADATA_LEN;
+ metadata.vers = FANOTIFY_METADATA_VERSION;
+ metadata.reserved = 0;
+--
+2.43.0
+
--- /dev/null
+From 25543eea3fd876a092dbdb3017ed47a48dce405a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:25 +0300
+Subject: fanotify: implement "evictable" inode marks
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 7d5e005d982527e4029b0139823d179986e34cdc ]
+
+When an inode mark is created with flag FAN_MARK_EVICTABLE, it will not
+pin the marked inode to inode cache, so when inode is evicted from cache
+due to memory pressure, the mark will be lost.
+
+When an inode mark with flag FAN_MARK_EVICTABLE is updated without using
+this flag, the marked inode is pinned to inode cache.
+
+When an inode mark is updated with flag FAN_MARK_EVICTABLE but an
+existing mark already has the inode pinned, the mark update fails with
+error EEXIST.
+
+Evictable inode marks can be used to setup inode marks with ignored mask
+to suppress events from uninteresting files or directories in a lazy
+manner, upon receiving the first event, without having to iterate all
+the uninteresting files or directories beforehand.
+
+The evictable inode mark feature allows performing this lazy marks setup
+without exhausting the system memory with pinned inodes.
+
+This change does not enable the feature yet.
+
+Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiRDpuS=2uA6+ZUM7yG9vVU-u212tkunBmSnP_u=mkv=Q@mail.gmail.com/
+Link: https://lore.kernel.org/r/20220422120327.3459282-15-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.h | 2 ++
+ fs/notify/fanotify/fanotify_user.c | 38 ++++++++++++++++++++++++++++--
+ include/uapi/linux/fanotify.h | 1 +
+ 3 files changed, 39 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 87142bc0131a4..80e0ec95b1131 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -497,6 +497,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
+
+ if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
+ mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
++ if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
++ mflags |= FAN_MARK_EVICTABLE;
+
+ return mflags;
+ }
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 0ea0047c6340a..9bb182dc3f9b3 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1080,6 +1080,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
+ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
+ unsigned int fan_flags)
+ {
++ bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
+ bool recalc = false;
+
+ /*
+@@ -1095,7 +1096,18 @@ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
+ recalc = true;
+ }
+
+- return recalc;
++ if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE ||
++ want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
++ return recalc;
++
++ /*
++ * NO_IREF may be removed from a mark, but not added.
++ * When removed, fsnotify_recalc_mask() will take the inode ref.
++ */
++ WARN_ON_ONCE(!want_iref);
++ fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF;
++
++ return true;
+ }
+
+ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+@@ -1121,6 +1133,7 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+ fsnotify_connp_t *connp,
+ unsigned int obj_type,
++ unsigned int fan_flags,
+ __kernel_fsid_t *fsid)
+ {
+ struct ucounts *ucounts = group->fanotify_data.ucounts;
+@@ -1143,6 +1156,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+ }
+
+ fsnotify_init_mark(mark, group);
++ if (fan_flags & FAN_MARK_EVICTABLE)
++ mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF;
++
+ ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid);
+ if (ret) {
+ fsnotify_put_mark(mark);
+@@ -1179,13 +1195,23 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ mutex_lock(&group->mark_mutex);
+ fsn_mark = fsnotify_find_mark(connp, group);
+ if (!fsn_mark) {
+- fsn_mark = fanotify_add_new_mark(group, connp, obj_type, fsid);
++ fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
++ fan_flags, fsid);
+ if (IS_ERR(fsn_mark)) {
+ mutex_unlock(&group->mark_mutex);
+ return PTR_ERR(fsn_mark);
+ }
+ }
+
++ /*
++ * Non evictable mark cannot be downgraded to evictable mark.
++ */
++ if (fan_flags & FAN_MARK_EVICTABLE &&
++ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) {
++ ret = -EEXIST;
++ goto out;
++ }
++
+ /*
+ * Error events are pre-allocated per group, only if strictly
+ * needed (i.e. FAN_FS_ERROR was requested).
+@@ -1615,6 +1641,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ mark_type != FAN_MARK_FILESYSTEM)
+ goto fput_and_out;
+
++ /*
++ * Evictable is only relevant for inode marks, because only inode object
++ * can be evicted on memory pressure.
++ */
++ if (flags & FAN_MARK_EVICTABLE &&
++ mark_type != FAN_MARK_INODE)
++ goto fput_and_out;
++
+ /*
+ * Events that do not carry enough information to report
+ * event->fd require a group that supports reporting fid. Those
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index e8ac38cc2fd6d..f1f89132d60e2 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -82,6 +82,7 @@
+ #define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
+ #define FAN_MARK_FLUSH 0x00000080
+ /* FAN_MARK_FILESYSTEM is 0x00000100 */
++#define FAN_MARK_EVICTABLE 0x00000200
+
+ /* These are NOT bitwise flags. Both bits can be used togther. */
+ #define FAN_MARK_INODE 0x00000000
+--
+2.43.0
+
--- /dev/null
+From f14742e3fcd5cd88688c3dab1d7c413dad5f84b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jun 2022 17:42:10 +0300
+Subject: fanotify: introduce FAN_MARK_IGNORE
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit e252f2ed1c8c6c3884ab5dd34e003ed21f1fe6e0 ]
+
+This flag is a new way to configure ignore mask which allows adding and
+removing the event flags FAN_ONDIR and FAN_EVENT_ON_CHILD in ignore mask.
+
+The legacy FAN_MARK_IGNORED_MASK flag would always ignore events on
+directories and would ignore events on children depending on whether
+the FAN_EVENT_ON_CHILD flag was set in the (non ignored) mask.
+
+FAN_MARK_IGNORE can be used to ignore events on children without setting
+FAN_EVENT_ON_CHILD in the mark's mask and will not ignore events on
+directories unconditionally, only when FAN_ONDIR is set in ignore mask.
+
+The new behavior is non-downgradable. After calling fanotify_mark() with
+FAN_MARK_IGNORE once, calling fanotify_mark() with FAN_MARK_IGNORED_MASK
+on the same object will return EEXIST error.
+
+Setting the event flags with FAN_MARK_IGNORE on a non-dir inode mark
+has no meaning and will return ENOTDIR error.
+
+The meaning of FAN_MARK_IGNORED_SURV_MODIFY is preserved with the new
+FAN_MARK_IGNORE flag, but with a few semantic differences:
+
+1. FAN_MARK_IGNORED_SURV_MODIFY is required for filesystem and mount
+ marks and on an inode mark on a directory. Omitting this flag
+ will return EINVAL or EISDIR error.
+
+2. An ignore mask on a non-directory inode that survives modify could
+ never be downgraded to an ignore mask that does not survive modify.
+ With new FAN_MARK_IGNORE semantics we make that rule explicit -
+ trying to update a surviving ignore mask without the flag
+ FAN_MARK_IGNORED_SURV_MODIFY will return EEXIST error.
+
+The convenience macro FAN_MARK_IGNORE_SURV is added for
+(FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY), because the
+common case should use short constant names.
+
+Link: https://lore.kernel.org/r/20220629144210.2983229-4-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.h | 2 +
+ fs/notify/fanotify/fanotify_user.c | 63 +++++++++++++++++++++++++-----
+ include/linux/fanotify.h | 5 ++-
+ include/uapi/linux/fanotify.h | 8 ++++
+ 4 files changed, 67 insertions(+), 11 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 80e0ec95b1131..1d9f11255c64f 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -499,6 +499,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
+ mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
+ if (mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)
+ mflags |= FAN_MARK_EVICTABLE;
++ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
++ mflags |= FAN_MARK_IGNORE;
+
+ return mflags;
+ }
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 870db0f361f4c..879cd65b15187 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1005,7 +1005,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+ mask &= ~umask;
+ spin_lock(&fsn_mark->lock);
+ oldmask = fsnotify_calc_mask(fsn_mark);
+- if (!(flags & FAN_MARK_IGNORED_MASK)) {
++ if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) {
+ fsn_mark->mask &= ~mask;
+ } else {
+ fsn_mark->ignore_mask &= ~mask;
+@@ -1081,15 +1081,24 @@ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
+ unsigned int fan_flags)
+ {
+ bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE);
++ unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS;
+ bool recalc = false;
+
++ /*
++ * When using FAN_MARK_IGNORE for the first time, mark starts using
++ * independent event flags in ignore mask. After that, trying to
++ * update the ignore mask with the old FAN_MARK_IGNORED_MASK API
++ * will result in EEXIST error.
++ */
++ if (ignore == FAN_MARK_IGNORE)
++ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS;
++
+ /*
+ * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
+ * the removal of the FS_MODIFY bit in calculated mask if it was set
+ * because of an ignore mask that is now going to survive FS_MODIFY.
+ */
+- if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
+- (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
++ if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
+ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
+ if (!(fsn_mark->mask & FS_MODIFY))
+@@ -1116,7 +1125,7 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+ bool recalc;
+
+ spin_lock(&fsn_mark->lock);
+- if (!(fan_flags & FAN_MARK_IGNORED_MASK))
++ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS))
+ fsn_mark->mask |= mask;
+ else
+ fsn_mark->ignore_mask |= mask;
+@@ -1193,6 +1202,24 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
+ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
+ return -EEXIST;
+
++ /*
++ * New ignore mask semantics cannot be downgraded to old semantics.
++ */
++ if (fan_flags & FAN_MARK_IGNORED_MASK &&
++ fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
++ return -EEXIST;
++
++ /*
++ * An ignore mask that survives modify could never be downgraded to not
++ * survive modify. With new FAN_MARK_IGNORE semantics we make that rule
++ * explicit and return an error when trying to update the ignore mask
++ * without the original FAN_MARK_IGNORED_SURV_MODIFY value.
++ */
++ if (fan_flags & FAN_MARK_IGNORE &&
++ !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
++ fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
++ return -EEXIST;
++
+ return 0;
+ }
+
+@@ -1227,7 +1254,8 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ * Error events are pre-allocated per group, only if strictly
+ * needed (i.e. FAN_FS_ERROR was requested).
+ */
+- if (!(fan_flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
++ if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) &&
++ (mask & FAN_FS_ERROR)) {
+ ret = fanotify_group_init_error_pool(group);
+ if (ret)
+ goto out;
+@@ -1271,7 +1299,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
+ * an ignore mask, unless that ignore mask is supposed to survive
+ * modification changes anyway.
+ */
+- if ((flags & FAN_MARK_IGNORED_MASK) &&
++ if ((flags & FANOTIFY_MARK_IGNORE_BITS) &&
+ !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+ inode_is_open_for_write(inode))
+ return 0;
+@@ -1527,7 +1555,8 @@ static int fanotify_events_supported(struct fsnotify_group *group,
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+ /* Strict validation of events in non-dir inode mask with v5.17+ APIs */
+ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
+- (mask & FAN_RENAME);
++ (mask & FAN_RENAME) ||
++ (flags & FAN_MARK_IGNORE);
+
+ /*
+ * Some filesystems such as 'proc' acquire unusual locks when opening
+@@ -1579,7 +1608,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+ unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
+- bool ignore = flags & FAN_MARK_IGNORED_MASK;
++ unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS;
+ unsigned int obj_type, fid_mode;
+ u32 umask = 0;
+ int ret;
+@@ -1628,12 +1657,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ if (mask & ~valid_mask)
+ return -EINVAL;
+
++
++ /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */
++ if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK))
++ return -EINVAL;
++
+ /*
+ * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
+ * FAN_MARK_IGNORED_MASK.
+ */
+- if (ignore)
++ if (ignore == FAN_MARK_IGNORED_MASK) {
+ mask &= ~FANOTIFY_EVENT_FLAGS;
++ umask = FANOTIFY_EVENT_FLAGS;
++ }
+
+ f = fdget(fanotify_fd);
+ if (unlikely(!f.file))
+@@ -1737,6 +1773,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ else
+ mnt = path.mnt;
+
++ ret = mnt ? -EINVAL : -EISDIR;
++ /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */
++ if (mark_cmd == FAN_MARK_ADD && ignore == FAN_MARK_IGNORE &&
++ (mnt || S_ISDIR(inode->i_mode)) &&
++ !(flags & FAN_MARK_IGNORED_SURV_MODIFY))
++ goto path_put_and_out;
++
+ /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
+ if (mnt || !S_ISDIR(inode->i_mode)) {
+ mask &= ~FAN_EVENT_ON_CHILD;
+@@ -1829,7 +1872,7 @@ static int __init fanotify_user_setup(void)
+
+ BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
+- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
++ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11);
+
+ fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+ SLAB_PANIC|SLAB_ACCOUNT);
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index c9e185407ebcb..558844c8d2598 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -64,11 +64,14 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \
+ FAN_MARK_FLUSH)
+
++#define FANOTIFY_MARK_IGNORE_BITS (FAN_MARK_IGNORED_MASK | \
++ FAN_MARK_IGNORE)
++
+ #define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \
+ FANOTIFY_MARK_CMD_BITS | \
++ FANOTIFY_MARK_IGNORE_BITS | \
+ FAN_MARK_DONT_FOLLOW | \
+ FAN_MARK_ONLYDIR | \
+- FAN_MARK_IGNORED_MASK | \
+ FAN_MARK_IGNORED_SURV_MODIFY | \
+ FAN_MARK_EVICTABLE)
+
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index f1f89132d60e2..d8536d77fea1c 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -83,12 +83,20 @@
+ #define FAN_MARK_FLUSH 0x00000080
+ /* FAN_MARK_FILESYSTEM is 0x00000100 */
+ #define FAN_MARK_EVICTABLE 0x00000200
++/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
++#define FAN_MARK_IGNORE 0x00000400
+
+ /* These are NOT bitwise flags. Both bits can be used togther. */
+ #define FAN_MARK_INODE 0x00000000
+ #define FAN_MARK_MOUNT 0x00000010
+ #define FAN_MARK_FILESYSTEM 0x00000100
+
++/*
++ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
++ * for non-inode mark types.
++ */
++#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
++
+ /* Deprecated - do not use this in programs and do not add new flags here! */
+ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
+ FAN_MARK_REMOVE |\
+--
+2.43.0
+
--- /dev/null
+From 57db11aee01ee83133cdb5eee9fdcf954960978c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:29 +0200
+Subject: fanotify: introduce group flag FAN_REPORT_TARGET_FID
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit d61fd650e9d206a71fda789f02a1ced4b19944c4 ]
+
+FAN_REPORT_FID is ambiguous in that it reports the fid of the child for
+some events and the fid of the parent for create/delete/move events.
+
+The new FAN_REPORT_TARGET_FID flag is an implicit request to report
+the fid of the target object of the operation (a.k.a the child inode)
+also in create/delete/move events in addition to the fid of the parent
+and the name of the child.
+
+To reduce the test matrix for uninteresting use cases, the new
+FAN_REPORT_TARGET_FID flag requires both FAN_REPORT_NAME and
+FAN_REPORT_FID. The convenience macro FAN_REPORT_DFID_NAME_TARGET
+combines FAN_REPORT_TARGET_FID with all the required flags.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-4-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 48 ++++++++++++++++++++++--------
+ fs/notify/fanotify/fanotify_user.c | 11 ++++++-
+ include/linux/fanotify.h | 2 +-
+ include/uapi/linux/fanotify.h | 4 +++
+ 4 files changed, 51 insertions(+), 14 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 652fe84cb8acd..85e542b164c8c 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -458,17 +458,41 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
+ }
+
+ /*
+- * The inode to use as identifier when reporting fid depends on the event.
+- * Report the modified directory inode on dirent modification events.
+- * Report the "victim" inode otherwise.
++ * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for
++ * some events and the fid of the parent for create/delete/move events.
++ *
++ * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported
++ * also in create/delete/move events in addition to the fid of the parent
++ * and the name of the child.
++ */
++static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask)
++{
++ if (mask & ALL_FSNOTIFY_DIRENT_EVENTS)
++ return (fid_mode & FAN_REPORT_TARGET_FID);
++
++ return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR);
++}
++
++/*
++ * The inode to use as identifier when reporting fid depends on the event
++ * and the group flags.
++ *
++ * With the group flag FAN_REPORT_TARGET_FID, always report the child fid.
++ *
++ * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory
++ * fid on dirent events and the child fid otherwise.
++ *
+ * For example:
+- * FS_ATTRIB reports the child inode even if reported on a watched parent.
+- * FS_CREATE reports the modified dir inode and not the created inode.
++ * FS_ATTRIB reports the child fid even if reported on a watched parent.
++ * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID.
++ * and reports the created child fid with FAN_REPORT_TARGET_FID.
+ */
+ static struct inode *fanotify_fid_inode(u32 event_mask, const void *data,
+- int data_type, struct inode *dir)
++ int data_type, struct inode *dir,
++ unsigned int fid_mode)
+ {
+- if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
++ if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) &&
++ !(fid_mode & FAN_REPORT_TARGET_FID))
+ return dir;
+
+ return fsnotify_data_inode(data, data_type);
+@@ -647,10 +671,11 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ {
+ struct fanotify_event *event = NULL;
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+- struct inode *id = fanotify_fid_inode(mask, data, data_type, dir);
++ unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
++ struct inode *id = fanotify_fid_inode(mask, data, data_type, dir,
++ fid_mode);
+ struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
+ const struct path *path = fsnotify_data_path(data, data_type);
+- unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ struct mem_cgroup *old_memcg;
+ struct inode *child = NULL;
+ bool name_event = false;
+@@ -660,11 +685,10 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+
+ if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
+ /*
+- * With both flags FAN_REPORT_DIR_FID and FAN_REPORT_FID, we
+- * report the child fid for events reported on a non-dir child
++ * For certain events and group flags, report the child fid
+ * in addition to reporting the parent fid and maybe child name.
+ */
+- if ((fid_mode & FAN_REPORT_FID) && id != dirid && !ondir)
++ if (fanotify_report_child_fid(fid_mode, mask) && id != dirid)
+ child = id;
+
+ id = dirid;
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index e8f6c843e9204..bfafda0447ea7 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1275,6 +1275,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+ if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID))
+ return -EINVAL;
+
++ /*
++ * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID
++ * and is used as an indication to report both dir and child fid on all
++ * dirent events.
++ */
++ if ((fid_mode & FAN_REPORT_TARGET_FID) &&
++ (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
++ return -EINVAL;
++
+ f_flags = O_RDWR | FMODE_NONOTIFY;
+ if (flags & FAN_CLOEXEC)
+ f_flags |= O_CLOEXEC;
+@@ -1685,7 +1694,7 @@ static int __init fanotify_user_setup(void)
+ FANOTIFY_DEFAULT_MAX_USER_MARKS);
+
+ BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
+- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 11);
++ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+
+ fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index 616af2ea20f30..376e050e6f384 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -25,7 +25,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+
+ #define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FANOTIFY_PERM_CLASSES)
+
+-#define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME)
++#define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET)
+
+ #define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD)
+
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index bd1932c2074d5..60f73639a896a 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -57,9 +57,13 @@
+ #define FAN_REPORT_FID 0x00000200 /* Report unique file id */
+ #define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
+ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */
++#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
+
+ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
+ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
++/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
++#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
++ FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
+
+ /* Deprecated - do not use this in programs and do not add new flags here! */
+ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \
+--
+2.43.0
+
--- /dev/null
+From 914d6d2cec8745b36a47b856d8a58078be18135c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:34 -0300
+Subject: fanotify: Pre-allocate pool of error events
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 734a1a5eccc5f7473002b0669f788e135f1f64aa ]
+
+Pre-allocate slots for file system errors to have greater chances of
+succeeding, since error events can happen in GFP_NOFS context. This
+patch introduces a group-wide mempool of error events, shared by all
+FAN_FS_ERROR marks in this group.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-20-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 3 +++
+ fs/notify/fanotify/fanotify.h | 11 +++++++++++
+ fs/notify/fanotify/fanotify_user.c | 26 +++++++++++++++++++++++++-
+ include/linux/fsnotify_backend.h | 2 ++
+ 4 files changed, 41 insertions(+), 1 deletion(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 8f152445d75c4..01d68dfc74aa2 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -819,6 +819,9 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
+ if (group->fanotify_data.ucounts)
+ dec_ucount(group->fanotify_data.ucounts,
+ UCOUNT_FANOTIFY_GROUPS);
++
++ if (mempool_initialized(&group->fanotify_data.error_events_pool))
++ mempool_exit(&group->fanotify_data.error_events_pool);
+ }
+
+ static void fanotify_free_path_event(struct fanotify_event *event)
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index c42cf8fd7d798..a577e87fac2b4 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -141,6 +141,7 @@ enum fanotify_event_type {
+ FANOTIFY_EVENT_TYPE_PATH,
+ FANOTIFY_EVENT_TYPE_PATH_PERM,
+ FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
++ FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
+ __FANOTIFY_EVENT_TYPE_NUM
+ };
+
+@@ -196,6 +197,16 @@ FANOTIFY_NE(struct fanotify_event *event)
+ return container_of(event, struct fanotify_name_event, fae);
+ }
+
++struct fanotify_error_event {
++ struct fanotify_event fae;
++};
++
++static inline struct fanotify_error_event *
++FANOTIFY_EE(struct fanotify_event *event)
++{
++ return container_of(event, struct fanotify_error_event, fae);
++}
++
+ static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
+ {
+ if (event->type == FANOTIFY_EVENT_TYPE_FID)
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 8598b00f7e9c8..b3cbcb0e71c99 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -30,6 +30,7 @@
+ #define FANOTIFY_DEFAULT_MAX_EVENTS 16384
+ #define FANOTIFY_OLD_DEFAULT_MAX_MARKS 8192
+ #define FANOTIFY_DEFAULT_MAX_GROUPS 128
++#define FANOTIFY_DEFAULT_FEE_POOL_SIZE 32
+
+ /*
+ * Legacy fanotify marks limits (8192) is per group and we introduced a tunable
+@@ -1054,6 +1055,15 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+ return ERR_PTR(ret);
+ }
+
++static int fanotify_group_init_error_pool(struct fsnotify_group *group)
++{
++ if (mempool_initialized(&group->fanotify_data.error_events_pool))
++ return 0;
++
++ return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool,
++ FANOTIFY_DEFAULT_FEE_POOL_SIZE,
++ sizeof(struct fanotify_error_event));
++}
+
+ static int fanotify_add_mark(struct fsnotify_group *group,
+ fsnotify_connp_t *connp, unsigned int type,
+@@ -1062,6 +1072,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ {
+ struct fsnotify_mark *fsn_mark;
+ __u32 added;
++ int ret = 0;
+
+ mutex_lock(&group->mark_mutex);
+ fsn_mark = fsnotify_find_mark(connp, group);
+@@ -1072,13 +1083,26 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ return PTR_ERR(fsn_mark);
+ }
+ }
++
++ /*
++ * Error events are pre-allocated per group, only if strictly
++ * needed (i.e. FAN_FS_ERROR was requested).
++ */
++ if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
++ ret = fanotify_group_init_error_pool(group);
++ if (ret)
++ goto out;
++ }
++
+ added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
+ if (added & ~fsnotify_conn_mask(fsn_mark->connector))
+ fsnotify_recalc_mask(fsn_mark->connector);
++
++out:
+ mutex_unlock(&group->mark_mutex);
+
+ fsnotify_put_mark(fsn_mark);
+- return 0;
++ return ret;
+ }
+
+ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 00dbaafbcf953..51ef2b079bfa0 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -19,6 +19,7 @@
+ #include <linux/atomic.h>
+ #include <linux/user_namespace.h>
+ #include <linux/refcount.h>
++#include <linux/mempool.h>
+
+ /*
+ * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
+@@ -246,6 +247,7 @@ struct fsnotify_group {
+ int flags; /* flags from fanotify_init() */
+ int f_flags; /* event_f_flags from fanotify_init() */
+ struct ucounts *ucounts;
++ mempool_t error_events_pool;
+ } fanotify_data;
+ #endif /* CONFIG_FANOTIFY */
+ };
+--
+2.43.0
+
--- /dev/null
+From b8c5b07fa3901a490aaeed4dd5714a28b8a2911b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jun 2022 17:42:08 +0300
+Subject: fanotify: prepare for setting event flags in ignore mask
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 31a371e419c885e0f137ce70395356ba8639dc52 ]
+
+Setting flags FAN_ONDIR FAN_EVENT_ON_CHILD in ignore mask has no effect.
+The FAN_EVENT_ON_CHILD flag in mask implicitly applies to ignore mask and
+ignore mask is always implicitly applied to events on directories.
+
+Define a mark flag that replaces this legacy behavior with logic of
+applying the ignore mask according to event flags in ignore mask.
+
+Implement the new logic to prepare for supporting an ignore mask that
+ignores events on children and ignore mask that does not ignore events
+on directories.
+
+To emphasize the change in terminology, also rename ignored_mask mark
+member to ignore_mask and use accessors to get only the effective
+ignored events or the ignored events and flags.
+
+This change in terminology finally aligns with the "ignore mask"
+language in man pages and in most of the comments.
+
+Link: https://lore.kernel.org/r/20220629144210.2983229-2-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 19 ++++---
+ fs/notify/fanotify/fanotify_user.c | 21 ++++---
+ fs/notify/fdinfo.c | 6 +-
+ fs/notify/fsnotify.c | 21 ++++---
+ include/linux/fsnotify_backend.h | 89 ++++++++++++++++++++++++++++--
+ 5 files changed, 121 insertions(+), 35 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 4f897e1095470..cd7d09a569fff 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -295,12 +295,13 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ const void *data, int data_type,
+ struct inode *dir)
+ {
+- __u32 marks_mask = 0, marks_ignored_mask = 0;
++ __u32 marks_mask = 0, marks_ignore_mask = 0;
+ __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
+ FANOTIFY_EVENT_FLAGS;
+ const struct path *path = fsnotify_data_path(data, data_type);
+ unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ struct fsnotify_mark *mark;
++ bool ondir = event_mask & FAN_ONDIR;
+ int type;
+
+ pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
+@@ -315,19 +316,21 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ return 0;
+ } else if (!(fid_mode & FAN_REPORT_FID)) {
+ /* Do we have a directory inode to report? */
+- if (!dir && !(event_mask & FS_ISDIR))
++ if (!dir && !ondir)
+ return 0;
+ }
+
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+- /* Apply ignore mask regardless of mark's ISDIR flag */
+- marks_ignored_mask |= mark->ignored_mask;
++ /*
++ * Apply ignore mask depending on event flags in ignore mask.
++ */
++ marks_ignore_mask |=
++ fsnotify_effective_ignore_mask(mark, ondir, type);
+
+ /*
+- * If the event is on dir and this mark doesn't care about
+- * events on dir, don't send it!
++ * Send the event depending on event flags in mark mask.
+ */
+- if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
++ if (!fsnotify_mask_applicable(mark->mask, ondir, type))
+ continue;
+
+ marks_mask |= mark->mask;
+@@ -336,7 +339,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ *match_mask |= 1U << type;
+ }
+
+- test_mask = event_mask & marks_mask & ~marks_ignored_mask;
++ test_mask = event_mask & marks_mask & ~marks_ignore_mask;
+
+ /*
+ * For dirent modification events (create/delete/move) that do not carry
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 433c89fdcf0cd..9367216263cab 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1008,7 +1008,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+ if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ fsn_mark->mask &= ~mask;
+ } else {
+- fsn_mark->ignored_mask &= ~mask;
++ fsn_mark->ignore_mask &= ~mask;
+ }
+ newmask = fsnotify_calc_mask(fsn_mark);
+ /*
+@@ -1017,7 +1017,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+ * changes to the mask.
+ * Destroy mark when only umask bits remain.
+ */
+- *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
++ *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask);
+ spin_unlock(&fsn_mark->lock);
+
+ return oldmask & ~newmask;
+@@ -1086,7 +1086,7 @@ static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark,
+ /*
+ * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
+ * the removal of the FS_MODIFY bit in calculated mask if it was set
+- * because of an ignored mask that is now going to survive FS_MODIFY.
++ * because of an ignore mask that is now going to survive FS_MODIFY.
+ */
+ if ((fan_flags & FAN_MARK_IGNORED_MASK) &&
+ (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+@@ -1119,7 +1119,7 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+ if (!(fan_flags & FAN_MARK_IGNORED_MASK))
+ fsn_mark->mask |= mask;
+ else
+- fsn_mark->ignored_mask |= mask;
++ fsn_mark->ignore_mask |= mask;
+
+ recalc = fsnotify_calc_mask(fsn_mark) &
+ ~fsnotify_conn_mask(fsn_mark->connector);
+@@ -1257,7 +1257,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
+
+ /*
+ * If some other task has this inode open for write we should not add
+- * an ignored mark, unless that ignored mark is supposed to survive
++ * an ignore mask, unless that ignore mask is supposed to survive
+ * modification changes anyway.
+ */
+ if ((flags & FAN_MARK_IGNORED_MASK) &&
+@@ -1567,7 +1567,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ __kernel_fsid_t __fsid, *fsid = NULL;
+ u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+- bool ignored = flags & FAN_MARK_IGNORED_MASK;
++ bool ignore = flags & FAN_MARK_IGNORED_MASK;
+ unsigned int obj_type, fid_mode;
+ u32 umask = 0;
+ int ret;
+@@ -1616,8 +1616,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ if (mask & ~valid_mask)
+ return -EINVAL;
+
+- /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
+- if (ignored)
++ /*
++ * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with
++ * FAN_MARK_IGNORED_MASK.
++ */
++ if (ignore)
+ mask &= ~FANOTIFY_EVENT_FLAGS;
+
+ f = fdget(fanotify_fd);
+@@ -1731,7 +1734,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ * events with parent/name info for non-directory.
+ */
+ if ((fid_mode & FAN_REPORT_DIR_FID) &&
+- (flags & FAN_MARK_ADD) && !ignored)
++ (flags & FAN_MARK_ADD) && !ignore)
+ mask |= FAN_EVENT_ON_CHILD;
+ }
+
+diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
+index 59fb40abe33d3..55081ae3a6ec0 100644
+--- a/fs/notify/fdinfo.c
++++ b/fs/notify/fdinfo.c
+@@ -113,7 +113,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+ return;
+ seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
+ inode->i_ino, inode->i_sb->s_dev,
+- mflags, mark->mask, mark->ignored_mask);
++ mflags, mark->mask, mark->ignore_mask);
+ show_mark_fhandle(m, inode);
+ seq_putc(m, '\n');
+ iput(inode);
+@@ -121,12 +121,12 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+ struct mount *mnt = fsnotify_conn_mount(mark->connector);
+
+ seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
+- mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
++ mnt->mnt_id, mflags, mark->mask, mark->ignore_mask);
+ } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
+ struct super_block *sb = fsnotify_conn_sb(mark->connector);
+
+ seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
+- sb->s_dev, mflags, mark->mask, mark->ignored_mask);
++ sb->s_dev, mflags, mark->mask, mark->ignore_mask);
+ }
+ }
+
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 0b3e74935cb4f..8687562df2e37 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -324,7 +324,8 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
+ struct fsnotify_group *group = NULL;
+ __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
+ __u32 marks_mask = 0;
+- __u32 marks_ignored_mask = 0;
++ __u32 marks_ignore_mask = 0;
++ bool is_dir = mask & FS_ISDIR;
+ struct fsnotify_mark *mark;
+ int type;
+
+@@ -336,7 +337,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ if (!(mark->flags &
+ FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+- mark->ignored_mask = 0;
++ mark->ignore_mask = 0;
+ }
+ }
+
+@@ -344,14 +345,15 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ group = mark->group;
+ marks_mask |= mark->mask;
+- marks_ignored_mask |= mark->ignored_mask;
++ marks_ignore_mask |=
++ fsnotify_effective_ignore_mask(mark, is_dir, type);
+ }
+
+- pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
+- __func__, group, mask, marks_mask, marks_ignored_mask,
++ pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
++ __func__, group, mask, marks_mask, marks_ignore_mask,
+ data, data_type, dir, cookie);
+
+- if (!(test_mask & marks_mask & ~marks_ignored_mask))
++ if (!(test_mask & marks_mask & ~marks_ignore_mask))
+ return 0;
+
+ if (group->ops->handle_event) {
+@@ -423,7 +425,8 @@ static bool fsnotify_iter_select_report_types(
+ * But is *this mark* watching children?
+ */
+ if (type == FSNOTIFY_ITER_TYPE_PARENT &&
+- !(mark->mask & FS_EVENT_ON_CHILD))
++ !(mark->mask & FS_EVENT_ON_CHILD) &&
++ !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD))
+ continue;
+
+ fsnotify_iter_set_report_type(iter_info, type);
+@@ -532,8 +535,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+
+
+ /*
+- * If this is a modify event we may need to clear some ignored masks.
+- * In that case, the object with ignored masks will have the FS_MODIFY
++ * If this is a modify event we may need to clear some ignore masks.
++ * In that case, the object with ignore masks will have the FS_MODIFY
+ * event in its mask.
+ * Otherwise, return if none of the marks care about this type of event.
+ */
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 9560734759fa6..d7d96c806bff2 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -518,8 +518,8 @@ struct fsnotify_mark {
+ struct hlist_node obj_list;
+ /* Head of list of marks for an object [mark ref] */
+ struct fsnotify_mark_connector *connector;
+- /* Events types to ignore [mark->lock, group->mark_mutex] */
+- __u32 ignored_mask;
++ /* Events types and flags to ignore [mark->lock, group->mark_mutex] */
++ __u32 ignore_mask;
+ /* General fsnotify mark flags */
+ #define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
+ #define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
+@@ -529,6 +529,7 @@ struct fsnotify_mark {
+ /* fanotify mark flags */
+ #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
+ #define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
++#define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS 0x0400
+ unsigned int flags; /* flags [mark->lock] */
+ };
+
+@@ -655,15 +656,91 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
+
+ /* functions used to manipulate the marks attached to inodes */
+
+-/* Get mask for calculating object interest taking ignored mask into account */
++/*
++ * Canonical "ignore mask" including event flags.
++ *
++ * Note the subtle semantic difference from the legacy ->ignored_mask.
++ * ->ignored_mask traditionally only meant which events should be ignored,
++ * while ->ignore_mask also includes flags regarding the type of objects on
++ * which events should be ignored.
++ */
++static inline __u32 fsnotify_ignore_mask(struct fsnotify_mark *mark)
++{
++ __u32 ignore_mask = mark->ignore_mask;
++
++ /* The event flags in ignore mask take effect */
++ if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS)
++ return ignore_mask;
++
++ /*
++ * Legacy behavior:
++ * - Always ignore events on dir
++ * - Ignore events on child if parent is watching children
++ */
++ ignore_mask |= FS_ISDIR;
++ ignore_mask &= ~FS_EVENT_ON_CHILD;
++ ignore_mask |= mark->mask & FS_EVENT_ON_CHILD;
++
++ return ignore_mask;
++}
++
++/* Legacy ignored_mask - only event types to ignore */
++static inline __u32 fsnotify_ignored_events(struct fsnotify_mark *mark)
++{
++ return mark->ignore_mask & ALL_FSNOTIFY_EVENTS;
++}
++
++/*
++ * Check if mask (or ignore mask) should be applied depending if victim is a
++ * directory and whether it is reported to a watching parent.
++ */
++static inline bool fsnotify_mask_applicable(__u32 mask, bool is_dir,
++ int iter_type)
++{
++ /* Should mask be applied to a directory? */
++ if (is_dir && !(mask & FS_ISDIR))
++ return false;
++
++ /* Should mask be applied to a child? */
++ if (iter_type == FSNOTIFY_ITER_TYPE_PARENT &&
++ !(mask & FS_EVENT_ON_CHILD))
++ return false;
++
++ return true;
++}
++
++/*
++ * Effective ignore mask taking into account if event victim is a
++ * directory and whether it is reported to a watching parent.
++ */
++static inline __u32 fsnotify_effective_ignore_mask(struct fsnotify_mark *mark,
++ bool is_dir, int iter_type)
++{
++ __u32 ignore_mask = fsnotify_ignored_events(mark);
++
++ if (!ignore_mask)
++ return 0;
++
++ /* For non-dir and non-child, no need to consult the event flags */
++ if (!is_dir && iter_type != FSNOTIFY_ITER_TYPE_PARENT)
++ return ignore_mask;
++
++ ignore_mask = fsnotify_ignore_mask(mark);
++ if (!fsnotify_mask_applicable(ignore_mask, is_dir, iter_type))
++ return 0;
++
++ return ignore_mask & ALL_FSNOTIFY_EVENTS;
++}
++
++/* Get mask for calculating object interest taking ignore mask into account */
+ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
+ {
+ __u32 mask = mark->mask;
+
+- if (!mark->ignored_mask)
++ if (!fsnotify_ignored_events(mark))
+ return mask;
+
+- /* Interest in FS_MODIFY may be needed for clearing ignored mask */
++ /* Interest in FS_MODIFY may be needed for clearing ignore mask */
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+ mask |= FS_MODIFY;
+
+@@ -671,7 +748,7 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
+ * If mark is interested in ignoring events on children, the object must
+ * show interest in those events for fsnotify_parent() to notice it.
+ */
+- return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS);
++ return mask | mark->ignore_mask;
+ }
+
+ /* Get mask of events for a list of marks */
+--
+2.43.0
+
--- /dev/null
+From 6578f0592a0fdef79de5e1547b5754a3c2ff2f76 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:35 +0200
+Subject: fanotify: record either old name new name or both for FAN_RENAME
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 2bfbcccde6e7a787feabad4645f628f963fe0663 ]
+
+We do not want to report the dirfid+name of a directory whose
+inode/sb are not watched, because watcher may not have permissions
+to see the directory content.
+
+Use an internal iter_info to indicate to fanotify_alloc_event()
+which marks of this group are watching FAN_RENAME, so it can decide
+if we need to record only the old parent+name, new parent+name or both.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-10-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+[JK: Modified code to pass around only mask of mark types matching
+generated event]
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 59 ++++++++++++++++++++++++++---------
+ 1 file changed, 44 insertions(+), 15 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index db81eab905442..14bc0f12cc9f3 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -284,8 +284,9 @@ static int fanotify_get_response(struct fsnotify_group *group,
+ */
+ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ struct fsnotify_iter_info *iter_info,
+- u32 event_mask, const void *data,
+- int data_type, struct inode *dir)
++ u32 *match_mask, u32 event_mask,
++ const void *data, int data_type,
++ struct inode *dir)
+ {
+ __u32 marks_mask = 0, marks_ignored_mask = 0;
+ __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
+@@ -335,6 +336,9 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ continue;
+
+ marks_mask |= mark->mask;
++
++ /* Record the mark types of this group that matched the event */
++ *match_mask |= 1U << type;
+ }
+
+ test_mask = event_mask & marks_mask & ~marks_ignored_mask;
+@@ -701,11 +705,11 @@ static struct fanotify_event *fanotify_alloc_error_event(
+ return &fee->fae;
+ }
+
+-static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+- u32 mask, const void *data,
+- int data_type, struct inode *dir,
+- const struct qstr *file_name,
+- __kernel_fsid_t *fsid)
++static struct fanotify_event *fanotify_alloc_event(
++ struct fsnotify_group *group,
++ u32 mask, const void *data, int data_type,
++ struct inode *dir, const struct qstr *file_name,
++ __kernel_fsid_t *fsid, u32 match_mask)
+ {
+ struct fanotify_event *event = NULL;
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+@@ -753,13 +757,36 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ }
+
+ /*
+- * In the special case of FAN_RENAME event, we record both
+- * old and new parent+name.
++ * In the special case of FAN_RENAME event, use the match_mask
++ * to determine if we need to report only the old parent+name,
++ * only the new parent+name or both.
+ * 'dirid' and 'file_name' are the old parent+name and
+ * 'moved' has the new parent+name.
+ */
+- if (mask & FAN_RENAME)
+- moved = fsnotify_data_dentry(data, data_type);
++ if (mask & FAN_RENAME) {
++ bool report_old, report_new;
++
++ if (WARN_ON_ONCE(!match_mask))
++ return NULL;
++
++ /* Report both old and new parent+name if sb watching */
++ report_old = report_new =
++ match_mask & (1U << FSNOTIFY_ITER_TYPE_SB);
++ report_old |=
++ match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE);
++ report_new |=
++ match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2);
++
++ if (!report_old) {
++ /* Do not report old parent+name */
++ dirid = NULL;
++ file_name = NULL;
++ }
++ if (report_new) {
++ /* Report new parent+name */
++ moved = fsnotify_data_dentry(data, data_type);
++ }
++ }
+ }
+
+ /*
+@@ -872,6 +899,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ struct fanotify_event *event;
+ struct fsnotify_event *fsn_event;
+ __kernel_fsid_t fsid = {};
++ u32 match_mask = 0;
+
+ BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+ BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+@@ -897,12 +925,13 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
+
+- mask = fanotify_group_event_mask(group, iter_info, mask, data,
+- data_type, dir);
++ mask = fanotify_group_event_mask(group, iter_info, &match_mask,
++ mask, data, data_type, dir);
+ if (!mask)
+ return 0;
+
+- pr_debug("%s: group=%p mask=%x\n", __func__, group, mask);
++ pr_debug("%s: group=%p mask=%x report_mask=%x\n", __func__,
++ group, mask, match_mask);
+
+ if (fanotify_is_perm_event(mask)) {
+ /*
+@@ -921,7 +950,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ }
+
+ event = fanotify_alloc_event(group, mask, data, data_type, dir,
+- file_name, &fsid);
++ file_name, &fsid, match_mask);
+ ret = -ENOMEM;
+ if (unlikely(!event)) {
+ /*
+--
+2.43.0
+
--- /dev/null
+From 16dc8b7c56eb3a07e4661c25149b47bd21424be7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:34 +0200
+Subject: fanotify: record old and new parent and name in FAN_RENAME event
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 3982534ba5ce45e890b2f5ef5e7372c1accd14c7 ]
+
+In the special case of FAN_RENAME event, we record both the old
+and new parent and name.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-9-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 42 +++++++++++++++++++++++++++++++----
+ include/uapi/linux/fanotify.h | 2 ++
+ 2 files changed, 40 insertions(+), 4 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 5f184b2d6ea7c..db81eab905442 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -592,21 +592,28 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir,
+ __kernel_fsid_t *fsid,
+ const struct qstr *name,
+ struct inode *child,
++ struct dentry *moved,
+ unsigned int *hash,
+ gfp_t gfp)
+ {
+ struct fanotify_name_event *fne;
+ struct fanotify_info *info;
+ struct fanotify_fh *dfh, *ffh;
++ struct inode *dir2 = moved ? d_inode(moved->d_parent) : NULL;
++ const struct qstr *name2 = moved ? &moved->d_name : NULL;
+ unsigned int dir_fh_len = fanotify_encode_fh_len(dir);
++ unsigned int dir2_fh_len = fanotify_encode_fh_len(dir2);
+ unsigned int child_fh_len = fanotify_encode_fh_len(child);
+ unsigned long name_len = name ? name->len : 0;
++ unsigned long name2_len = name2 ? name2->len : 0;
+ unsigned int len, size;
+
+ /* Reserve terminating null byte even for empty name */
+- size = sizeof(*fne) + name_len + 1;
++ size = sizeof(*fne) + name_len + name2_len + 2;
+ if (dir_fh_len)
+ size += FANOTIFY_FH_HDR_LEN + dir_fh_len;
++ if (dir2_fh_len)
++ size += FANOTIFY_FH_HDR_LEN + dir2_fh_len;
+ if (child_fh_len)
+ size += FANOTIFY_FH_HDR_LEN + child_fh_len;
+ fne = kmalloc(size, gfp);
+@@ -623,6 +630,11 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir,
+ len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0);
+ fanotify_info_set_dir_fh(info, len);
+ }
++ if (dir2_fh_len) {
++ dfh = fanotify_info_dir2_fh(info);
++ len = fanotify_encode_fh(dfh, dir2, dir2_fh_len, hash, 0);
++ fanotify_info_set_dir2_fh(info, len);
++ }
+ if (child_fh_len) {
+ ffh = fanotify_info_file_fh(info);
+ len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0);
+@@ -632,11 +644,22 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir,
+ fanotify_info_copy_name(info, name);
+ *hash ^= full_name_hash((void *)name_len, name->name, name_len);
+ }
++ if (name2_len) {
++ fanotify_info_copy_name2(info, name2);
++ *hash ^= full_name_hash((void *)name2_len, name2->name,
++ name2_len);
++ }
+
+ pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
+ __func__, size, dir_fh_len, child_fh_len,
+ info->name_len, info->name_len, fanotify_info_name(info));
+
++ if (dir2_fh_len) {
++ pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n",
++ __func__, dir2_fh_len, info->name2_len,
++ info->name2_len, fanotify_info_name2(info));
++ }
++
+ return &fne->fae;
+ }
+
+@@ -692,6 +715,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
+ const struct path *path = fsnotify_data_path(data, data_type);
+ struct mem_cgroup *old_memcg;
++ struct dentry *moved = NULL;
+ struct inode *child = NULL;
+ bool name_event = false;
+ unsigned int hash = 0;
+@@ -727,6 +751,15 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) {
+ name_event = true;
+ }
++
++ /*
++ * In the special case of FAN_RENAME event, we record both
++ * old and new parent+name.
++ * 'dirid' and 'file_name' are the old parent+name and
++ * 'moved' has the new parent+name.
++ */
++ if (mask & FAN_RENAME)
++ moved = fsnotify_data_dentry(data, data_type);
+ }
+
+ /*
+@@ -748,9 +781,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ } else if (fanotify_is_error_event(mask)) {
+ event = fanotify_alloc_error_event(group, fsid, data,
+ data_type, &hash);
+- } else if (name_event && (file_name || child)) {
+- event = fanotify_alloc_name_event(id, fsid, file_name, child,
+- &hash, gfp);
++ } else if (name_event && (file_name || moved || child)) {
++ event = fanotify_alloc_name_event(dirid, fsid, file_name, child,
++ moved, &hash, gfp);
+ } else if (fid_mode) {
+ event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
+ } else {
+@@ -860,6 +893,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
+ BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
+ BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
++ BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
+
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
+
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index 60f73639a896a..9d0e2dc5767b5 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -28,6 +28,8 @@
+
+ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
+
++#define FAN_RENAME 0x10000000 /* File was renamed */
++
+ #define FAN_ONDIR 0x40000000 /* Event occurred against dir */
+
+ /* helper events */
+--
+2.43.0
+
--- /dev/null
+From 7b6c1976f98c9cc3e39a053b35ab8730fe0a5dd8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Jun 2022 20:47:19 +0300
+Subject: fanotify: refine the validation checks on non-dir inode mask
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 8698e3bab4dd7968666e84e111d0bfd17c040e77 ]
+
+Commit ceaf69f8eadc ("fanotify: do not allow setting dirent events in
+mask of non-dir") added restrictions about setting dirent events in the
+mask of a non-dir inode mark, which does not make any sense.
+
+For backward compatibility, these restrictions were added only to new
+(v5.17+) APIs.
+
+It also does not make any sense to set the flags FAN_EVENT_ON_CHILD or
+FAN_ONDIR in the mask of a non-dir inode. Add these flags to the
+dir-only restriction of the new APIs as well.
+
+Move the check of the dir-only flags for new APIs into the helper
+fanotify_events_supported(), which is only called for FAN_MARK_ADD,
+because there is no need to error on an attempt to remove the dir-only
+flags from non-dir inode.
+
+Fixes: ceaf69f8eadc ("fanotify: do not allow setting dirent events in mask of non-dir")
+Link: https://lore.kernel.org/linux-fsdevel/20220627113224.kr2725conevh53u4@quack3.lan/
+Link: https://lore.kernel.org/r/20220627174719.2838175-1-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+[ cel: adjusted to apply on v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 30 +++++++++++++++---------------
+ include/linux/fanotify.h | 4 ++++
+ 2 files changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 6db5a0b03a78d..433c89fdcf0cd 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1509,10 +1509,14 @@ static int fanotify_test_fid(struct dentry *dentry)
+ return 0;
+ }
+
+-static int fanotify_events_supported(struct path *path, __u64 mask,
++static int fanotify_events_supported(struct fsnotify_group *group,
++ struct path *path, __u64 mask,
+ unsigned int flags)
+ {
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
++ /* Strict validation of events in non-dir inode mask with v5.17+ APIs */
++ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
++ (mask & FAN_RENAME);
+
+ /*
+ * Some filesystems such as 'proc' acquire unusual locks when opening
+@@ -1540,6 +1544,15 @@ static int fanotify_events_supported(struct path *path, __u64 mask,
+ path->mnt->mnt_sb->s_flags & SB_NOUSER)
+ return -EINVAL;
+
++ /*
++ * We shouldn't have allowed setting dirent events and the directory
++ * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode,
++ * but because we always allowed it, error only when using new APIs.
++ */
++ if (strict_dir_events && mark_type == FAN_MARK_INODE &&
++ !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
++ return -ENOTDIR;
++
+ return 0;
+ }
+
+@@ -1686,7 +1699,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ goto fput_and_out;
+
+ if (flags & FAN_MARK_ADD) {
+- ret = fanotify_events_supported(&path, mask, flags);
++ ret = fanotify_events_supported(group, &path, mask, flags);
+ if (ret)
+ goto path_put_and_out;
+ }
+@@ -1709,19 +1722,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ else
+ mnt = path.mnt;
+
+- /*
+- * FAN_RENAME is not allowed on non-dir (for now).
+- * We shouldn't have allowed setting any dirent events in mask of
+- * non-dir, but because we always allowed it, error only if group
+- * was initialized with the new flag FAN_REPORT_TARGET_FID.
+- */
+- ret = -ENOTDIR;
+- if (inode && !S_ISDIR(inode->i_mode) &&
+- ((mask & FAN_RENAME) ||
+- ((mask & FANOTIFY_DIRENT_EVENTS) &&
+- FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
+- goto path_put_and_out;
+-
+ /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
+ if (mnt || !S_ISDIR(inode->i_mode)) {
+ mask &= ~FAN_EVENT_ON_CHILD;
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index 81f45061c1b18..4f6cbe6c6e235 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -113,6 +113,10 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ FANOTIFY_PERM_EVENTS | \
+ FAN_Q_OVERFLOW | FAN_ONDIR)
+
++/* Events and flags relevant only for directories */
++#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \
++ FAN_EVENT_ON_CHILD | FAN_ONDIR)
++
+ #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \
+ FANOTIFY_EVENT_FLAGS)
+
+--
+2.43.0
+
--- /dev/null
+From 91e3c5efced0504ce4ad02a82f43a42ddb53c4c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 10:30:18 +0800
+Subject: fanotify: Remove obsoleted fanotify_event_has_path()
+
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+
+[ Upstream commit 7a80bf902d2bc722b4477442ee772e8574603185 ]
+
+All uses of fanotify_event_has_path() have been removed since
+commit 9c61f3b560f5 ("fanotify: break up fanotify_alloc_event()").
+The helper is now unused, so remove it.
+
+Link: https://lore.kernel.org/r/20220926023018.1505270-1-cuigaosheng1@huawei.com
+Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+---
+ fs/notify/fanotify/fanotify.h | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index bf6d4d38afa04..57f51a9a3015d 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -452,12 +452,6 @@ static inline bool fanotify_is_error_event(u32 mask)
+ return mask & FAN_FS_ERROR;
+ }
+
+-static inline bool fanotify_event_has_path(struct fanotify_event *event)
+-{
+- return event->type == FANOTIFY_EVENT_TYPE_PATH ||
+- event->type == FANOTIFY_EVENT_TYPE_PATH_PERM;
+-}
+-
+ static inline const struct path *fanotify_event_path(struct fanotify_event *event)
+ {
+ if (event->type == FANOTIFY_EVENT_TYPE_PATH)
+--
+2.43.0
+
--- /dev/null
+From bc93efe0f4a746c87be035a132919771fc82412a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:41 -0300
+Subject: fanotify: Report fid info for file related file system errors
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 936d6a38be39177495af38497bf8da1c6128fa1b ]
+
+Plumb the pieces to add a FID report to error records. Since all error
+event memory must be pre-allocated, we pre-allocate the maximum file
+handle size possible, such that it should always fit.
+
+For errors that don't expose a file handle, report it with an invalid
+FID. Internally we use zero-length FILEID_ROOT file handle for passing
+the information (which we report as zero-length FILEID_INVALID file
+handle to userspace) so we update the handle reporting code to deal with
+this case correctly.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-27-krisman@collabora.com
+Link: https://lore.kernel.org/r/20211025192746.66445-25-krisman@collabora.com
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+[Folded two patches into one to make series bisectable]
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 11 +++++++++++
+ fs/notify/fanotify/fanotify.h | 9 +++++++++
+ fs/notify/fanotify/fanotify_user.c | 8 +++++---
+ 3 files changed, 25 insertions(+), 3 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 45df610debbe4..465f07e70e6dc 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -609,7 +609,9 @@ static struct fanotify_event *fanotify_alloc_error_event(
+ {
+ struct fs_error_report *report =
+ fsnotify_data_error_report(data, data_type);
++ struct inode *inode;
+ struct fanotify_error_event *fee;
++ int fh_len;
+
+ if (WARN_ON_ONCE(!report))
+ return NULL;
+@@ -622,6 +624,15 @@ static struct fanotify_event *fanotify_alloc_error_event(
+ fee->err_count = 1;
+ fee->fsid = *fsid;
+
++ inode = report->inode;
++ fh_len = fanotify_encode_fh_len(inode);
++
++ /* Bad fh_len. Fallback to using an invalid fh. Should never happen. */
++ if (!fh_len && inode)
++ inode = NULL;
++
++ fanotify_encode_fh(&fee->object_fh, inode, fh_len, NULL, 0);
++
+ *hash ^= fanotify_hash_fsid(fsid);
+
+ return &fee->fae;
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 80af269eebb89..edd7587adcc59 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -208,6 +208,8 @@ struct fanotify_error_event {
+ u32 err_count; /* Suppressed errors count */
+
+ __kernel_fsid_t fsid; /* FSID this error refers to. */
++
++ FANOTIFY_INLINE_FH(object_fh, MAX_HANDLE_SZ);
+ };
+
+ static inline struct fanotify_error_event *
+@@ -222,6 +224,8 @@ static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
+ return &FANOTIFY_FE(event)->fsid;
+ else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
+ return &FANOTIFY_NE(event)->fsid;
++ else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
++ return &FANOTIFY_EE(event)->fsid;
+ else
+ return NULL;
+ }
+@@ -233,6 +237,8 @@ static inline struct fanotify_fh *fanotify_event_object_fh(
+ return &FANOTIFY_FE(event)->object_fh;
+ else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
+ return fanotify_info_file_fh(&FANOTIFY_NE(event)->info);
++ else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
++ return &FANOTIFY_EE(event)->object_fh;
+ else
+ return NULL;
+ }
+@@ -266,6 +272,9 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event)
+
+ static inline bool fanotify_event_has_object_fh(struct fanotify_event *event)
+ {
++ /* For error events, even zeroed fh are reported. */
++ if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
++ return true;
+ return fanotify_event_object_fh_len(event) > 0;
+ }
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index c053038e1cf3c..fa3dac9c59f69 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -339,9 +339,6 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n",
+ __func__, fh_len, name_len, info_len, count);
+
+- if (!fh_len)
+- return 0;
+-
+ if (WARN_ON_ONCE(len < sizeof(info) || len > count))
+ return -EFAULT;
+
+@@ -376,6 +373,11 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+
+ handle.handle_type = fh->type;
+ handle.handle_bytes = fh_len;
++
++ /* Mangle handle_type for bad file_handle */
++ if (!fh_len)
++ handle.handle_type = FILEID_INVALID;
++
+ if (copy_to_user(buf, &handle, sizeof(handle)))
+ return -EFAULT;
+
+--
+2.43.0
+
--- /dev/null
+From b701f4c62f509cb661227b93d41d94fa3ded5283 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:36 +0200
+Subject: fanotify: report old and/or new parent+name in FAN_RENAME event
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 7326e382c21e9c23c89c88369afdc90b82a14da8 ]
+
+In the special case of FAN_RENAME event, we report old or new or both
+old and new parent+name.
+
+A single info record will be reported if either the old or new dir
+is watched and two records will be reported if both old and new dir
+(or their filesystem) are watched.
+
+The old and new parent+name are reported using new info record types
+FAN_EVENT_INFO_TYPE_{OLD,NEW}_DFID_NAME, so if a single info record
+is reported, it is clear to the application which dir entry the
+fid+name info refers to.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-11-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 7 ++++
+ fs/notify/fanotify/fanotify.h | 18 +++++++++++
+ fs/notify/fanotify/fanotify_user.c | 52 +++++++++++++++++++++++++++---
+ include/uapi/linux/fanotify.h | 6 ++++
+ 4 files changed, 78 insertions(+), 5 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 14bc0f12cc9f3..0da305b6f3e2f 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -153,6 +153,13 @@ static bool fanotify_should_merge(struct fanotify_event *old,
+ if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR))
+ return false;
+
++ /*
++ * FAN_RENAME event is reported with special info record types,
++ * so we cannot merge it with other events.
++ */
++ if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME))
++ return false;
++
+ switch (old->type) {
+ case FANOTIFY_EVENT_TYPE_PATH:
+ return fanotify_path_equal(fanotify_event_path(old),
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 8fa3bc0effd45..a3d5b751cac5b 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -373,6 +373,13 @@ static inline int fanotify_event_dir_fh_len(struct fanotify_event *event)
+ return info ? fanotify_info_dir_fh_len(info) : 0;
+ }
+
++static inline int fanotify_event_dir2_fh_len(struct fanotify_event *event)
++{
++ struct fanotify_info *info = fanotify_event_info(event);
++
++ return info ? fanotify_info_dir2_fh_len(info) : 0;
++}
++
+ static inline bool fanotify_event_has_object_fh(struct fanotify_event *event)
+ {
+ /* For error events, even zeroed fh are reported. */
+@@ -386,6 +393,17 @@ static inline bool fanotify_event_has_dir_fh(struct fanotify_event *event)
+ return fanotify_event_dir_fh_len(event) > 0;
+ }
+
++static inline bool fanotify_event_has_dir2_fh(struct fanotify_event *event)
++{
++ return fanotify_event_dir2_fh_len(event) > 0;
++}
++
++static inline bool fanotify_event_has_any_dir_fh(struct fanotify_event *event)
++{
++ return fanotify_event_has_dir_fh(event) ||
++ fanotify_event_has_dir2_fh(event);
++}
++
+ struct fanotify_path_event {
+ struct fanotify_event fae;
+ struct path path;
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 526e3b5a98f34..b3ac2d877e1ee 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -129,12 +129,29 @@ static int fanotify_fid_info_len(int fh_len, int name_len)
+ FANOTIFY_EVENT_ALIGN);
+ }
+
++/* FAN_RENAME may have one or two dir+name info records */
++static int fanotify_dir_name_info_len(struct fanotify_event *event)
++{
++ struct fanotify_info *info = fanotify_event_info(event);
++ int dir_fh_len = fanotify_event_dir_fh_len(event);
++ int dir2_fh_len = fanotify_event_dir2_fh_len(event);
++ int info_len = 0;
++
++ if (dir_fh_len)
++ info_len += fanotify_fid_info_len(dir_fh_len,
++ info->name_len);
++ if (dir2_fh_len)
++ info_len += fanotify_fid_info_len(dir2_fh_len,
++ info->name2_len);
++
++ return info_len;
++}
++
+ static size_t fanotify_event_len(unsigned int info_mode,
+ struct fanotify_event *event)
+ {
+ size_t event_len = FAN_EVENT_METADATA_LEN;
+ struct fanotify_info *info;
+- int dir_fh_len;
+ int fh_len;
+ int dot_len = 0;
+
+@@ -146,9 +163,8 @@ static size_t fanotify_event_len(unsigned int info_mode,
+
+ info = fanotify_event_info(event);
+
+- if (fanotify_event_has_dir_fh(event)) {
+- dir_fh_len = fanotify_event_dir_fh_len(event);
+- event_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
++ if (fanotify_event_has_any_dir_fh(event)) {
++ event_len += fanotify_dir_name_info_len(event);
+ } else if ((info_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+ /*
+@@ -379,6 +395,8 @@ static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ return -EFAULT;
+ break;
+ case FAN_EVENT_INFO_TYPE_DFID_NAME:
++ case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME:
++ case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME:
+ if (WARN_ON_ONCE(!name || !name_len))
+ return -EFAULT;
+ break;
+@@ -478,11 +496,19 @@ static int copy_info_records_to_user(struct fanotify_event *event,
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
+
+ /*
+- * Event info records order is as follows: dir fid + name, child fid.
++ * Event info records order is as follows:
++ * 1. dir fid + name
++ * 2. (optional) new dir fid + new name
++ * 3. (optional) child fid
+ */
+ if (fanotify_event_has_dir_fh(event)) {
+ info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+ FAN_EVENT_INFO_TYPE_DFID;
++
++ /* FAN_RENAME uses special info types */
++ if (event->mask & FAN_RENAME)
++ info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME;
++
+ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
+ fanotify_info_dir_fh(info),
+ info_type,
+@@ -496,6 +522,22 @@ static int copy_info_records_to_user(struct fanotify_event *event,
+ total_bytes += ret;
+ }
+
++ /* New dir fid+name may be reported in addition to old dir fid+name */
++ if (fanotify_event_has_dir2_fh(event)) {
++ info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME;
++ ret = copy_fid_info_to_user(fanotify_event_fsid(event),
++ fanotify_info_dir2_fh(info),
++ info_type,
++ fanotify_info_name2(info),
++ info->name2_len, buf, count);
++ if (ret < 0)
++ return ret;
++
++ buf += ret;
++ count -= ret;
++ total_bytes += ret;
++ }
++
+ if (fanotify_event_has_object_fh(event)) {
+ const char *dot = NULL;
+ int dot_len = 0;
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index 9d0e2dc5767b5..e8ac38cc2fd6d 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -134,6 +134,12 @@ struct fanotify_event_metadata {
+ #define FAN_EVENT_INFO_TYPE_PIDFD 4
+ #define FAN_EVENT_INFO_TYPE_ERROR 5
+
++/* Special info types for FAN_RENAME */
++#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
++/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */
++#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12
++/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */
++
+ /* Variable length info record following event metadata */
+ struct fanotify_event_info_header {
+ __u8 info_type;
+--
+2.43.0
+
--- /dev/null
+From 153286bd06f3a924ae83af5b4b477c1bfe223fb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:31 -0300
+Subject: fanotify: Require fid_mode for any non-fd event
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 4fe595cf1c80e7a5af4d00c4da29def64aff57a2 ]
+
+Like inode events, FAN_FS_ERROR will require fid mode. Therefore,
+convert the verification during fanotify_mark(2) to require fid for any
+non-fd event. This means fid_mode will not only be required for inode
+events, but for any event that doesn't provide a descriptor.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-17-krisman@collabora.com
+Suggested-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 12 ++++++------
+ include/linux/fanotify.h | 3 +++
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 8cf8e63a2c3e8..8598b00f7e9c8 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1476,14 +1476,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ goto fput_and_out;
+
+ /*
+- * Events with data type inode do not carry enough information to report
+- * event->fd, so we do not allow setting a mask for inode events unless
+- * group supports reporting fid.
+- * inode events are not supported on a mount mark, because they do not
+- * carry enough information (i.e. path) to be filtered by mount point.
++ * Events that do not carry enough information to report
++ * event->fd require a group that supports reporting fid. Those
++ * events are not supported on a mount mark, because they do not
++ * carry enough information (i.e. path) to be filtered by mount
++ * point.
+ */
+ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+- if (mask & FANOTIFY_INODE_EVENTS &&
++ if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
+ (!fid_mode || mark_type == FAN_MARK_MOUNT))
+ goto fput_and_out;
+
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index eec3b7c408115..52d464802d99f 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -84,6 +84,9 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ */
+ #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE)
+
++/* Events that can be reported with event->fd */
++#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
++
+ /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */
+ #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \
+ FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
+--
+2.43.0
+
--- /dev/null
+From fa8d490d64e8407d9b5b99bc482eac35dc77fcbc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:33 -0300
+Subject: fanotify: Reserve UAPI bits for FAN_FS_ERROR
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 8d11a4f43ef4679be0908026907a7613b33d7127 ]
+
+FAN_FS_ERROR allows reporting of event type FS_ERROR to userspace, which
+is a mechanism to report file system wide problems via fanotify. This
+commit preallocates userspace-visible bits to match the FS_ERROR event.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-19-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 1 +
+ include/uapi/linux/fanotify.h | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index c64d61b673caf..8f152445d75c4 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -752,6 +752,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+ BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
+ BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
++ BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
+
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
+
+diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
+index 64553df9d7350..2990731ddc8bc 100644
+--- a/include/uapi/linux/fanotify.h
++++ b/include/uapi/linux/fanotify.h
+@@ -20,6 +20,7 @@
+ #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
+
+ #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
++#define FAN_FS_ERROR 0x00008000 /* Filesystem error */
+
+ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
+ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
+--
+2.43.0
+
--- /dev/null
+From 31d4d90185c99b8856cba30e84c926750b343221 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:21 -0300
+Subject: fanotify: Split fsid check from other fid mode checks
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 8299212cbdb01a5867e230e961f82e5c02a6de34 ]
+
+FAN_FS_ERROR will require fsid, but not necessarily require the
+filesystem to expose a file handle. Split those checks into different
+functions, so they can be used separately when setting up an event.
+
+While there, update a comment about tmpfs having 0 fsid, which is no
+longer true.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-7-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 27 ++++++++++++++++++---------
+ 1 file changed, 18 insertions(+), 9 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 20b743b05b997..8cf8e63a2c3e8 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1300,16 +1300,15 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+ return fd;
+ }
+
+-/* Check if filesystem can encode a unique fid */
+-static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
++static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
+ {
+ __kernel_fsid_t root_fsid;
+ int err;
+
+ /*
+- * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
++ * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
+ */
+- err = vfs_get_fsid(path->dentry, fsid);
++ err = vfs_get_fsid(dentry, fsid);
+ if (err)
+ return err;
+
+@@ -1317,10 +1316,10 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
+ return -ENODEV;
+
+ /*
+- * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
++ * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
+ * which uses a different fsid than sb root.
+ */
+- err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
++ err = vfs_get_fsid(dentry->d_sb->s_root, &root_fsid);
+ if (err)
+ return err;
+
+@@ -1328,6 +1327,12 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
+ root_fsid.val[1] != fsid->val[1])
+ return -EXDEV;
+
++ return 0;
++}
++
++/* Check if filesystem can encode a unique fid */
++static int fanotify_test_fid(struct dentry *dentry)
++{
+ /*
+ * We need to make sure that the file system supports at least
+ * encoding a file handle so user can use name_to_handle_at() to
+@@ -1335,8 +1340,8 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
+ * objects. However, name_to_handle_at() requires that the
+ * filesystem also supports decoding file handles.
+ */
+- if (!path->dentry->d_sb->s_export_op ||
+- !path->dentry->d_sb->s_export_op->fh_to_dentry)
++ if (!dentry->d_sb->s_export_op ||
++ !dentry->d_sb->s_export_op->fh_to_dentry)
+ return -EOPNOTSUPP;
+
+ return 0;
+@@ -1505,7 +1510,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ }
+
+ if (fid_mode) {
+- ret = fanotify_test_fid(&path, &__fsid);
++ ret = fanotify_test_fsid(path.dentry, &__fsid);
++ if (ret)
++ goto path_put_and_out;
++
++ ret = fanotify_test_fid(path.dentry);
+ if (ret)
+ goto path_put_and_out;
+
+--
+2.43.0
+
--- /dev/null
+From 881f1247e6c4d246f1193f06ed990a9d9a134aeb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:35 -0300
+Subject: fanotify: Support enqueueing of error events
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 83e9acbe13dc1b767f91b5c1350f7a65689b26f6 ]
+
+Once an error event is triggered, enqueue it in the notification group,
+similarly to what is done for other events. FAN_FS_ERROR is not
+handled specially, since the memory is now handled by a preallocated
+mempool.
+
+For now, make the event unhashed. A future patch implements merging of
+this kind of event.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-21-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 35 +++++++++++++++++++++++++++++++++++
+ fs/notify/fanotify/fanotify.h | 6 ++++++
+ 2 files changed, 41 insertions(+)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 01d68dfc74aa2..1f195c95dfcd0 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -574,6 +574,27 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
+ return &fne->fae;
+ }
+
++static struct fanotify_event *fanotify_alloc_error_event(
++ struct fsnotify_group *group,
++ __kernel_fsid_t *fsid,
++ const void *data, int data_type)
++{
++ struct fs_error_report *report =
++ fsnotify_data_error_report(data, data_type);
++ struct fanotify_error_event *fee;
++
++ if (WARN_ON_ONCE(!report))
++ return NULL;
++
++ fee = mempool_alloc(&group->fanotify_data.error_events_pool, GFP_NOFS);
++ if (!fee)
++ return NULL;
++
++ fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
++
++ return &fee->fae;
++}
++
+ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ u32 mask, const void *data,
+ int data_type, struct inode *dir,
+@@ -641,6 +662,9 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+
+ if (fanotify_is_perm_event(mask)) {
+ event = fanotify_alloc_perm_event(path, gfp);
++ } else if (fanotify_is_error_event(mask)) {
++ event = fanotify_alloc_error_event(group, fsid, data,
++ data_type);
+ } else if (name_event && (file_name || child)) {
+ event = fanotify_alloc_name_event(id, fsid, file_name, child,
+ &hash, gfp);
+@@ -850,6 +874,14 @@ static void fanotify_free_name_event(struct fanotify_event *event)
+ kfree(FANOTIFY_NE(event));
+ }
+
++static void fanotify_free_error_event(struct fsnotify_group *group,
++ struct fanotify_event *event)
++{
++ struct fanotify_error_event *fee = FANOTIFY_EE(event);
++
++ mempool_free(fee, &group->fanotify_data.error_events_pool);
++}
++
+ static void fanotify_free_event(struct fsnotify_group *group,
+ struct fsnotify_event *fsn_event)
+ {
+@@ -873,6 +905,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
+ case FANOTIFY_EVENT_TYPE_OVERFLOW:
+ kfree(event);
+ break;
++ case FANOTIFY_EVENT_TYPE_FS_ERROR:
++ fanotify_free_error_event(group, event);
++ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index a577e87fac2b4..ebef952481fa0 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -298,6 +298,11 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
+ return container_of(fse, struct fanotify_event, fse);
+ }
+
++static inline bool fanotify_is_error_event(u32 mask)
++{
++ return mask & FAN_FS_ERROR;
++}
++
+ static inline bool fanotify_event_has_path(struct fanotify_event *event)
+ {
+ return event->type == FANOTIFY_EVENT_TYPE_PATH ||
+@@ -327,6 +332,7 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
+ static inline bool fanotify_is_hashed_event(u32 mask)
+ {
+ return !(fanotify_is_perm_event(mask) ||
++ fanotify_is_error_event(mask) ||
+ fsnotify_is_overflow_event(mask));
+ }
+
+--
+2.43.0
+
--- /dev/null
+From ca61ab217c23b1cbba3712e9b0e2195fb82d7c42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:36 -0300
+Subject: fanotify: Support merging of error events
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 8a6ae64132fd27a944faed7bc38484827609eb76 ]
+
+Error events (FAN_FS_ERROR) against the same file system can be merged
+by simply incrementing the error count. The hash is taken from the fsid,
+without considering the FH. This means that only the first error object
+is reported.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-22-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 26 ++++++++++++++++++++++++--
+ fs/notify/fanotify/fanotify.h | 4 +++-
+ 2 files changed, 27 insertions(+), 3 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 1f195c95dfcd0..cedcb15468043 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -111,6 +111,16 @@ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
+ return fanotify_info_equal(info1, info2);
+ }
+
++static bool fanotify_error_event_equal(struct fanotify_error_event *fee1,
++ struct fanotify_error_event *fee2)
++{
++ /* Error events against the same file system are always merged. */
++ if (!fanotify_fsid_equal(&fee1->fsid, &fee2->fsid))
++ return false;
++
++ return true;
++}
++
+ static bool fanotify_should_merge(struct fanotify_event *old,
+ struct fanotify_event *new)
+ {
+@@ -141,6 +151,9 @@ static bool fanotify_should_merge(struct fanotify_event *old,
+ case FANOTIFY_EVENT_TYPE_FID_NAME:
+ return fanotify_name_event_equal(FANOTIFY_NE(old),
+ FANOTIFY_NE(new));
++ case FANOTIFY_EVENT_TYPE_FS_ERROR:
++ return fanotify_error_event_equal(FANOTIFY_EE(old),
++ FANOTIFY_EE(new));
+ default:
+ WARN_ON_ONCE(1);
+ }
+@@ -176,6 +189,10 @@ static int fanotify_merge(struct fsnotify_group *group,
+ break;
+ if (fanotify_should_merge(old, new)) {
+ old->mask |= new->mask;
++
++ if (fanotify_is_error_event(old->mask))
++ FANOTIFY_EE(old)->err_count++;
++
+ return 1;
+ }
+ }
+@@ -577,7 +594,8 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
+ static struct fanotify_event *fanotify_alloc_error_event(
+ struct fsnotify_group *group,
+ __kernel_fsid_t *fsid,
+- const void *data, int data_type)
++ const void *data, int data_type,
++ unsigned int *hash)
+ {
+ struct fs_error_report *report =
+ fsnotify_data_error_report(data, data_type);
+@@ -591,6 +609,10 @@ static struct fanotify_event *fanotify_alloc_error_event(
+ return NULL;
+
+ fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
++ fee->err_count = 1;
++ fee->fsid = *fsid;
++
++ *hash ^= fanotify_hash_fsid(fsid);
+
+ return &fee->fae;
+ }
+@@ -664,7 +686,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ event = fanotify_alloc_perm_event(path, gfp);
+ } else if (fanotify_is_error_event(mask)) {
+ event = fanotify_alloc_error_event(group, fsid, data,
+- data_type);
++ data_type, &hash);
+ } else if (name_event && (file_name || child)) {
+ event = fanotify_alloc_name_event(id, fsid, file_name, child,
+ &hash, gfp);
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index ebef952481fa0..2b032b79d5b06 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -199,6 +199,9 @@ FANOTIFY_NE(struct fanotify_event *event)
+
+ struct fanotify_error_event {
+ struct fanotify_event fae;
++ u32 err_count; /* Suppressed errors count */
++
++ __kernel_fsid_t fsid; /* FSID this error refers to. */
+ };
+
+ static inline struct fanotify_error_event *
+@@ -332,7 +335,6 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
+ static inline bool fanotify_is_hashed_event(u32 mask)
+ {
+ return !(fanotify_is_perm_event(mask) ||
+- fanotify_is_error_event(mask) ||
+ fsnotify_is_overflow_event(mask));
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 4c14e8ef2484b86cee2467ce880b9e91166510b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:28 -0300
+Subject: fanotify: Support null inode event in fanotify_dfid_inode
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 12f47bf0f0990933d95d021d13d31bda010648fd ]
+
+FAN_FS_ERROR doesn't support DFID, but this function is still called for
+every event. The problem is that it is not capable of handling null
+inodes, which now can happen in case of superblock error events. For
+this case, just returning dir will be enough.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-14-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index c620b4f6fe123..397ee623ff1e8 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -452,7 +452,7 @@ static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data,
+ if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
+ return dir;
+
+- if (S_ISDIR(inode->i_mode))
++ if (inode && S_ISDIR(inode->i_mode))
+ return inode;
+
+ return dir;
+--
+2.43.0
+
--- /dev/null
+From 62ad6373c8071bd3ad392b8ebb1a2ddd811a45b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:33 +0200
+Subject: fanotify: support secondary dir fh and name in fanotify_info
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 3cf984e950c1c3f41d407ed31db33beb996be132 ]
+
+Allow storing a secondary dir fh and name tuple in fanotify_info.
+This will be used to store the new parent and name information in
+FAN_RENAME event.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-8-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 20 ++++++--
+ fs/notify/fanotify/fanotify.h | 79 +++++++++++++++++++++++++++---
+ fs/notify/fanotify/fanotify_user.c | 3 +-
+ 3 files changed, 88 insertions(+), 14 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 2b13c79cebc62..5f184b2d6ea7c 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -76,8 +76,10 @@ static bool fanotify_info_equal(struct fanotify_info *info1,
+ struct fanotify_info *info2)
+ {
+ if (info1->dir_fh_totlen != info2->dir_fh_totlen ||
++ info1->dir2_fh_totlen != info2->dir2_fh_totlen ||
+ info1->file_fh_totlen != info2->file_fh_totlen ||
+- info1->name_len != info2->name_len)
++ info1->name_len != info2->name_len ||
++ info1->name2_len != info2->name2_len)
+ return false;
+
+ if (info1->dir_fh_totlen &&
+@@ -85,14 +87,24 @@ static bool fanotify_info_equal(struct fanotify_info *info1,
+ fanotify_info_dir_fh(info2)))
+ return false;
+
++ if (info1->dir2_fh_totlen &&
++ !fanotify_fh_equal(fanotify_info_dir2_fh(info1),
++ fanotify_info_dir2_fh(info2)))
++ return false;
++
+ if (info1->file_fh_totlen &&
+ !fanotify_fh_equal(fanotify_info_file_fh(info1),
+ fanotify_info_file_fh(info2)))
+ return false;
+
+- return !info1->name_len ||
+- !memcmp(fanotify_info_name(info1), fanotify_info_name(info2),
+- info1->name_len);
++ if (info1->name_len &&
++ memcmp(fanotify_info_name(info1), fanotify_info_name(info2),
++ info1->name_len))
++ return false;
++
++ return !info1->name2_len ||
++ !memcmp(fanotify_info_name2(info1), fanotify_info_name2(info2),
++ info1->name2_len);
+ }
+
+ static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 7ac6f9f1e4148..8fa3bc0effd45 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -40,31 +40,45 @@ struct fanotify_fh {
+ struct fanotify_info {
+ /* size of dir_fh/file_fh including fanotify_fh hdr size */
+ u8 dir_fh_totlen;
++ u8 dir2_fh_totlen;
+ u8 file_fh_totlen;
+ u8 name_len;
+- u8 pad;
++ u8 name2_len;
++ u8 pad[3];
+ unsigned char buf[];
+ /*
+ * (struct fanotify_fh) dir_fh starts at buf[0]
+- * (optional) file_fh starts at buf[dir_fh_totlen]
+- * name starts at buf[dir_fh_totlen + file_fh_totlen]
++ * (optional) dir2_fh starts at buf[dir_fh_totlen]
++ * (optional) file_fh starts at buf[dir_fh_totlen + dir2_fh_totlen]
++ * name starts at buf[dir_fh_totlen + dir2_fh_totlen + file_fh_totlen]
++ * ...
+ */
+ #define FANOTIFY_DIR_FH_SIZE(info) ((info)->dir_fh_totlen)
++#define FANOTIFY_DIR2_FH_SIZE(info) ((info)->dir2_fh_totlen)
+ #define FANOTIFY_FILE_FH_SIZE(info) ((info)->file_fh_totlen)
+ #define FANOTIFY_NAME_SIZE(info) ((info)->name_len + 1)
++#define FANOTIFY_NAME2_SIZE(info) ((info)->name2_len + 1)
+
+ #define FANOTIFY_DIR_FH_OFFSET(info) 0
+-#define FANOTIFY_FILE_FH_OFFSET(info) \
++#define FANOTIFY_DIR2_FH_OFFSET(info) \
+ (FANOTIFY_DIR_FH_OFFSET(info) + FANOTIFY_DIR_FH_SIZE(info))
++#define FANOTIFY_FILE_FH_OFFSET(info) \
++ (FANOTIFY_DIR2_FH_OFFSET(info) + FANOTIFY_DIR2_FH_SIZE(info))
+ #define FANOTIFY_NAME_OFFSET(info) \
+ (FANOTIFY_FILE_FH_OFFSET(info) + FANOTIFY_FILE_FH_SIZE(info))
++#define FANOTIFY_NAME2_OFFSET(info) \
++ (FANOTIFY_NAME_OFFSET(info) + FANOTIFY_NAME_SIZE(info))
+
+ #define FANOTIFY_DIR_FH_BUF(info) \
+ ((info)->buf + FANOTIFY_DIR_FH_OFFSET(info))
++#define FANOTIFY_DIR2_FH_BUF(info) \
++ ((info)->buf + FANOTIFY_DIR2_FH_OFFSET(info))
+ #define FANOTIFY_FILE_FH_BUF(info) \
+ ((info)->buf + FANOTIFY_FILE_FH_OFFSET(info))
+ #define FANOTIFY_NAME_BUF(info) \
+ ((info)->buf + FANOTIFY_NAME_OFFSET(info))
++#define FANOTIFY_NAME2_BUF(info) \
++ ((info)->buf + FANOTIFY_NAME2_OFFSET(info))
+ } __aligned(4);
+
+ static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh)
+@@ -106,6 +120,20 @@ static inline struct fanotify_fh *fanotify_info_dir_fh(struct fanotify_info *inf
+ return (struct fanotify_fh *)FANOTIFY_DIR_FH_BUF(info);
+ }
+
++static inline int fanotify_info_dir2_fh_len(struct fanotify_info *info)
++{
++ if (!info->dir2_fh_totlen ||
++ WARN_ON_ONCE(info->dir2_fh_totlen < FANOTIFY_FH_HDR_LEN))
++ return 0;
++
++ return info->dir2_fh_totlen - FANOTIFY_FH_HDR_LEN;
++}
++
++static inline struct fanotify_fh *fanotify_info_dir2_fh(struct fanotify_info *info)
++{
++ return (struct fanotify_fh *)FANOTIFY_DIR2_FH_BUF(info);
++}
++
+ static inline int fanotify_info_file_fh_len(struct fanotify_info *info)
+ {
+ if (!info->file_fh_totlen ||
+@@ -128,31 +156,55 @@ static inline char *fanotify_info_name(struct fanotify_info *info)
+ return FANOTIFY_NAME_BUF(info);
+ }
+
++static inline char *fanotify_info_name2(struct fanotify_info *info)
++{
++ if (!info->name2_len)
++ return NULL;
++
++ return FANOTIFY_NAME2_BUF(info);
++}
++
+ static inline void fanotify_info_init(struct fanotify_info *info)
+ {
+ BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN + MAX_HANDLE_SZ > U8_MAX);
+ BUILD_BUG_ON(NAME_MAX > U8_MAX);
+
+ info->dir_fh_totlen = 0;
++ info->dir2_fh_totlen = 0;
+ info->file_fh_totlen = 0;
+ info->name_len = 0;
++ info->name2_len = 0;
+ }
+
+ /* These set/copy helpers MUST be called by order */
+ static inline void fanotify_info_set_dir_fh(struct fanotify_info *info,
+ unsigned int totlen)
+ {
+- if (WARN_ON_ONCE(info->file_fh_totlen > 0) ||
+- WARN_ON_ONCE(info->name_len > 0))
++ if (WARN_ON_ONCE(info->dir2_fh_totlen > 0) ||
++ WARN_ON_ONCE(info->file_fh_totlen > 0) ||
++ WARN_ON_ONCE(info->name_len > 0) ||
++ WARN_ON_ONCE(info->name2_len > 0))
+ return;
+
+ info->dir_fh_totlen = totlen;
+ }
+
++static inline void fanotify_info_set_dir2_fh(struct fanotify_info *info,
++ unsigned int totlen)
++{
++ if (WARN_ON_ONCE(info->file_fh_totlen > 0) ||
++ WARN_ON_ONCE(info->name_len > 0) ||
++ WARN_ON_ONCE(info->name2_len > 0))
++ return;
++
++ info->dir2_fh_totlen = totlen;
++}
++
+ static inline void fanotify_info_set_file_fh(struct fanotify_info *info,
+ unsigned int totlen)
+ {
+- if (WARN_ON_ONCE(info->name_len > 0))
++ if (WARN_ON_ONCE(info->name_len > 0) ||
++ WARN_ON_ONCE(info->name2_len > 0))
+ return;
+
+ info->file_fh_totlen = totlen;
+@@ -161,13 +213,24 @@ static inline void fanotify_info_set_file_fh(struct fanotify_info *info,
+ static inline void fanotify_info_copy_name(struct fanotify_info *info,
+ const struct qstr *name)
+ {
+- if (WARN_ON_ONCE(name->len > NAME_MAX))
++ if (WARN_ON_ONCE(name->len > NAME_MAX) ||
++ WARN_ON_ONCE(info->name2_len > 0))
+ return;
+
+ info->name_len = name->len;
+ strcpy(fanotify_info_name(info), name->name);
+ }
+
++static inline void fanotify_info_copy_name2(struct fanotify_info *info,
++ const struct qstr *name)
++{
++ if (WARN_ON_ONCE(name->len > NAME_MAX))
++ return;
++
++ info->name2_len = name->len;
++ strcpy(fanotify_info_name2(info), name->name);
++}
++
+ /*
+ * Common structure for fanotify events. Concrete structs are allocated in
+ * fanotify_handle_event() and freed when the information is retrieved by
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index bfafda0447ea7..526e3b5a98f34 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -332,11 +332,10 @@ static int process_access_response(struct fsnotify_group *group,
+ static size_t copy_error_info_to_user(struct fanotify_event *event,
+ char __user *buf, int count)
+ {
+- struct fanotify_event_info_error info;
++ struct fanotify_event_info_error info = { };
+ struct fanotify_error_event *fee = FANOTIFY_EE(event);
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
+- info.hdr.pad = 0;
+ info.hdr.len = FANOTIFY_ERROR_INFO_LEN;
+
+ if (WARN_ON(count < info.hdr.len))
+--
+2.43.0
+
--- /dev/null
+From a456859220c5b08a70d676535244407868bd14e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:26 +0300
+Subject: fanotify: use fsnotify group lock helpers
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit e79719a2ca5c61912c0493bc1367db52759cf6fd ]
+
+Direct reclaim from fanotify mark allocation context may try to evict
+inodes with evictable marks of the same group and hit this deadlock:
+
+[<0>] fsnotify_destroy_mark+0x1f/0x3a
+[<0>] fsnotify_destroy_marks+0x71/0xd9
+[<0>] __destroy_inode+0x24/0x7e
+[<0>] destroy_inode+0x2c/0x67
+[<0>] dispose_list+0x49/0x68
+[<0>] prune_icache_sb+0x5b/0x79
+[<0>] super_cache_scan+0x11c/0x16f
+[<0>] shrink_slab.constprop.0+0x23e/0x40f
+[<0>] shrink_node+0x218/0x3e7
+[<0>] do_try_to_free_pages+0x12a/0x2d2
+[<0>] try_to_free_pages+0x166/0x242
+[<0>] __alloc_pages_slowpath.constprop.0+0x30c/0x903
+[<0>] __alloc_pages+0xeb/0x1c7
+[<0>] cache_grow_begin+0x6f/0x31e
+[<0>] fallback_alloc+0xe0/0x12d
+[<0>] ____cache_alloc_node+0x15a/0x17e
+[<0>] kmem_cache_alloc_trace+0xa1/0x143
+[<0>] fanotify_add_mark+0xd5/0x2b2
+[<0>] do_fanotify_mark+0x566/0x5eb
+[<0>] __x64_sys_fanotify_mark+0x21/0x24
+[<0>] do_syscall_64+0x6d/0x80
+[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Set the FSNOTIFY_GROUP_NOFS flag to prevent going into direct reclaim
+from allocations under fanotify group lock and use the safe group lock
+helpers.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-16-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220321112310.vpr7oxro2xkz5llh@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 9bb182dc3f9b3..b4d16caa98d80 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1031,10 +1031,10 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
+ __u32 removed;
+ int destroy_mark;
+
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ fsn_mark = fsnotify_find_mark(connp, group);
+ if (!fsn_mark) {
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ return -ENOENT;
+ }
+
+@@ -1044,7 +1044,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
+ fsnotify_recalc_mask(fsn_mark->connector);
+ if (destroy_mark)
+ fsnotify_detach_mark(fsn_mark);
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ if (destroy_mark)
+ fsnotify_free_mark(fsn_mark);
+
+@@ -1192,13 +1192,13 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ bool recalc;
+ int ret = 0;
+
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ fsn_mark = fsnotify_find_mark(connp, group);
+ if (!fsn_mark) {
+ fsn_mark = fanotify_add_new_mark(group, connp, obj_type,
+ fan_flags, fsid);
+ if (IS_ERR(fsn_mark)) {
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ return PTR_ERR(fsn_mark);
+ }
+ }
+@@ -1227,7 +1227,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ fsnotify_recalc_mask(fsn_mark->connector);
+
+ out:
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+
+ fsnotify_put_mark(fsn_mark);
+ return ret;
+@@ -1381,7 +1381,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+
+ /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
+ group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
+- FSNOTIFY_GROUP_USER);
++ FSNOTIFY_GROUP_USER | FSNOTIFY_GROUP_NOFS);
+ if (IS_ERR(group)) {
+ return PTR_ERR(group);
+ }
+--
+2.43.0
+
--- /dev/null
+From d081712cdf4b7a4274fb883bac202ddb596238c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:32 +0200
+Subject: fanotify: use helpers to parcel fanotify_info buffer
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 1a9515ac9e55e68d733bab81bd408463ab1e25b1 ]
+
+fanotify_info buffer is parceled into variable sized records, so the
+records must be written in order: dir_fh, file_fh, name.
+
+Use helpers to assert that order and make fanotify_alloc_name_event()
+a bit more generic to allow empty dir_fh record and to allow expanding
+to more records (i.e. name2) soon.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-7-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 35 +++++++++++++++++++----------------
+ fs/notify/fanotify/fanotify.h | 20 ++++++++++++++++++++
+ 2 files changed, 39 insertions(+), 16 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index ffad224be0149..2b13c79cebc62 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -576,7 +576,7 @@ static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
+ return &ffe->fae;
+ }
+
+-static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
++static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir,
+ __kernel_fsid_t *fsid,
+ const struct qstr *name,
+ struct inode *child,
+@@ -586,15 +586,17 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
+ struct fanotify_name_event *fne;
+ struct fanotify_info *info;
+ struct fanotify_fh *dfh, *ffh;
+- unsigned int dir_fh_len = fanotify_encode_fh_len(id);
++ unsigned int dir_fh_len = fanotify_encode_fh_len(dir);
+ unsigned int child_fh_len = fanotify_encode_fh_len(child);
+- unsigned int size;
++ unsigned long name_len = name ? name->len : 0;
++ unsigned int len, size;
+
+- size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len;
++ /* Reserve terminating null byte even for empty name */
++ size = sizeof(*fne) + name_len + 1;
++ if (dir_fh_len)
++ size += FANOTIFY_FH_HDR_LEN + dir_fh_len;
+ if (child_fh_len)
+ size += FANOTIFY_FH_HDR_LEN + child_fh_len;
+- if (name)
+- size += name->len + 1;
+ fne = kmalloc(size, gfp);
+ if (!fne)
+ return NULL;
+@@ -604,22 +606,23 @@ static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
+ *hash ^= fanotify_hash_fsid(fsid);
+ info = &fne->info;
+ fanotify_info_init(info);
+- dfh = fanotify_info_dir_fh(info);
+- info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, hash, 0);
++ if (dir_fh_len) {
++ dfh = fanotify_info_dir_fh(info);
++ len = fanotify_encode_fh(dfh, dir, dir_fh_len, hash, 0);
++ fanotify_info_set_dir_fh(info, len);
++ }
+ if (child_fh_len) {
+ ffh = fanotify_info_file_fh(info);
+- info->file_fh_totlen = fanotify_encode_fh(ffh, child,
+- child_fh_len, hash, 0);
++ len = fanotify_encode_fh(ffh, child, child_fh_len, hash, 0);
++ fanotify_info_set_file_fh(info, len);
+ }
+- if (name) {
+- long salt = name->len;
+-
++ if (name_len) {
+ fanotify_info_copy_name(info, name);
+- *hash ^= full_name_hash((void *)salt, name->name, name->len);
++ *hash ^= full_name_hash((void *)name_len, name->name, name_len);
+ }
+
+- pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
+- __func__, id->i_ino, size, dir_fh_len, child_fh_len,
++ pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
++ __func__, size, dir_fh_len, child_fh_len,
+ info->name_len, info->name_len, fanotify_info_name(info));
+
+ return &fne->fae;
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index dd23ba659e76b..7ac6f9f1e4148 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -138,6 +138,26 @@ static inline void fanotify_info_init(struct fanotify_info *info)
+ info->name_len = 0;
+ }
+
++/* These set/copy helpers MUST be called by order */
++static inline void fanotify_info_set_dir_fh(struct fanotify_info *info,
++ unsigned int totlen)
++{
++ if (WARN_ON_ONCE(info->file_fh_totlen > 0) ||
++ WARN_ON_ONCE(info->name_len > 0))
++ return;
++
++ info->dir_fh_totlen = totlen;
++}
++
++static inline void fanotify_info_set_file_fh(struct fanotify_info *info,
++ unsigned int totlen)
++{
++ if (WARN_ON_ONCE(info->name_len > 0))
++ return;
++
++ info->file_fh_totlen = totlen;
++}
++
+ static inline void fanotify_info_copy_name(struct fanotify_info *info,
+ const struct qstr *name)
+ {
+--
+2.43.0
+
--- /dev/null
+From bd1fb3102eaf07d003ea368e36d50b09c665e302 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:31 +0200
+Subject: fanotify: use macros to get the offset to fanotify_info buffer
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 2d9374f095136206a02eb0b6cd9ef94632c1e9f7 ]
+
+The fanotify_info buffer contains up to two file handles and a name.
+Use macros to simplify the code that accesses the different items within
+the buffer.
+
+Add assertions to verify that stored fh len and name len do not overflow
+the u8 stored value in fanotify_info header.
+
+Remove the unused fanotify_info_len() helper.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-6-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 2 +-
+ fs/notify/fanotify/fanotify.h | 41 +++++++++++++++++++++++++----------
+ 2 files changed, 31 insertions(+), 12 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 85e542b164c8c..ffad224be0149 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -411,7 +411,7 @@ static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
+ * be zero in that case if encoding fh len failed.
+ */
+ err = -ENOENT;
+- if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4))
++ if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ)
+ goto out_err;
+
+ /* No external buffer in a variable size allocated fh */
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index d25f500bf7e79..dd23ba659e76b 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -49,6 +49,22 @@ struct fanotify_info {
+ * (optional) file_fh starts at buf[dir_fh_totlen]
+ * name starts at buf[dir_fh_totlen + file_fh_totlen]
+ */
++#define FANOTIFY_DIR_FH_SIZE(info) ((info)->dir_fh_totlen)
++#define FANOTIFY_FILE_FH_SIZE(info) ((info)->file_fh_totlen)
++#define FANOTIFY_NAME_SIZE(info) ((info)->name_len + 1)
++
++#define FANOTIFY_DIR_FH_OFFSET(info) 0
++#define FANOTIFY_FILE_FH_OFFSET(info) \
++ (FANOTIFY_DIR_FH_OFFSET(info) + FANOTIFY_DIR_FH_SIZE(info))
++#define FANOTIFY_NAME_OFFSET(info) \
++ (FANOTIFY_FILE_FH_OFFSET(info) + FANOTIFY_FILE_FH_SIZE(info))
++
++#define FANOTIFY_DIR_FH_BUF(info) \
++ ((info)->buf + FANOTIFY_DIR_FH_OFFSET(info))
++#define FANOTIFY_FILE_FH_BUF(info) \
++ ((info)->buf + FANOTIFY_FILE_FH_OFFSET(info))
++#define FANOTIFY_NAME_BUF(info) \
++ ((info)->buf + FANOTIFY_NAME_OFFSET(info))
+ } __aligned(4);
+
+ static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh)
+@@ -87,7 +103,7 @@ static inline struct fanotify_fh *fanotify_info_dir_fh(struct fanotify_info *inf
+ {
+ BUILD_BUG_ON(offsetof(struct fanotify_info, buf) % 4);
+
+- return (struct fanotify_fh *)info->buf;
++ return (struct fanotify_fh *)FANOTIFY_DIR_FH_BUF(info);
+ }
+
+ static inline int fanotify_info_file_fh_len(struct fanotify_info *info)
+@@ -101,32 +117,35 @@ static inline int fanotify_info_file_fh_len(struct fanotify_info *info)
+
+ static inline struct fanotify_fh *fanotify_info_file_fh(struct fanotify_info *info)
+ {
+- return (struct fanotify_fh *)(info->buf + info->dir_fh_totlen);
++ return (struct fanotify_fh *)FANOTIFY_FILE_FH_BUF(info);
+ }
+
+-static inline const char *fanotify_info_name(struct fanotify_info *info)
++static inline char *fanotify_info_name(struct fanotify_info *info)
+ {
+- return info->buf + info->dir_fh_totlen + info->file_fh_totlen;
++ if (!info->name_len)
++ return NULL;
++
++ return FANOTIFY_NAME_BUF(info);
+ }
+
+ static inline void fanotify_info_init(struct fanotify_info *info)
+ {
++ BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN + MAX_HANDLE_SZ > U8_MAX);
++ BUILD_BUG_ON(NAME_MAX > U8_MAX);
++
+ info->dir_fh_totlen = 0;
+ info->file_fh_totlen = 0;
+ info->name_len = 0;
+ }
+
+-static inline unsigned int fanotify_info_len(struct fanotify_info *info)
+-{
+- return info->dir_fh_totlen + info->file_fh_totlen + info->name_len;
+-}
+-
+ static inline void fanotify_info_copy_name(struct fanotify_info *info,
+ const struct qstr *name)
+ {
++ if (WARN_ON_ONCE(name->len > NAME_MAX))
++ return;
++
+ info->name_len = name->len;
+- strcpy(info->buf + info->dir_fh_totlen + info->file_fh_totlen,
+- name->name);
++ strcpy(fanotify_info_name(info), name->name);
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 9d94dc415481cee195dc8104deba287def8ff436 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:40 -0300
+Subject: fanotify: WARN_ON against too large file handles
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 572c28f27a269f88e2d8d7b6b1507f114d637337 ]
+
+struct fanotify_error_event, at least, is preallocated and isn't able to
+handle arbitrarily large file handles. Future-proof the code by
+complaining loudly if a handle larger than MAX_HANDLE_SZ is ever found.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-26-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index cedcb15468043..45df610debbe4 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -360,13 +360,23 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ static int fanotify_encode_fh_len(struct inode *inode)
+ {
+ int dwords = 0;
++ int fh_len;
+
+ if (!inode)
+ return 0;
+
+ exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
++ fh_len = dwords << 2;
+
+- return dwords << 2;
++ /*
++ * struct fanotify_error_event might be preallocated and is
++ * limited to MAX_HANDLE_SZ. This should never happen, but
++ * safeguard by forcing an invalid file handle.
++ */
++ if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ))
++ return 0;
++
++ return fh_len;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 7a136c09faf4c09f03dbd73124568a9185f19cd4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:37 +0200
+Subject: fanotify: wire up FAN_RENAME event
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 8cc3b1ccd930fe6971e1527f0c4f1bdc8cb56026 ]
+
+FAN_RENAME is the successor of FAN_MOVED_FROM and FAN_MOVED_TO
+and can be used to get the old and new parent+name information in
+a single event.
+
+FAN_MOVED_FROM and FAN_MOVED_TO are still supported for backward
+compatibility, but it makes little sense to use them together with
+FAN_RENAME in the same group.
+
+FAN_RENAME uses special info type records to report the old and
+new parent+name, so reporting only old and new parent id is less
+useful and was not implemented.
+Therefore, FAN_RENAME requires a group with flag FAN_REPORT_NAME.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-12-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 2 +-
+ fs/notify/fanotify/fanotify_user.c | 8 ++++++++
+ include/linux/fanotify.h | 3 ++-
+ 3 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 0da305b6f3e2f..985e995d2a398 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -930,7 +930,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
+ BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
+
+- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
++ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+
+ mask = fanotify_group_event_mask(group, iter_info, &match_mask,
+ mask, data, data_type, dir);
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index b3ac2d877e1ee..ce84eb8443b10 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1604,6 +1604,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
+ (!fid_mode || mark_type == FAN_MARK_MOUNT))
+ goto fput_and_out;
+
++ /*
++ * FAN_RENAME uses special info type records to report the old and
++ * new parent+name. Reporting only old and new parent id is less
++ * useful and was not implemented.
++ */
++ if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
++ goto fput_and_out;
++
+ if (flags & FAN_MARK_FLUSH) {
+ ret = 0;
+ if (mark_type == FAN_MARK_MOUNT)
+diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
+index 376e050e6f384..3afdf339d53c9 100644
+--- a/include/linux/fanotify.h
++++ b/include/linux/fanotify.h
+@@ -82,7 +82,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
+ * Directory entry modification events - reported only to directory
+ * where entry is modified and not to a watching parent.
+ */
+-#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE)
++#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \
++ FAN_RENAME)
+
+ /* Events that can be reported with event->fd */
+ #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
+--
+2.43.0
+
--- /dev/null
+From 648e43feeafef00883878935379431f871dbf091 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:37 -0300
+Subject: fanotify: Wrap object_fh inline space in a creator macro
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 2c5069433a3adc01ff9c5673567961bb7f138074 ]
+
+fanotify_error_event would duplicate this sequence of declarations that
+already exists elsewhere with a slightly different size. Create a helper
+macro to avoid code duplication.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-23-krisman@collabora.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.h | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 2b032b79d5b06..3510d06654ed0 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -171,12 +171,18 @@ static inline void fanotify_init_event(struct fanotify_event *event,
+ event->pid = NULL;
+ }
+
++#define FANOTIFY_INLINE_FH(name, size) \
++struct { \
++ struct fanotify_fh (name); \
++ /* Space for object_fh.buf[] - access with fanotify_fh_buf() */ \
++ unsigned char _inline_fh_buf[(size)]; \
++}
++
+ struct fanotify_fid_event {
+ struct fanotify_event fae;
+ __kernel_fsid_t fsid;
+- struct fanotify_fh object_fh;
+- /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
+- unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
++
++ FANOTIFY_INLINE_FH(object_fh, FANOTIFY_INLINE_FH_LEN);
+ };
+
+ static inline struct fanotify_fid_event *
+--
+2.43.0
+
--- /dev/null
+From 11111ab9e11b05b76ef1d82199a846d59c06b2f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 09:02:30 -0500
+Subject: filelock: add a new locks_inode_context accessor function
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 401a8b8fd5acd51582b15238d72a8d0edd580e9f ]
+
+There are a number of places in the kernel that are accessing the
+inode->i_flctx field without smp_load_acquire. This is required to
+ensure that the caller doesn't see a partially-initialized structure.
+
+Add a new accessor function for it to make this clear and convert all of
+the relevant accesses in locks.c to use it. Also, convert
+locks_free_lock_context to use the helper as well instead of just doing
+a "bare" assignment.
+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Stable-dep-of: 77c67530e1f9 ("nfsd: use locks_inode_context helper")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/locks.c | 24 ++++++++++++------------
+ include/linux/fs.h | 14 ++++++++++++++
+ 2 files changed, 26 insertions(+), 12 deletions(-)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 317c2ec17b943..77781b71bcaab 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -251,7 +251,7 @@ locks_get_lock_context(struct inode *inode, int type)
+ struct file_lock_context *ctx;
+
+ /* paired with cmpxchg() below */
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (likely(ctx) || type == F_UNLCK)
+ goto out;
+
+@@ -270,7 +270,7 @@ locks_get_lock_context(struct inode *inode, int type)
+ */
+ if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
+ kmem_cache_free(flctx_cache, ctx);
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ }
+ out:
+ trace_locks_get_lock_context(inode, type, ctx);
+@@ -323,7 +323,7 @@ locks_check_ctx_file_list(struct file *filp, struct list_head *list,
+ void
+ locks_free_lock_context(struct inode *inode)
+ {
+- struct file_lock_context *ctx = inode->i_flctx;
++ struct file_lock_context *ctx = locks_inode_context(inode);
+
+ if (unlikely(ctx)) {
+ locks_check_ctx_lists(inode);
+@@ -985,7 +985,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
+ void *owner;
+ void (*func)(void);
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx || list_empty_careful(&ctx->flc_posix)) {
+ fl->fl_type = F_UNLCK;
+ return;
+@@ -1577,7 +1577,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
+ new_fl->fl_flags = type;
+
+ /* typically we will check that ctx is non-NULL before calling */
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx) {
+ WARN_ON_ONCE(1);
+ goto free_lock;
+@@ -1682,7 +1682,7 @@ void lease_get_mtime(struct inode *inode, struct timespec64 *time)
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+ spin_lock(&ctx->flc_lock);
+ fl = list_first_entry_or_null(&ctx->flc_lease,
+@@ -1728,7 +1728,7 @@ int fcntl_getlease(struct file *filp)
+ int type = F_UNLCK;
+ LIST_HEAD(dispose);
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+ percpu_down_read(&file_rwsem);
+ spin_lock(&ctx->flc_lock);
+@@ -1917,7 +1917,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
+ struct file_lock_context *ctx;
+ LIST_HEAD(dispose);
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx) {
+ trace_generic_delete_lease(inode, NULL);
+ return error;
+@@ -2651,7 +2651,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
+ * posix_lock_file(). Another process could be setting a lock on this
+ * file at the same time, but we wouldn't remove that lock anyway.
+ */
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx || list_empty(&ctx->flc_posix))
+ return;
+
+@@ -2724,7 +2724,7 @@ void locks_remove_file(struct file *filp)
+ {
+ struct file_lock_context *ctx;
+
+- ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
++ ctx = locks_inode_context(locks_inode(filp));
+ if (!ctx)
+ return;
+
+@@ -2771,7 +2771,7 @@ bool vfs_inode_has_locks(struct inode *inode)
+ struct file_lock_context *ctx;
+ bool ret;
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return false;
+
+@@ -2962,7 +2962,7 @@ void show_fd_locks(struct seq_file *f,
+ struct file_lock_context *ctx;
+ int id = 0;
+
+- ctx = smp_load_acquire(&inode->i_flctx);
++ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return;
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index ef5a04d626953..61e86502fe65e 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1217,6 +1217,13 @@ extern void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files);
+ extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ fl_owner_t owner);
++
++static inline struct file_lock_context *
++locks_inode_context(const struct inode *inode)
++{
++ return smp_load_acquire(&inode->i_flctx);
++}
++
+ #else /* !CONFIG_FILE_LOCKING */
+ static inline int fcntl_getlk(struct file *file, unsigned int cmd,
+ struct flock __user *user)
+@@ -1362,6 +1369,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ {
+ return false;
+ }
++
++static inline struct file_lock_context *
++locks_inode_context(const struct inode *inode)
++{
++ return NULL;
++}
++
+ #endif /* !CONFIG_FILE_LOCKING */
+
+ static inline struct inode *file_inode(const struct file *f)
+--
+2.43.0
+
--- /dev/null
+From 712a7d5965f6392383d4da0c9915a729585f4397 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 May 2022 15:59:59 +0100
+Subject: fs: inotify: Fix typo in inotify comment
+
+From: Oliver Ford <ojford@gmail.com>
+
+Correct spelling in comment.
+
+Signed-off-by: Oliver Ford <ojford@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220518145959.41-1-ojford@gmail.com
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/inotify/inotify_user.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index 266b1302290ba..131938986e54f 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -128,7 +128,7 @@ static inline u32 inotify_mask_to_arg(__u32 mask)
+ IN_Q_OVERFLOW);
+ }
+
+-/* intofiy userspace file descriptor functions */
++/* inotify userspace file descriptor functions */
+ static __poll_t inotify_poll(struct file *file, poll_table *wait)
+ {
+ struct fsnotify_group *group = file->private_data;
+--
+2.43.0
+
--- /dev/null
+From b801e59eed338724f525a9f434657f3eac449078 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:25 -0700
+Subject: fs/lock: add 2 callbacks to lock_manager_operations to resolve
+ conflict
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 2443da2259e97688f93d64d17ab69b15f466078a ]
+
+Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
+lock_manager_operations to allow the lock manager to take appropriate
+action to resolve the lock conflict if possible.
+
+A new field, lm_mod_owner, is also added to lock_manager_operations.
+The lm_mod_owner is used by the fs/lock code to make sure the lock
+manager module such as nfsd, is not freed while lock conflict is being
+resolved.
+
+lm_lock_expirable checks and returns true to indicate that the lock
+conflict can be resolved else return false. This callback must be
+called with the flc_lock held so it can not block.
+
+lm_expire_lock is called to resolve the lock conflict if the returned
+value from lm_lock_expirable is true. This callback is called without
+the flc_lock held since it's allowed to block. Upon returning from
+this callback, the lock conflict should be resolved and the caller is
+expected to restart the conflict check from the beginning of the list.
+
+Lock manager, such as NFSv4 courteous server, uses this callback to
+resolve conflict by destroying lock owner, or the NFSv4 courtesy client
+(client that has expired but is allowed to maintain its state) that owns
+the lock.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ Documentation/filesystems/locking.rst | 4 ++++
+ fs/locks.c | 33 ++++++++++++++++++++++++---
+ include/linux/fs.h | 3 +++
+ 3 files changed, 37 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
+index e2a27bb5dc411..dabf4f7c755ed 100644
+--- a/Documentation/filesystems/locking.rst
++++ b/Documentation/filesystems/locking.rst
+@@ -442,6 +442,8 @@ prototypes::
+ void (*lm_break)(struct file_lock *); /* break_lease callback */
+ int (*lm_change)(struct file_lock **, int);
+ bool (*lm_breaker_owns_lease)(struct file_lock *);
++ bool (*lm_lock_expirable)(struct file_lock *);
++ void (*lm_expire_lock)(void);
+
+ locking rules:
+
+@@ -453,6 +455,8 @@ lm_grant: no no no
+ lm_break: yes no no
+ lm_change yes no no
+ lm_breaker_owns_lease: yes no no
++lm_lock_expirable yes no no
++lm_expire_lock no no yes
+ ====================== ============= ================= =========
+
+ buffer_head
+diff --git a/fs/locks.c b/fs/locks.c
+index f8c4844ebcce4..317c2ec17b943 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -982,6 +982,8 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
+ struct file_lock *cfl;
+ struct file_lock_context *ctx;
+ struct inode *inode = locks_inode(filp);
++ void *owner;
++ void (*func)(void);
+
+ ctx = smp_load_acquire(&inode->i_flctx);
+ if (!ctx || list_empty_careful(&ctx->flc_posix)) {
+@@ -989,12 +991,23 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
+ return;
+ }
+
++retry:
+ spin_lock(&ctx->flc_lock);
+ list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
+- if (posix_locks_conflict(fl, cfl)) {
+- locks_copy_conflock(fl, cfl);
+- goto out;
++ if (!posix_locks_conflict(fl, cfl))
++ continue;
++ if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
++ && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
++ owner = cfl->fl_lmops->lm_mod_owner;
++ func = cfl->fl_lmops->lm_expire_lock;
++ __module_get(owner);
++ spin_unlock(&ctx->flc_lock);
++ (*func)();
++ module_put(owner);
++ goto retry;
+ }
++ locks_copy_conflock(fl, cfl);
++ goto out;
+ }
+ fl->fl_type = F_UNLCK;
+ out:
+@@ -1168,6 +1181,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ int error;
+ bool added = false;
+ LIST_HEAD(dispose);
++ void *owner;
++ void (*func)(void);
+
+ ctx = locks_get_lock_context(inode, request->fl_type);
+ if (!ctx)
+@@ -1186,6 +1201,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ new_fl2 = locks_alloc_lock();
+ }
+
++retry:
+ percpu_down_read(&file_rwsem);
+ spin_lock(&ctx->flc_lock);
+ /*
+@@ -1197,6 +1213,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
+ list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
+ if (!posix_locks_conflict(request, fl))
+ continue;
++ if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
++ && (*fl->fl_lmops->lm_lock_expirable)(fl)) {
++ owner = fl->fl_lmops->lm_mod_owner;
++ func = fl->fl_lmops->lm_expire_lock;
++ __module_get(owner);
++ spin_unlock(&ctx->flc_lock);
++ percpu_up_read(&file_rwsem);
++ (*func)();
++ module_put(owner);
++ goto retry;
++ }
+ if (conflock)
+ locks_copy_conflock(conflock, fl);
+ error = -EAGAIN;
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 9b7ce642d4f08..371d67c9221c5 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1066,6 +1066,7 @@ struct file_lock_operations {
+ };
+
+ struct lock_manager_operations {
++ void *lm_mod_owner;
+ fl_owner_t (*lm_get_owner)(fl_owner_t);
+ void (*lm_put_owner)(fl_owner_t);
+ void (*lm_notify)(struct file_lock *); /* unblock callback */
+@@ -1074,6 +1075,8 @@ struct lock_manager_operations {
+ int (*lm_change)(struct file_lock *, int, struct list_head *);
+ void (*lm_setup)(struct file_lock *, void **);
+ bool (*lm_breaker_owns_lease)(struct file_lock *);
++ bool (*lm_lock_expirable)(struct file_lock *cfl);
++ void (*lm_expire_lock)(void);
+ };
+
+ struct lock_manager {
+--
+2.43.0
+
--- /dev/null
+From 7954ac641d61b6dcec3ab5287cf4e437b9a522a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:24 -0700
+Subject: fs/lock: add helper locks_owner_has_blockers to check for blockers
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 591502c5cb325b1c6ec59ab161927d606b918aa0 ]
+
+Add helper locks_owner_has_blockers to check if there are any blockers
+for a given lockowner.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/locks.c | 28 ++++++++++++++++++++++++++++
+ include/linux/fs.h | 7 +++++++
+ 2 files changed, 35 insertions(+)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 4899a4666f24d..f8c4844ebcce4 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -376,6 +376,34 @@ void locks_release_private(struct file_lock *fl)
+ }
+ EXPORT_SYMBOL_GPL(locks_release_private);
+
++/**
++ * locks_owner_has_blockers - Check for blocking lock requests
++ * @flctx: file lock context
++ * @owner: lock owner
++ *
++ * Return values:
++ * %true: @owner has at least one blocker
++ * %false: @owner has no blockers
++ */
++bool locks_owner_has_blockers(struct file_lock_context *flctx,
++ fl_owner_t owner)
++{
++ struct file_lock *fl;
++
++ spin_lock(&flctx->flc_lock);
++ list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
++ if (fl->fl_owner != owner)
++ continue;
++ if (!list_empty(&fl->fl_blocked_requests)) {
++ spin_unlock(&flctx->flc_lock);
++ return true;
++ }
++ }
++ spin_unlock(&flctx->flc_lock);
++ return false;
++}
++EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
++
+ /* Free a lock which is not in use. */
+ void locks_free_lock(struct file_lock *fl)
+ {
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index f32723d937fb5..9b7ce642d4f08 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1212,6 +1212,8 @@ extern void lease_unregister_notifier(struct notifier_block *);
+ struct files_struct;
+ extern void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files);
++extern bool locks_owner_has_blockers(struct file_lock_context *flctx,
++ fl_owner_t owner);
+ #else /* !CONFIG_FILE_LOCKING */
+ static inline int fcntl_getlk(struct file *file, unsigned int cmd,
+ struct flock __user *user)
+@@ -1352,6 +1354,11 @@ static inline int lease_modify(struct file_lock *fl, int arg,
+ struct files_struct;
+ static inline void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files) {}
++static inline bool locks_owner_has_blockers(struct file_lock_context *flctx,
++ fl_owner_t owner)
++{
++ return false;
++}
+ #endif /* !CONFIG_FILE_LOCKING */
+
+ static inline struct inode *file_inode(const struct file *f)
+--
+2.43.0
+
--- /dev/null
+From 6eddf30aa29d1c70cea73bffc8876b5c88af56d7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Feb 2022 10:12:52 -0800
+Subject: fs/lock: documentation cleanup. Replace inode->i_lock with flc_lock.
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 9d6647762b9c6b555bc83d97d7c93be6057a990f ]
+
+Update lock usage of lock_manager_operations' functions to reflect
+the changes in commit 6109c85037e5 ("locks: add a dedicated spinlock
+to protect i_flctx lists").
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ Documentation/filesystems/locking.rst | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
+index 5833cea4a16b2..e2a27bb5dc411 100644
+--- a/Documentation/filesystems/locking.rst
++++ b/Documentation/filesystems/locking.rst
+@@ -446,13 +446,13 @@ prototypes::
+ locking rules:
+
+ ====================== ============= ================= =========
+-ops inode->i_lock blocked_lock_lock may block
++ops flc_lock blocked_lock_lock may block
+ ====================== ============= ================= =========
+-lm_notify: yes yes no
++lm_notify: no yes no
+ lm_grant: no no no
+ lm_break: yes no no
+ lm_change yes no no
+-lm_breaker_owns_lease: no no no
++lm_breaker_owns_lease: yes no no
+ ====================== ============= ================= =========
+
+ buffer_head
+--
+2.43.0
+
--- /dev/null
+From 83d4969343fcfbded337f94e4d2f86c9c05539a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Aug 2022 12:57:38 -0400
+Subject: fs/notify: constify path
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit d5bf88895f24686641c39420ee6df716dc1d95d8 ]
+
+Reviewed-by: Matthew Bobrowski <repnop@google.com>
+Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 2 +-
+ fs/notify/fanotify/fanotify.h | 2 +-
+ fs/notify/fanotify/fanotify_user.c | 6 +++---
+ 3 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index cd7d09a569fff..a2a15bc4df280 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -18,7 +18,7 @@
+
+ #include "fanotify.h"
+
+-static bool fanotify_path_equal(struct path *p1, struct path *p2)
++static bool fanotify_path_equal(const struct path *p1, const struct path *p2)
+ {
+ return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
+ }
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 1d9f11255c64f..bf6d4d38afa04 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -458,7 +458,7 @@ static inline bool fanotify_event_has_path(struct fanotify_event *event)
+ event->type == FANOTIFY_EVENT_TYPE_PATH_PERM;
+ }
+
+-static inline struct path *fanotify_event_path(struct fanotify_event *event)
++static inline const struct path *fanotify_event_path(struct fanotify_event *event)
+ {
+ if (event->type == FANOTIFY_EVENT_TYPE_PATH)
+ return &FANOTIFY_PE(event)->path;
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 879cd65b15187..d93418f213863 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -245,7 +245,7 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
+ return event;
+ }
+
+-static int create_fd(struct fsnotify_group *group, struct path *path,
++static int create_fd(struct fsnotify_group *group, const struct path *path,
+ struct file **file)
+ {
+ int client_fd;
+@@ -615,7 +615,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
+ char __user *buf, size_t count)
+ {
+ struct fanotify_event_metadata metadata;
+- struct path *path = fanotify_event_path(event);
++ const struct path *path = fanotify_event_path(event);
+ struct fanotify_info *info = fanotify_event_info(event);
+ unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
+ unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
+@@ -1549,7 +1549,7 @@ static int fanotify_test_fid(struct dentry *dentry)
+ }
+
+ static int fanotify_events_supported(struct fsnotify_group *group,
+- struct path *path, __u64 mask,
++ const struct path *path, __u64 mask,
+ unsigned int flags)
+ {
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+--
+2.43.0
+
--- /dev/null
+From bdf8544919b7c08cdb00cd7a20996e5351f336e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:23 -0300
+Subject: fsnotify: Add helper to detect overflow_event
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 808967a0a4d2f4ce6a2005c5692fffbecaf018c1 ]
+
+Similarly to fanotify_is_perm_event and friends, provide a helper
+predicate to say whether a mask is of an overflow event.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-9-krisman@collabora.com
+Suggested-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.h | 3 ++-
+ include/linux/fsnotify_backend.h | 5 +++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
+index 4a5e555dc3d25..c42cf8fd7d798 100644
+--- a/fs/notify/fanotify/fanotify.h
++++ b/fs/notify/fanotify/fanotify.h
+@@ -315,7 +315,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
+ */
+ static inline bool fanotify_is_hashed_event(u32 mask)
+ {
+- return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
++ return !(fanotify_is_perm_event(mask) ||
++ fsnotify_is_overflow_event(mask));
+ }
+
+ static inline unsigned int fanotify_event_hash_bucket(
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index a2db821e8a8f2..749bc85e1d1c4 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -510,6 +510,11 @@ static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
+ fsnotify_add_event(group, group->overflow_event, NULL, NULL);
+ }
+
++static inline bool fsnotify_is_overflow_event(u32 mask)
++{
++ return mask & FS_Q_OVERFLOW;
++}
++
+ static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
+ {
+ assert_spin_locked(&group->notification_lock);
+--
+2.43.0
+
--- /dev/null
+From dd30349fa145335469df91cb654247e78e1b2eb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:24 -0300
+Subject: fsnotify: Add wrapper around fsnotify_add_event
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 1ad03c3a326a86e259389592117252c851873395 ]
+
+fsnotify_add_event is growing in number of parameters, which in most
+cases are just passed a NULL pointer. So, split out a new
+fsnotify_insert_event function to clean things up for users who don't
+need an insert hook.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-10-krisman@collabora.com
+Suggested-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 4 ++--
+ fs/notify/inotify/inotify_fsnotify.c | 2 +-
+ fs/notify/notification.c | 12 ++++++------
+ include/linux/fsnotify_backend.h | 23 ++++++++++++++++-------
+ 4 files changed, 25 insertions(+), 16 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 310246f8d3f19..f82e20228999c 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -781,8 +781,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ }
+
+ fsn_event = &event->fse;
+- ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
+- fanotify_insert_event);
++ ret = fsnotify_insert_event(group, fsn_event, fanotify_merge,
++ fanotify_insert_event);
+ if (ret) {
+ /* Permission events shouldn't be merged */
+ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
+diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
+index b0530f75b274a..be3eb1cebdcce 100644
+--- a/fs/notify/inotify/inotify_fsnotify.c
++++ b/fs/notify/inotify/inotify_fsnotify.c
+@@ -123,7 +123,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ if (len)
+ strcpy(event->name, name->name);
+
+- ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL);
++ ret = fsnotify_add_event(group, fsn_event, inotify_merge);
+ if (ret) {
+ /* Our event wasn't used in the end. Free it. */
+ fsnotify_destroy_event(group, fsn_event);
+diff --git a/fs/notify/notification.c b/fs/notify/notification.c
+index 32f45543b9c64..44bb10f507153 100644
+--- a/fs/notify/notification.c
++++ b/fs/notify/notification.c
+@@ -78,12 +78,12 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
+ */
+-int fsnotify_add_event(struct fsnotify_group *group,
+- struct fsnotify_event *event,
+- int (*merge)(struct fsnotify_group *,
+- struct fsnotify_event *),
+- void (*insert)(struct fsnotify_group *,
+- struct fsnotify_event *))
++int fsnotify_insert_event(struct fsnotify_group *group,
++ struct fsnotify_event *event,
++ int (*merge)(struct fsnotify_group *,
++ struct fsnotify_event *),
++ void (*insert)(struct fsnotify_group *,
++ struct fsnotify_event *))
+ {
+ int ret = 0;
+ struct list_head *list = &group->notification_list;
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 749bc85e1d1c4..b323d0c4b9671 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -498,16 +498,25 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
+ extern void fsnotify_destroy_event(struct fsnotify_group *group,
+ struct fsnotify_event *event);
+ /* attach the event to the group notification queue */
+-extern int fsnotify_add_event(struct fsnotify_group *group,
+- struct fsnotify_event *event,
+- int (*merge)(struct fsnotify_group *,
+- struct fsnotify_event *),
+- void (*insert)(struct fsnotify_group *,
+- struct fsnotify_event *));
++extern int fsnotify_insert_event(struct fsnotify_group *group,
++ struct fsnotify_event *event,
++ int (*merge)(struct fsnotify_group *,
++ struct fsnotify_event *),
++ void (*insert)(struct fsnotify_group *,
++ struct fsnotify_event *));
++
++static inline int fsnotify_add_event(struct fsnotify_group *group,
++ struct fsnotify_event *event,
++ int (*merge)(struct fsnotify_group *,
++ struct fsnotify_event *))
++{
++ return fsnotify_insert_event(group, event, merge, NULL);
++}
++
+ /* Queue overflow event to a notification group */
+ static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
+ {
+- fsnotify_add_event(group, group->overflow_event, NULL, NULL);
++ fsnotify_add_event(group, group->overflow_event, NULL);
+ }
+
+ static inline bool fsnotify_is_overflow_event(u32 mask)
+--
+2.43.0
+
--- /dev/null
+From e65483a7cd17eb4fe94378d6d71a12cc88c7f7b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:22 +0300
+Subject: fsnotify: allow adding an inode mark without pinning inode
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit c3638b5b13740fa31762d414bbce8b7a694e582a ]
+
+fsnotify_add_mark() and variants implicitly take a reference on inode
+when attaching a mark to an inode.
+
+Make that behavior opt-out with the mark flag FSNOTIFY_MARK_FLAG_NO_IREF.
+
+Instead of taking the inode reference when attaching connector to inode
+and dropping the inode reference when detaching connector from inode,
+take the inode reference on attach of the first mark that wants to hold
+an inode reference and drop the inode reference on detach of the last
+mark that wants to hold an inode reference.
+
+Backends can "upgrade" an existing mark to take an inode reference, but
+cannot "downgrade" a mark with inode reference to release the reference.
+
+This leaves the choice to the backend whether or not to pin the inode
+when adding an inode mark.
+
+This is intended to be used when adding a mark with ignored mask that is
+used for optimization in cases where group can afford getting unneeded
+events and reinstate the mark with ignored mask when inode is accessed
+again after being evicted.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-12-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/mark.c | 76 +++++++++++++++++++++++---------
+ include/linux/fsnotify_backend.h | 2 +
+ 2 files changed, 58 insertions(+), 20 deletions(-)
+
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index 982ca2f20ff5d..c74ef947447d6 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -116,20 +116,64 @@ __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
+ return *fsnotify_conn_mask_p(conn);
+ }
+
+-static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
++static void fsnotify_get_inode_ref(struct inode *inode)
++{
++ ihold(inode);
++ atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
++}
++
++/*
++ * Grab or drop inode reference for the connector if needed.
++ *
++ * When it's time to drop the reference, we only clear the HAS_IREF flag and
++ * return the inode object. fsnotify_drop_object() will be resonsible for doing
++ * iput() outside of spinlocks. This happens when last mark that wanted iref is
++ * detached.
++ */
++static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
++ bool want_iref)
++{
++ bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
++ struct inode *inode = NULL;
++
++ if (conn->type != FSNOTIFY_OBJ_TYPE_INODE ||
++ want_iref == has_iref)
++ return NULL;
++
++ if (want_iref) {
++ /* Pin inode if any mark wants inode refcount held */
++ fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
++ conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;
++ } else {
++ /* Unpin inode after detach of last mark that wanted iref */
++ inode = fsnotify_conn_inode(conn);
++ conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
++ }
++
++ return inode;
++}
++
++static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
+ {
+ u32 new_mask = 0;
++ bool want_iref = false;
+ struct fsnotify_mark *mark;
+
+ assert_spin_locked(&conn->lock);
+ /* We can get detached connector here when inode is getting unlinked. */
+ if (!fsnotify_valid_obj_type(conn->type))
+- return;
++ return NULL;
+ hlist_for_each_entry(mark, &conn->list, obj_list) {
+- if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
+- new_mask |= fsnotify_calc_mask(mark);
++ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
++ continue;
++ new_mask |= fsnotify_calc_mask(mark);
++ if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
++ !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
++ want_iref = true;
+ }
+ *fsnotify_conn_mask_p(conn) = new_mask;
++
++ return fsnotify_update_iref(conn, want_iref);
+ }
+
+ /*
+@@ -169,12 +213,6 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
+ }
+ }
+
+-static void fsnotify_get_inode_ref(struct inode *inode)
+-{
+- ihold(inode);
+- atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
+-}
+-
+ static void fsnotify_put_inode_ref(struct inode *inode)
+ {
+ struct super_block *sb = inode->i_sb;
+@@ -213,6 +251,10 @@ static void *fsnotify_detach_connector_from_object(
+ if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
+ inode = fsnotify_conn_inode(conn);
+ inode->i_fsnotify_mask = 0;
++
++ /* Unpin inode when detaching from connector */
++ if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
++ inode = NULL;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
+ } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
+@@ -274,7 +316,8 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
+ objp = fsnotify_detach_connector_from_object(conn, &type);
+ free_conn = true;
+ } else {
+- __fsnotify_recalc_mask(conn);
++ objp = __fsnotify_recalc_mask(conn);
++ type = conn->type;
+ }
+ WRITE_ONCE(mark->connector, NULL);
+ spin_unlock(&conn->lock);
+@@ -497,7 +540,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+ unsigned int obj_type,
+ __kernel_fsid_t *fsid)
+ {
+- struct inode *inode = NULL;
+ struct fsnotify_mark_connector *conn;
+
+ conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
+@@ -505,6 +547,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+ return -ENOMEM;
+ spin_lock_init(&conn->lock);
+ INIT_HLIST_HEAD(&conn->list);
++ conn->flags = 0;
+ conn->type = obj_type;
+ conn->obj = connp;
+ /* Cache fsid of filesystem containing the object */
+@@ -515,10 +558,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+ conn->fsid.val[0] = conn->fsid.val[1] = 0;
+ conn->flags = 0;
+ }
+- if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
+- inode = fsnotify_conn_inode(conn);
+- fsnotify_get_inode_ref(inode);
+- }
+ fsnotify_get_sb_connectors(conn);
+
+ /*
+@@ -527,8 +566,6 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+ */
+ if (cmpxchg(connp, NULL, conn)) {
+ /* Someone else created list structure for us */
+- if (inode)
+- fsnotify_put_inode_ref(inode);
+ fsnotify_put_sb_connectors(conn);
+ kmem_cache_free(fsnotify_mark_connector_cachep, conn);
+ }
+@@ -690,8 +727,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ if (ret)
+ goto err;
+
+- if (mark->mask || mark->ignored_mask)
+- fsnotify_recalc_mask(mark->connector);
++ fsnotify_recalc_mask(mark->connector);
+
+ return ret;
+ err:
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index d62111e832440..9a1a9e78f69f5 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -456,6 +456,7 @@ struct fsnotify_mark_connector {
+ spinlock_t lock;
+ unsigned short type; /* Type of object [lock] */
+ #define FSNOTIFY_CONN_FLAG_HAS_FSID 0x01
++#define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02
+ unsigned short flags; /* flags [lock] */
+ __kernel_fsid_t fsid; /* fsid of filesystem containing object */
+ union {
+@@ -510,6 +511,7 @@ struct fsnotify_mark {
+ #define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020
+ /* fanotify mark flags */
+ #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
++#define FSNOTIFY_MARK_FLAG_NO_IREF 0x0200
+ unsigned int flags; /* flags [mark->lock] */
+ };
+
+--
+2.43.0
+
--- /dev/null
+From b491edf21b758b9074359cbf3128fab95525fe8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:18 -0300
+Subject: fsnotify: clarify contract for create event hooks
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit dabe729dddca550446e9cc118c96d1f91703345b ]
+
+Clarify argument names and contract for fsnotify_create() and
+fsnotify_mkdir() to reflect the anomaly of kernfs, which leaves dentries
+negative after mkdir/create.
+
+Remove the WARN_ON(!inode) in audit code that were added by the Fixes
+commit under the wrong assumption that dentries cannot be negative after
+mkdir/create.
+
+Fixes: aa93bdc5500c ("fsnotify: use helpers to access data by data_type")
+Link: https://lore.kernel.org/linux-fsdevel/87mtp5yz0q.fsf@collabora.com/
+Link: https://lore.kernel.org/r/20211025192746.66445-4-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reported-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/fsnotify.h | 22 ++++++++++++++++------
+ kernel/audit_fsnotify.c | 3 +--
+ kernel/audit_watch.c | 3 +--
+ 3 files changed, 18 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index 70e6b147a76ad..a327a95fa68f1 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -192,16 +192,22 @@ static inline void fsnotify_inoderemove(struct inode *inode)
+
+ /*
+ * fsnotify_create - 'name' was linked in
++ *
++ * Caller must make sure that dentry->d_name is stable.
++ * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
++ * ->d_inode later
+ */
+-static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
++static inline void fsnotify_create(struct inode *dir, struct dentry *dentry)
+ {
+- audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
++ audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
+
+- fsnotify_dirent(inode, dentry, FS_CREATE);
++ fsnotify_dirent(dir, dentry, FS_CREATE);
+ }
+
+ /*
+ * fsnotify_link - new hardlink in 'inode' directory
++ *
++ * Caller must make sure that new_dentry->d_name is stable.
+ * Note: We have to pass also the linked inode ptr as some filesystems leave
+ * new_dentry->d_inode NULL and instantiate inode pointer later
+ */
+@@ -266,12 +272,16 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
+
+ /*
+ * fsnotify_mkdir - directory 'name' was created
++ *
++ * Caller must make sure that dentry->d_name is stable.
++ * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
++ * ->d_inode later
+ */
+-static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
++static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry)
+ {
+- audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
++ audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
+
+- fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR);
++ fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR);
+ }
+
+ /*
+diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
+index c428312938e95..7a506b65e8630 100644
+--- a/kernel/audit_fsnotify.c
++++ b/kernel/audit_fsnotify.c
+@@ -161,8 +161,7 @@ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
+
+ audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
+
+- if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group) ||
+- WARN_ON_ONCE(!inode))
++ if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group))
+ return 0;
+
+ if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) {
+diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
+index edbeffee64b8e..fd7b30a2d9a4b 100644
+--- a/kernel/audit_watch.c
++++ b/kernel/audit_watch.c
+@@ -472,8 +472,7 @@ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
+
+ parent = container_of(inode_mark, struct audit_parent, mark);
+
+- if (WARN_ON_ONCE(inode_mark->group != audit_watch_group) ||
+- WARN_ON_ONCE(!inode))
++ if (WARN_ON_ONCE(inode_mark->group != audit_watch_group))
+ return 0;
+
+ if (mask & (FS_CREATE|FS_MOVED_TO) && inode)
+--
+2.43.0
+
--- /dev/null
+From d03335838dd756e4f8e519b66790038779678878 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:27 +0200
+Subject: fsnotify: clarify object type argument
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit ad69cd9972e79aba103ba5365de0acd35770c265 ]
+
+In preparation for separating object type from iterator type, rename
+some 'type' arguments in functions to 'obj_type' and remove the unused
+interface to clear marks by object type mask.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-2-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 8 ++++----
+ fs/notify/group.c | 2 +-
+ fs/notify/mark.c | 27 +++++++++++++++------------
+ include/linux/fsnotify_backend.h | 28 ++++++++++++----------------
+ 4 files changed, 32 insertions(+), 33 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 58b0a7fabd4a6..e8f6c843e9204 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1057,7 +1057,7 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+
+ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+ fsnotify_connp_t *connp,
+- unsigned int type,
++ unsigned int obj_type,
+ __kernel_fsid_t *fsid)
+ {
+ struct ucounts *ucounts = group->fanotify_data.ucounts;
+@@ -1080,7 +1080,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+ }
+
+ fsnotify_init_mark(mark, group);
+- ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
++ ret = fsnotify_add_mark_locked(mark, connp, obj_type, 0, fsid);
+ if (ret) {
+ fsnotify_put_mark(mark);
+ goto out_dec_ucounts;
+@@ -1105,7 +1105,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
+ }
+
+ static int fanotify_add_mark(struct fsnotify_group *group,
+- fsnotify_connp_t *connp, unsigned int type,
++ fsnotify_connp_t *connp, unsigned int obj_type,
+ __u32 mask, unsigned int flags,
+ __kernel_fsid_t *fsid)
+ {
+@@ -1116,7 +1116,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ mutex_lock(&group->mark_mutex);
+ fsn_mark = fsnotify_find_mark(connp, group);
+ if (!fsn_mark) {
+- fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
++ fsn_mark = fanotify_add_new_mark(group, connp, obj_type, fsid);
+ if (IS_ERR(fsn_mark)) {
+ mutex_unlock(&group->mark_mutex);
+ return PTR_ERR(fsn_mark);
+diff --git a/fs/notify/group.c b/fs/notify/group.c
+index 6a297efc47887..b7d4d64f87c29 100644
+--- a/fs/notify/group.c
++++ b/fs/notify/group.c
+@@ -58,7 +58,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
+ fsnotify_group_stop_queueing(group);
+
+ /* Clear all marks for this group and queue them for destruction */
+- fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES_MASK);
++ fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_ANY);
+
+ /*
+ * Some marks can still be pinned when waiting for response from
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index bea106fac0901..7c0946e16918a 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -496,7 +496,7 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
+ }
+
+ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+- unsigned int type,
++ unsigned int obj_type,
+ __kernel_fsid_t *fsid)
+ {
+ struct inode *inode = NULL;
+@@ -507,7 +507,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+ return -ENOMEM;
+ spin_lock_init(&conn->lock);
+ INIT_HLIST_HEAD(&conn->list);
+- conn->type = type;
++ conn->type = obj_type;
+ conn->obj = connp;
+ /* Cache fsid of filesystem containing the object */
+ if (fsid) {
+@@ -572,7 +572,8 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector(
+ * priority, highest number first, and then by the group's location in memory.
+ */
+ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+- fsnotify_connp_t *connp, unsigned int type,
++ fsnotify_connp_t *connp,
++ unsigned int obj_type,
+ int allow_dups, __kernel_fsid_t *fsid)
+ {
+ struct fsnotify_mark *lmark, *last = NULL;
+@@ -580,7 +581,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+ int cmp;
+ int err = 0;
+
+- if (WARN_ON(!fsnotify_valid_obj_type(type)))
++ if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
+ return -EINVAL;
+
+ /* Backend is expected to check for zero fsid (e.g. tmpfs) */
+@@ -592,7 +593,8 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+ conn = fsnotify_grab_connector(connp);
+ if (!conn) {
+ spin_unlock(&mark->lock);
+- err = fsnotify_attach_connector_to_object(connp, type, fsid);
++ err = fsnotify_attach_connector_to_object(connp, obj_type,
++ fsid);
+ if (err)
+ return err;
+ goto restart;
+@@ -665,7 +667,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+ * event types should be delivered to which group.
+ */
+ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+- fsnotify_connp_t *connp, unsigned int type,
++ fsnotify_connp_t *connp, unsigned int obj_type,
+ int allow_dups, __kernel_fsid_t *fsid)
+ {
+ struct fsnotify_group *group = mark->group;
+@@ -686,7 +688,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ fsnotify_get_mark(mark); /* for g_list */
+ spin_unlock(&mark->lock);
+
+- ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid);
++ ret = fsnotify_add_mark_list(mark, connp, obj_type, allow_dups, fsid);
+ if (ret)
+ goto err;
+
+@@ -706,13 +708,14 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ }
+
+ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
+- unsigned int type, int allow_dups, __kernel_fsid_t *fsid)
++ unsigned int obj_type, int allow_dups,
++ __kernel_fsid_t *fsid)
+ {
+ int ret;
+ struct fsnotify_group *group = mark->group;
+
+ mutex_lock(&group->mark_mutex);
+- ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid);
++ ret = fsnotify_add_mark_locked(mark, connp, obj_type, allow_dups, fsid);
+ mutex_unlock(&group->mark_mutex);
+ return ret;
+ }
+@@ -747,14 +750,14 @@ EXPORT_SYMBOL_GPL(fsnotify_find_mark);
+
+ /* Clear any marks in a group with given type mask */
+ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+- unsigned int type_mask)
++ unsigned int obj_type)
+ {
+ struct fsnotify_mark *lmark, *mark;
+ LIST_HEAD(to_free);
+ struct list_head *head = &to_free;
+
+ /* Skip selection step if we want to clear all marks. */
+- if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) {
++ if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) {
+ head = &group->marks_list;
+ goto clear;
+ }
+@@ -769,7 +772,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+ */
+ mutex_lock(&group->mark_mutex);
+ list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
+- if ((1U << mark->connector->type) & type_mask)
++ if (mark->connector->type == obj_type)
+ list_move(&mark->g_list, &to_free);
+ }
+ mutex_unlock(&group->mark_mutex);
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 51ef2b079bfa0..b9c84b1dbcc8f 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -338,6 +338,7 @@ static inline struct fs_error_report *fsnotify_data_error_report(
+ }
+
+ enum fsnotify_obj_type {
++ FSNOTIFY_OBJ_TYPE_ANY = -1,
+ FSNOTIFY_OBJ_TYPE_INODE,
+ FSNOTIFY_OBJ_TYPE_PARENT,
+ FSNOTIFY_OBJ_TYPE_VFSMOUNT,
+@@ -346,15 +347,9 @@ enum fsnotify_obj_type {
+ FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
+ };
+
+-#define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE)
+-#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT)
+-#define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+-#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB)
+-#define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
+-
+-static inline bool fsnotify_valid_obj_type(unsigned int type)
++static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
+ {
+- return (type < FSNOTIFY_OBJ_TYPE_COUNT);
++ return (obj_type < FSNOTIFY_OBJ_TYPE_COUNT);
+ }
+
+ struct fsnotify_iter_info {
+@@ -387,7 +382,7 @@ static inline void fsnotify_iter_set_report_type_mark(
+ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
+ struct fsnotify_iter_info *iter_info) \
+ { \
+- return (iter_info->report_mask & FSNOTIFY_OBJ_TYPE_##NAME##_FL) ? \
++ return (iter_info->report_mask & (1U << FSNOTIFY_OBJ_TYPE_##NAME)) ? \
+ iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \
+ }
+
+@@ -604,11 +599,11 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
+ __kernel_fsid_t *fsid);
+ /* attach the mark to the object */
+ extern int fsnotify_add_mark(struct fsnotify_mark *mark,
+- fsnotify_connp_t *connp, unsigned int type,
++ fsnotify_connp_t *connp, unsigned int obj_type,
+ int allow_dups, __kernel_fsid_t *fsid);
+ extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ fsnotify_connp_t *connp,
+- unsigned int type, int allow_dups,
++ unsigned int obj_type, int allow_dups,
+ __kernel_fsid_t *fsid);
+
+ /* attach the mark to the inode */
+@@ -637,22 +632,23 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
+ extern void fsnotify_free_mark(struct fsnotify_mark *mark);
+ /* Wait until all marks queued for destruction are destroyed */
+ extern void fsnotify_wait_marks_destroyed(void);
+-/* run all the marks in a group, and clear all of the marks attached to given object type */
+-extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
++/* Clear all of the marks of a group attached to a given object type */
++extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
++ unsigned int obj_type);
+ /* run all the marks in a group, and clear all of the vfsmount marks */
+ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
+ {
+- fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL);
++ fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT);
+ }
+ /* run all the marks in a group, and clear all of the inode marks */
+ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
+ {
+- fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL);
++ fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE);
+ }
+ /* run all the marks in a group, and clear all of the sn marks */
+ static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group)
+ {
+- fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL);
++ fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB);
+ }
+ extern void fsnotify_get_mark(struct fsnotify_mark *mark);
+ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
+--
+2.43.0
+
--- /dev/null
+From e259fb2830fab05fda7398c5deef1a928f63c124 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 May 2022 22:02:13 +0300
+Subject: fsnotify: consistent behavior for parent not watching children
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit e730558adffb88a52e562db089e969ee9510184a ]
+
+The logic for handling events on child in groups that have a mark on
+the parent inode, but without FS_EVENT_ON_CHILD flag in the mask is
+duplicated in several places and inconsistent.
+
+Move the logic into the preparation of mark type iterator, so that the
+parent mark type will be excluded from all mark type iterations in that
+case.
+
+This results in several subtle changes of behavior, hopefully all
+desired changes of behavior, for example:
+
+- Group A has a mount mark with FS_MODIFY in mask
+- Group A has a mark with ignore mask that does not survive FS_MODIFY
+ and does not watch children on directory D.
+- Group B has a mark with FS_MODIFY in mask that does watch children
+ on directory D.
+- FS_MODIFY event on file D/foo should not clear the ignore mask of
+ group A, but before this change it does
+
+And if group A ignore mask was set to survive FS_MODIFY:
+- FS_MODIFY event on file D/foo should be reported to group A on account
+ of the mount mark, but before this change it is wrongly ignored
+
+Fixes: 2f02fd3fa13e ("fanotify: fix ignore mask logic for events on child and on dir")
+Reported-by: Jan Kara <jack@suse.com>
+Link: https://lore.kernel.org/linux-fsdevel/20220314113337.j7slrb5srxukztje@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220511190213.831646-3-amir73il@gmail.com
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 10 +---------
+ fs/notify/fsnotify.c | 34 +++++++++++++++++++---------------
+ 2 files changed, 20 insertions(+), 24 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 263d303d8f8f1..4f897e1095470 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -320,7 +320,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ }
+
+ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+- /* Apply ignore mask regardless of ISDIR and ON_CHILD flags */
++ /* Apply ignore mask regardless of mark's ISDIR flag */
+ marks_ignored_mask |= mark->ignored_mask;
+
+ /*
+@@ -330,14 +330,6 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ if (event_mask & FS_ISDIR && !(mark->mask & FS_ISDIR))
+ continue;
+
+- /*
+- * If the event is on a child and this mark is on a parent not
+- * watching children, don't send it!
+- */
+- if (type == FSNOTIFY_ITER_TYPE_PARENT &&
+- !(mark->mask & FS_EVENT_ON_CHILD))
+- continue;
+-
+ marks_mask |= mark->mask;
+
+ /* Record the mark types of this group that matched the event */
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 35740a64ee453..0b3e74935cb4f 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -290,22 +290,15 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
+ }
+
+ if (parent_mark) {
+- /*
+- * parent_mark indicates that the parent inode is watching
+- * children and interested in this event, which is an event
+- * possible on child. But is *this mark* watching children and
+- * interested in this event?
+- */
+- if (parent_mark->mask & FS_EVENT_ON_CHILD) {
+- ret = fsnotify_handle_inode_event(group, parent_mark, mask,
+- data, data_type, dir, name, 0);
+- if (ret)
+- return ret;
+- }
+- if (!inode_mark)
+- return 0;
++ ret = fsnotify_handle_inode_event(group, parent_mark, mask,
++ data, data_type, dir, name, 0);
++ if (ret)
++ return ret;
+ }
+
++ if (!inode_mark)
++ return 0;
++
+ if (mask & FS_EVENT_ON_CHILD) {
+ /*
+ * Some events can be sent on both parent dir and child marks
+@@ -422,8 +415,19 @@ static bool fsnotify_iter_select_report_types(
+ iter_info->report_mask = 0;
+ fsnotify_foreach_iter_type(type) {
+ mark = iter_info->marks[type];
+- if (mark && mark->group == iter_info->current_group)
++ if (mark && mark->group == iter_info->current_group) {
++ /*
++ * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode
++ * is watching children and interested in this event,
++ * which is an event possible on child.
++ * But is *this mark* watching children?
++ */
++ if (type == FSNOTIFY_ITER_TYPE_PARENT &&
++ !(mark->mask & FS_EVENT_ON_CHILD))
++ continue;
++
+ fsnotify_iter_set_report_type(iter_info, type);
++ }
+ }
+
+ return true;
+--
+2.43.0
+
--- /dev/null
+From 0ccd41f56d119aa2207a21bba12c6dcd99bb00ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:17 +0300
+Subject: fsnotify: create helpers for group mark_mutex lock
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 43b245a788e2d8f1bb742668a9bdace02fcb3e96 ]
+
+Create helpers to take and release the group mark_mutex lock.
+
+Define a flag FSNOTIFY_GROUP_NOFS in fsnotify_group that determines
+if the mark_mutex lock is fs reclaim safe or not. If not safe, the
+lock helpers take the lock and disable direct fs reclaim.
+
+In that case we annotate the mutex with a different lockdep class to
+express to lockdep that an allocation of mark of an fs reclaim safe group
+may take the group lock of another "NOFS" group to evict inodes.
+
+For now, only the callers in common code are converted, and no backend
+defines the NOFS flag. It is intended to be set by fanotify for
+evictable marks support.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-7-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220321112310.vpr7oxro2xkz5llh@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fdinfo.c | 4 ++--
+ fs/notify/group.c | 11 +++++++++++
+ fs/notify/mark.c | 24 +++++++++++-------------
+ include/linux/fsnotify_backend.h | 28 ++++++++++++++++++++++++++++
+ 4 files changed, 52 insertions(+), 15 deletions(-)
+
+diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
+index 3451708fd035c..1f34c5c29fdbd 100644
+--- a/fs/notify/fdinfo.c
++++ b/fs/notify/fdinfo.c
+@@ -28,13 +28,13 @@ static void show_fdinfo(struct seq_file *m, struct file *f,
+ struct fsnotify_group *group = f->private_data;
+ struct fsnotify_mark *mark;
+
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ list_for_each_entry(mark, &group->marks_list, g_list) {
+ show(m, mark);
+ if (seq_has_overflowed(m))
+ break;
+ }
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ }
+
+ #if defined(CONFIG_EXPORTFS)
+diff --git a/fs/notify/group.c b/fs/notify/group.c
+index 18446b7b0d495..1de6631a3925e 100644
+--- a/fs/notify/group.c
++++ b/fs/notify/group.c
+@@ -115,6 +115,7 @@ static struct fsnotify_group *__fsnotify_alloc_group(
+ const struct fsnotify_ops *ops,
+ int flags, gfp_t gfp)
+ {
++ static struct lock_class_key nofs_marks_lock;
+ struct fsnotify_group *group;
+
+ group = kzalloc(sizeof(struct fsnotify_group), gfp);
+@@ -135,6 +136,16 @@ static struct fsnotify_group *__fsnotify_alloc_group(
+
+ group->ops = ops;
+ group->flags = flags;
++ /*
++ * For most backends, eviction of inode with a mark is not expected,
++ * because marks hold a refcount on the inode against eviction.
++ *
++ * Use a different lockdep class for groups that support evictable
++ * inode marks, because with evictable marks, mark_mutex is NOT
++ * fs-reclaim safe - the mutex is taken when evicting inodes.
++ */
++ if (flags & FSNOTIFY_GROUP_NOFS)
++ lockdep_set_class(&group->mark_mutex, &nofs_marks_lock);
+
+ return group;
+ }
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index 1fb246ea61752..982ca2f20ff5d 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -398,9 +398,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
+ */
+ void fsnotify_detach_mark(struct fsnotify_mark *mark)
+ {
+- struct fsnotify_group *group = mark->group;
+-
+- WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
++ fsnotify_group_assert_locked(mark->group);
+ WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
+ refcount_read(&mark->refcnt) < 1 +
+ !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
+@@ -452,9 +450,9 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
+ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
+ struct fsnotify_group *group)
+ {
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ fsnotify_detach_mark(mark);
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ fsnotify_free_mark(mark);
+ }
+ EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
+@@ -673,7 +671,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ struct fsnotify_group *group = mark->group;
+ int ret = 0;
+
+- BUG_ON(!mutex_is_locked(&group->mark_mutex));
++ fsnotify_group_assert_locked(group);
+
+ /*
+ * LOCKING ORDER!!!!
+@@ -714,9 +712,9 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
+ int ret;
+ struct fsnotify_group *group = mark->group;
+
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(fsnotify_add_mark);
+@@ -770,24 +768,24 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+ * move marks to free to to_free list in one go and then free marks in
+ * to_free list one by one.
+ */
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
+ if (mark->connector->type == obj_type)
+ list_move(&mark->g_list, &to_free);
+ }
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+
+ clear:
+ while (1) {
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ if (list_empty(head)) {
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ break;
+ }
+ mark = list_first_entry(head, struct fsnotify_mark, g_list);
+ fsnotify_get_mark(mark);
+ fsnotify_detach_mark(mark);
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+ fsnotify_free_mark(mark);
+ fsnotify_put_mark(mark);
+ }
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index dd440e6ff5285..d62111e832440 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -20,6 +20,7 @@
+ #include <linux/user_namespace.h>
+ #include <linux/refcount.h>
+ #include <linux/mempool.h>
++#include <linux/sched/mm.h>
+
+ /*
+ * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
+@@ -212,7 +213,9 @@ struct fsnotify_group {
+
+ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
+ #define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */
++#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */
+ int flags;
++ unsigned int owner_flags; /* stored flags of mark_mutex owner */
+
+ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
+ struct mutex mark_mutex; /* protect marks_list */
+@@ -254,6 +257,31 @@ struct fsnotify_group {
+ };
+ };
+
++/*
++ * These helpers are used to prevent deadlock when reclaiming inodes with
++ * evictable marks of the same group that is allocating a new mark.
++ */
++static inline void fsnotify_group_lock(struct fsnotify_group *group)
++{
++ mutex_lock(&group->mark_mutex);
++ if (group->flags & FSNOTIFY_GROUP_NOFS)
++ group->owner_flags = memalloc_nofs_save();
++}
++
++static inline void fsnotify_group_unlock(struct fsnotify_group *group)
++{
++ if (group->flags & FSNOTIFY_GROUP_NOFS)
++ memalloc_nofs_restore(group->owner_flags);
++ mutex_unlock(&group->mark_mutex);
++}
++
++static inline void fsnotify_group_assert_locked(struct fsnotify_group *group)
++{
++ WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
++ if (group->flags & FSNOTIFY_GROUP_NOFS)
++ WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
++}
++
+ /* When calling fsnotify tell it if the data is a path or inode */
+ enum fsnotify_data_type {
+ FSNOTIFY_EVENT_NONE,
+--
+2.43.0
+
--- /dev/null
+From 361096834668878f5266f046b96c4569e093f39d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:19 -0300
+Subject: fsnotify: Don't insert unmergeable events in hashtable
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit cc53b55f697fe5aa98bdbfdfe67c6401da242155 ]
+
+Some events, like the overflow event, are not mergeable, so they are not
+hashed. But, when failing inside fsnotify_add_event for lack of space,
+fsnotify_add_event() still calls the insert hook, which adds the
+overflow event to the merge list. Add a check to prevent any kind of
+unmergeable event from being inserted in the hashtable.
+
+Fixes: 94e00d28a680 ("fsnotify: use hash table for faster events merge")
+Link: https://lore.kernel.org/r/20211025192746.66445-5-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 057abd2cf8875..310246f8d3f19 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -702,6 +702,9 @@ static void fanotify_insert_event(struct fsnotify_group *group,
+
+ assert_spin_locked(&group->notification_lock);
+
++ if (!fanotify_is_hashed_event(event->mask))
++ return;
++
+ pr_debug("%s: group=%p event=%p bucket=%u\n", __func__,
+ group, event, bucket);
+
+@@ -779,8 +782,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+
+ fsn_event = &event->fse;
+ ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
+- fanotify_is_hashed_event(mask) ?
+- fanotify_insert_event : NULL);
++ fanotify_insert_event);
+ if (ret) {
+ /* Permission events shouldn't be merged */
+ BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
+--
+2.43.0
+
--- /dev/null
+From 048899ffc33d7a3257b74c0646c3cd803eeb2a48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 03:46:39 +0800
+Subject: fsnotify: Fix comment typo
+
+From: Xin Gao <gaoxin@cdjrlc.com>
+
+[ Upstream commit feee1ce45a5666bbdb08c5bb2f5f394047b1915b ]
+
+The word `if' is duplicated on line 104; remove one.
+
+Signed-off-by: Xin Gao <gaoxin@cdjrlc.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220722194639.18545-1-gaoxin@cdjrlc.com
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fsnotify.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 8687562df2e37..7974e91ffe134 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -100,7 +100,7 @@ void fsnotify_sb_delete(struct super_block *sb)
+ * Given an inode, first check if we care what happens to our children. Inotify
+ * and dnotify both tell their parents about events. If we care about any event
+ * on a child we run all of our children and set a dentry flag saying that the
+- * parent cares. Thus when an event happens on a child it can quickly tell if
++ * parent cares. Thus when an event happens on a child it can quickly tell
+ * if there is a need to find a parent and send the event to the parent.
+ */
+ void __fsnotify_update_child_dentry_flags(struct inode *inode)
+--
+2.43.0
+
--- /dev/null
+From ef97dc1b2119aed88000d9e4be6115429460386c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Feb 2022 17:14:37 +0200
+Subject: fsnotify: fix merge with parent's ignored mask
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 4f0b903ded728c505850daf2914bfc08841f0ae6 ]
+
+fsnotify_parent() does not consider the parent's mark at all unless
+the parent inode shows interest in events on children and in the
+specific event.
+
+So unless parent added an event to both its mark mask and ignored mask,
+the event will not be ignored.
+
+Fix this by declaring the interest of an object in an event when the
+event is in either a mark mask or ignored mask.
+
+Link: https://lore.kernel.org/r/20220223151438.790268-2-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 17 +++++++++--------
+ fs/notify/mark.c | 4 ++--
+ include/linux/fsnotify_backend.h | 15 +++++++++++++++
+ 3 files changed, 26 insertions(+), 10 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index ce84eb8443b10..9ed9d7f6c2b50 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -999,17 +999,18 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+ __u32 mask, unsigned int flags,
+ __u32 umask, int *destroy)
+ {
+- __u32 oldmask = 0;
++ __u32 oldmask, newmask;
+
+ /* umask bits cannot be removed by user */
+ mask &= ~umask;
+ spin_lock(&fsn_mark->lock);
++ oldmask = fsnotify_calc_mask(fsn_mark);
+ if (!(flags & FAN_MARK_IGNORED_MASK)) {
+- oldmask = fsn_mark->mask;
+ fsn_mark->mask &= ~mask;
+ } else {
+ fsn_mark->ignored_mask &= ~mask;
+ }
++ newmask = fsnotify_calc_mask(fsn_mark);
+ /*
+ * We need to keep the mark around even if remaining mask cannot
+ * result in any events (e.g. mask == FAN_ONDIR) to support incremenal
+@@ -1019,7 +1020,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+ *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
+ spin_unlock(&fsn_mark->lock);
+
+- return mask & oldmask;
++ return oldmask & ~newmask;
+ }
+
+ static int fanotify_remove_mark(struct fsnotify_group *group,
+@@ -1077,23 +1078,23 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
+ }
+
+ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+- __u32 mask,
+- unsigned int flags)
++ __u32 mask, unsigned int flags)
+ {
+- __u32 oldmask = -1;
++ __u32 oldmask, newmask;
+
+ spin_lock(&fsn_mark->lock);
++ oldmask = fsnotify_calc_mask(fsn_mark);
+ if (!(flags & FAN_MARK_IGNORED_MASK)) {
+- oldmask = fsn_mark->mask;
+ fsn_mark->mask |= mask;
+ } else {
+ fsn_mark->ignored_mask |= mask;
+ if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
+ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
+ }
++ newmask = fsnotify_calc_mask(fsn_mark);
+ spin_unlock(&fsn_mark->lock);
+
+- return mask & ~oldmask;
++ return newmask & ~oldmask;
+ }
+
+ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index b42629d2fc1c6..c86982be2d505 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -127,7 +127,7 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
+ return;
+ hlist_for_each_entry(mark, &conn->list, obj_list) {
+ if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
+- new_mask |= mark->mask;
++ new_mask |= fsnotify_calc_mask(mark);
+ }
+ *fsnotify_conn_mask_p(conn) = new_mask;
+ }
+@@ -692,7 +692,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ if (ret)
+ goto err;
+
+- if (mark->mask)
++ if (mark->mask || mark->ignored_mask)
+ fsnotify_recalc_mask(mark->connector);
+
+ return ret;
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 790c31844db5d..5f9c960049b07 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -601,6 +601,21 @@ extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
+
+ /* functions used to manipulate the marks attached to inodes */
+
++/* Get mask for calculating object interest taking ignored mask into account */
++static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
++{
++ __u32 mask = mark->mask;
++
++ if (!mark->ignored_mask)
++ return mask;
++
++ /*
++ * If mark is interested in ignoring events on children, the object must
++ * show interest in those events for fsnotify_parent() to notice it.
++ */
++ return mask | (mark->ignored_mask & ALL_FSNOTIFY_EVENTS);
++}
++
+ /* Get mask of events for a list of marks */
+ extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn);
+ /* Calculate mask of events for a list of marks */
+--
+2.43.0
+
--- /dev/null
+From 1e3b8342d4333fe8c492818f170d12fced675d24 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:30 +0200
+Subject: fsnotify: generate FS_RENAME event with rich information
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit e54183fa7047c15819bc155f4c58501d9a9a3489 ]
+
+The dnotify FS_DN_RENAME event is used to request notification about
+a move within the same parent directory and was always coupled with
+the FS_MOVED_FROM event.
+
+Rename the FS_DN_RENAME event flag to FS_RENAME, decouple it from
+FS_MOVED_FROM and report it with the moved dentry instead of the moved
+inode, so it has the information about both old and new parent and name.
+
+Generate the FS_RENAME event regardless of same parent dir and apply
+the "same parent" rule in the generic fsnotify_handle_event() helper
+that is used to call backends with ->handle_inode_event() method
+(i.e. dnotify). The ->handle_inode_event() method is not rich enough to
+report both old and new parent and name anyway.
+
+The enriched event is reported to fanotify over the ->handle_event()
+method with the old and new dir inode marks in marks array slots for
+ITER_TYPE_INODE and a new iter type slot ITER_TYPE_INODE2.
+
+The enriched event will be used for reporting old and new parent+name to
+fanotify groups with FAN_RENAME events.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-5-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/dnotify/dnotify.c | 2 +-
+ fs/notify/fsnotify.c | 37 +++++++++++++++++++++++++-------
+ include/linux/dnotify.h | 2 +-
+ include/linux/fsnotify.h | 9 +++++---
+ include/linux/fsnotify_backend.h | 7 +++---
+ 5 files changed, 41 insertions(+), 16 deletions(-)
+
+diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
+index e85e13c50d6d4..d5ebebb034ffe 100644
+--- a/fs/notify/dnotify/dnotify.c
++++ b/fs/notify/dnotify/dnotify.c
+@@ -196,7 +196,7 @@ static __u32 convert_arg(unsigned long arg)
+ if (arg & DN_ATTRIB)
+ new_mask |= FS_ATTRIB;
+ if (arg & DN_RENAME)
+- new_mask |= FS_DN_RENAME;
++ new_mask |= FS_RENAME;
+ if (arg & DN_CREATE)
+ new_mask |= (FS_CREATE | FS_MOVED_TO);
+
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 0c94457c625e2..ab81a0776ece5 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -279,6 +279,18 @@ static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
+ WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
+ return 0;
+
++ /*
++ * For FS_RENAME, 'dir' is old dir and 'data' is new dentry.
++ * The only ->handle_inode_event() backend that supports FS_RENAME is
++ * dnotify, where it means file was renamed within same parent.
++ */
++ if (mask & FS_RENAME) {
++ struct dentry *moved = fsnotify_data_dentry(data, data_type);
++
++ if (dir != moved->d_parent->d_inode)
++ return 0;
++ }
++
+ if (parent_mark) {
+ /*
+ * parent_mark indicates that the parent inode is watching
+@@ -469,7 +481,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ struct super_block *sb = fsnotify_data_sb(data, data_type);
+ struct fsnotify_iter_info iter_info = {};
+ struct mount *mnt = NULL;
+- struct inode *parent = NULL;
++ struct inode *inode2 = NULL;
++ struct dentry *moved;
++ int inode2_type;
+ int ret = 0;
+ __u32 test_mask, marks_mask;
+
+@@ -479,12 +493,19 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ if (!inode) {
+ /* Dirent event - report on TYPE_INODE to dir */
+ inode = dir;
++ /* For FS_RENAME, inode is old_dir and inode2 is new_dir */
++ if (mask & FS_RENAME) {
++ moved = fsnotify_data_dentry(data, data_type);
++ inode2 = moved->d_parent->d_inode;
++ inode2_type = FSNOTIFY_ITER_TYPE_INODE2;
++ }
+ } else if (mask & FS_EVENT_ON_CHILD) {
+ /*
+ * Event on child - report on TYPE_PARENT to dir if it is
+ * watching children and on TYPE_INODE to child.
+ */
+- parent = dir;
++ inode2 = dir;
++ inode2_type = FSNOTIFY_ITER_TYPE_PARENT;
+ }
+
+ /*
+@@ -497,7 +518,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ if (!sb->s_fsnotify_marks &&
+ (!mnt || !mnt->mnt_fsnotify_marks) &&
+ (!inode || !inode->i_fsnotify_marks) &&
+- (!parent || !parent->i_fsnotify_marks))
++ (!inode2 || !inode2->i_fsnotify_marks))
+ return 0;
+
+ marks_mask = sb->s_fsnotify_mask;
+@@ -505,8 +526,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ marks_mask |= mnt->mnt_fsnotify_mask;
+ if (inode)
+ marks_mask |= inode->i_fsnotify_mask;
+- if (parent)
+- marks_mask |= parent->i_fsnotify_mask;
++ if (inode2)
++ marks_mask |= inode2->i_fsnotify_mask;
+
+
+ /*
+@@ -529,9 +550,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] =
+ fsnotify_first_mark(&inode->i_fsnotify_marks);
+ }
+- if (parent) {
+- iter_info.marks[FSNOTIFY_ITER_TYPE_PARENT] =
+- fsnotify_first_mark(&parent->i_fsnotify_marks);
++ if (inode2) {
++ iter_info.marks[inode2_type] =
++ fsnotify_first_mark(&inode2->i_fsnotify_marks);
+ }
+
+ /*
+diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
+index 0aad774beaec4..b87c3b85a166c 100644
+--- a/include/linux/dnotify.h
++++ b/include/linux/dnotify.h
+@@ -26,7 +26,7 @@ struct dnotify_struct {
+ FS_MODIFY | FS_MODIFY_CHILD |\
+ FS_ACCESS | FS_ACCESS_CHILD |\
+ FS_ATTRIB | FS_ATTRIB_CHILD |\
+- FS_CREATE | FS_DN_RENAME |\
++ FS_CREATE | FS_RENAME |\
+ FS_MOVED_FROM | FS_MOVED_TO)
+
+ extern int dir_notify_enable;
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index 67d6db6c8df8f..c80f448b9b0f2 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -144,16 +144,19 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
+ u32 fs_cookie = fsnotify_get_cookie();
+ __u32 old_dir_mask = FS_MOVED_FROM;
+ __u32 new_dir_mask = FS_MOVED_TO;
++ __u32 rename_mask = FS_RENAME;
+ const struct qstr *new_name = &moved->d_name;
+
+- if (old_dir == new_dir)
+- old_dir_mask |= FS_DN_RENAME;
+-
+ if (isdir) {
+ old_dir_mask |= FS_ISDIR;
+ new_dir_mask |= FS_ISDIR;
++ rename_mask |= FS_ISDIR;
+ }
+
++ /* Event with information about both old and new parent+name */
++ fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY,
++ old_dir, old_name, 0);
++
+ fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE,
+ old_dir, old_name, fs_cookie);
+ fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE,
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 73739fee1710f..790c31844db5d 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -63,7 +63,7 @@
+ */
+ #define FS_EVENT_ON_CHILD 0x08000000
+
+-#define FS_DN_RENAME 0x10000000 /* file renamed */
++#define FS_RENAME 0x10000000 /* File was renamed */
+ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */
+ #define FS_ISDIR 0x40000000 /* event occurred against dir */
+ #define FS_IN_ONESHOT 0x80000000 /* only send event once */
+@@ -76,7 +76,7 @@
+ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event
+ * when a directory entry inside a child subdir changes.
+ */
+-#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE)
++#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
+
+ #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \
+ FS_OPEN_EXEC_PERM)
+@@ -101,7 +101,7 @@
+ /* Events that can be reported to backends */
+ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+ FS_EVENTS_POSS_ON_CHILD | \
+- FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \
++ FS_DELETE_SELF | FS_MOVE_SELF | \
+ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
+ FS_ERROR)
+
+@@ -349,6 +349,7 @@ enum fsnotify_iter_type {
+ FSNOTIFY_ITER_TYPE_VFSMOUNT,
+ FSNOTIFY_ITER_TYPE_SB,
+ FSNOTIFY_ITER_TYPE_PARENT,
++ FSNOTIFY_ITER_TYPE_INODE2,
+ FSNOTIFY_ITER_TYPE_COUNT
+ };
+
+--
+2.43.0
+
--- /dev/null
+From 950396f38b44d89dd2008a643350f3d116562675 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 May 2022 22:02:12 +0300
+Subject: fsnotify: introduce mark type iterator
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 14362a2541797cf9df0e86fb12dcd7950baf566e ]
+
+fsnotify_foreach_iter_mark_type() is used to reduce boilerplate code
+of iterating all marks of a specific group interested in an event
+by consulting the iterator report_mask.
+
+Use an open coded version of that iterator in fsnotify_iter_next()
+that collects all marks of the current iteration group without
+consulting the iterator report_mask.
+
+At the moment, the two iterator variants are the same, but this
+decoupling will allow us to exclude some of the group's marks from
+reporting the event, for example for an event on a child when inode marks
+on the parent did not request to watch events on children.
+
+Fixes: 2f02fd3fa13e ("fanotify: fix ignore mask logic for events on child and on dir")
+Reported-by: Jan Kara <jack@suse.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220511190213.831646-2-amir73il@gmail.com
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 14 +++------
+ fs/notify/fsnotify.c | 53 ++++++++++++++++----------------
+ include/linux/fsnotify_backend.h | 31 ++++++++++++++-----
+ 3 files changed, 54 insertions(+), 44 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index 985e995d2a398..263d303d8f8f1 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -319,11 +319,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ return 0;
+ }
+
+- fsnotify_foreach_iter_type(type) {
+- if (!fsnotify_iter_should_report_type(iter_info, type))
+- continue;
+- mark = iter_info->marks[type];
+-
++ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ /* Apply ignore mask regardless of ISDIR and ON_CHILD flags */
+ marks_ignored_mask |= mark->ignored_mask;
+
+@@ -849,16 +845,14 @@ static struct fanotify_event *fanotify_alloc_event(
+ */
+ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
+ {
++ struct fsnotify_mark *mark;
+ int type;
+ __kernel_fsid_t fsid = {};
+
+- fsnotify_foreach_iter_type(type) {
++ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
+ struct fsnotify_mark_connector *conn;
+
+- if (!fsnotify_iter_should_report_type(iter_info, type))
+- continue;
+-
+- conn = READ_ONCE(iter_info->marks[type]->connector);
++ conn = READ_ONCE(mark->connector);
+ /* Mark is just getting destroyed or created? */
+ if (!conn)
+ continue;
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 6eee19d15e8cd..35740a64ee453 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -335,31 +335,23 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
+ struct fsnotify_mark *mark;
+ int type;
+
+- if (WARN_ON(!iter_info->report_mask))
++ if (!iter_info->report_mask)
+ return 0;
+
+ /* clear ignored on inode modification */
+ if (mask & FS_MODIFY) {
+- fsnotify_foreach_iter_type(type) {
+- if (!fsnotify_iter_should_report_type(iter_info, type))
+- continue;
+- mark = iter_info->marks[type];
+- if (mark &&
+- !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
++ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
++ if (!(mark->flags &
++ FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+ mark->ignored_mask = 0;
+ }
+ }
+
+- fsnotify_foreach_iter_type(type) {
+- if (!fsnotify_iter_should_report_type(iter_info, type))
+- continue;
+- mark = iter_info->marks[type];
+- /* does the object mark tell us to do something? */
+- if (mark) {
+- group = mark->group;
+- marks_mask |= mark->mask;
+- marks_ignored_mask |= mark->ignored_mask;
+- }
++ /* Are any of the group marks interested in this event? */
++ fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
++ group = mark->group;
++ marks_mask |= mark->mask;
++ marks_ignored_mask |= mark->ignored_mask;
+ }
+
+ pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
+@@ -403,11 +395,11 @@ static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
+
+ /*
+ * iter_info is a multi head priority queue of marks.
+- * Pick a subset of marks from queue heads, all with the
+- * same group and set the report_mask for selected subset.
+- * Returns the report_mask of the selected subset.
++ * Pick a subset of marks from queue heads, all with the same group
++ * and set the report_mask to a subset of the selected marks.
++ * Returns false if there are no more groups to iterate.
+ */
+-static unsigned int fsnotify_iter_select_report_types(
++static bool fsnotify_iter_select_report_types(
+ struct fsnotify_iter_info *iter_info)
+ {
+ struct fsnotify_group *max_prio_group = NULL;
+@@ -423,30 +415,37 @@ static unsigned int fsnotify_iter_select_report_types(
+ }
+
+ if (!max_prio_group)
+- return 0;
++ return false;
+
+ /* Set the report mask for marks from same group as max prio group */
++ iter_info->current_group = max_prio_group;
+ iter_info->report_mask = 0;
+ fsnotify_foreach_iter_type(type) {
+ mark = iter_info->marks[type];
+- if (mark &&
+- fsnotify_compare_groups(max_prio_group, mark->group) == 0)
++ if (mark && mark->group == iter_info->current_group)
+ fsnotify_iter_set_report_type(iter_info, type);
+ }
+
+- return iter_info->report_mask;
++ return true;
+ }
+
+ /*
+- * Pop from iter_info multi head queue, the marks that were iterated in the
++ * Pop from iter_info multi head queue, the marks that belong to the group of
+ * current iteration step.
+ */
+ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
+ {
++ struct fsnotify_mark *mark;
+ int type;
+
++ /*
++ * We cannot use fsnotify_foreach_iter_mark_type() here because we
++ * may need to advance a mark of type X that belongs to current_group
++ * but was not selected for reporting.
++ */
+ fsnotify_foreach_iter_type(type) {
+- if (fsnotify_iter_should_report_type(iter_info, type))
++ mark = iter_info->marks[type];
++ if (mark && mark->group == iter_info->current_group)
+ iter_info->marks[type] =
+ fsnotify_next_mark(iter_info->marks[type]);
+ }
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 9a1a9e78f69f5..9560734759fa6 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -399,6 +399,7 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
+
+ struct fsnotify_iter_info {
+ struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT];
++ struct fsnotify_group *current_group;
+ unsigned int report_mask;
+ int srcu_idx;
+ };
+@@ -415,20 +416,31 @@ static inline void fsnotify_iter_set_report_type(
+ iter_info->report_mask |= (1U << iter_type);
+ }
+
+-static inline void fsnotify_iter_set_report_type_mark(
+- struct fsnotify_iter_info *iter_info, int iter_type,
+- struct fsnotify_mark *mark)
++static inline struct fsnotify_mark *fsnotify_iter_mark(
++ struct fsnotify_iter_info *iter_info, int iter_type)
+ {
+- iter_info->marks[iter_type] = mark;
+- iter_info->report_mask |= (1U << iter_type);
++ if (fsnotify_iter_should_report_type(iter_info, iter_type))
++ return iter_info->marks[iter_type];
++ return NULL;
++}
++
++static inline int fsnotify_iter_step(struct fsnotify_iter_info *iter, int type,
++ struct fsnotify_mark **markp)
++{
++ while (type < FSNOTIFY_ITER_TYPE_COUNT) {
++ *markp = fsnotify_iter_mark(iter, type);
++ if (*markp)
++ break;
++ type++;
++ }
++ return type;
+ }
+
+ #define FSNOTIFY_ITER_FUNCS(name, NAME) \
+ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
+ struct fsnotify_iter_info *iter_info) \
+ { \
+- return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \
+- iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \
++ return fsnotify_iter_mark(iter_info, FSNOTIFY_ITER_TYPE_##NAME); \
+ }
+
+ FSNOTIFY_ITER_FUNCS(inode, INODE)
+@@ -438,6 +450,11 @@ FSNOTIFY_ITER_FUNCS(sb, SB)
+
+ #define fsnotify_foreach_iter_type(type) \
+ for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++)
++#define fsnotify_foreach_iter_mark_type(iter, mark, type) \
++ for (type = 0; \
++ type = fsnotify_iter_step(iter, type, &mark), \
++ type < FSNOTIFY_ITER_TYPE_COUNT; \
++ type++)
+
+ /*
+ * fsnotify_connp_t is what we embed in objects which connector can be attached
+--
+2.43.0
+
--- /dev/null
+From c674b3cb37cca23cb4cedc40412757f88e03b2d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 23:53:04 +0200
+Subject: fsnotify: invalidate dcache before IN_DELETE event
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit a37d9a17f099072fe4d3a9048b0321978707a918 ]
+
+Apparently, there are some applications that use IN_DELETE event as an
+invalidation mechanism and expect that if they try to open a file with
+the name reported with the delete event, that it should not contain the
+content of the deleted file.
+
+Commit 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of
+d_delete()") moved the fsnotify delete hook before d_delete() so fsnotify
+will have access to a positive dentry.
+
+This allowed a race where opening the deleted file via cached dentry
+is now possible after receiving the IN_DELETE event.
+
+To fix the regression, create a new hook fsnotify_delete() that takes
+the unlinked inode as an argument and use a helper d_delete_notify() to
+pin the inode, so we can pass it to fsnotify_delete() after d_delete().
+
+Backporting hint: this regression is from v5.3. Although the patch will
+apply with only trivial conflicts to v5.4 and v5.10, it won't build,
+because fsnotify_delete() implementation is different in each of those
+versions (see fsnotify_link()).
+
+A follow-up patch will fix the fsnotify_unlink/rmdir() calls in pseudo
+filesystems that do not need to call d_delete().
+
+Link: https://lore.kernel.org/r/20220120215305.282577-1-amir73il@gmail.com
+Reported-by: Ivan Delalande <colona@arista.com>
+Link: https://lore.kernel.org/linux-fsdevel/YeNyzoDM5hP5LtGW@visor/
+Fixes: 49246466a989 ("fsnotify: move fsnotify_nameremove() hook out of d_delete()")
+Cc: stable@vger.kernel.org # v5.3+
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+[ cel: adjusted to apply on v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/fsnotify.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index c80f448b9b0f2..bb8467cd11ae2 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -240,7 +240,8 @@ static inline void fsnotify_delete(struct inode *dir, struct inode *inode,
+ if (S_ISDIR(inode->i_mode))
+ mask |= FS_ISDIR;
+
+- fsnotify_name(dir, mask, inode, &dentry->d_name, 0);
++ fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name,
++ 0);
+ }
+
+ /**
+--
+2.43.0
+
--- /dev/null
+From 9953b41ffb40d147856f35ec99e818ce6ad6eb7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:16 +0300
+Subject: fsnotify: make allow_dups a property of the group
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit f3010343d9e119da35ee864b3a28993bb5c78ed7 ]
+
+Instead of passing the allow_dups argument to fsnotify_add_mark()
+as an argument, define the group flag FSNOTIFY_GROUP_DUPS to express
+the allow_dups behavior and set this behavior at group creation time
+for all calls of fsnotify_add_mark().
+
+Rename the allow_dups argument to generic add_flags argument for future
+use.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-6-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/mark.c | 12 ++++++------
+ include/linux/fsnotify_backend.h | 13 +++++++------
+ kernel/audit_fsnotify.c | 4 ++--
+ 3 files changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index c86982be2d505..1fb246ea61752 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -574,7 +574,7 @@ static struct fsnotify_mark_connector *fsnotify_grab_connector(
+ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+ fsnotify_connp_t *connp,
+ unsigned int obj_type,
+- int allow_dups, __kernel_fsid_t *fsid)
++ int add_flags, __kernel_fsid_t *fsid)
+ {
+ struct fsnotify_mark *lmark, *last = NULL;
+ struct fsnotify_mark_connector *conn;
+@@ -633,7 +633,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+
+ if ((lmark->group == mark->group) &&
+ (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
+- !allow_dups) {
++ !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
+ err = -EEXIST;
+ goto out_err;
+ }
+@@ -668,7 +668,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+ */
+ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ fsnotify_connp_t *connp, unsigned int obj_type,
+- int allow_dups, __kernel_fsid_t *fsid)
++ int add_flags, __kernel_fsid_t *fsid)
+ {
+ struct fsnotify_group *group = mark->group;
+ int ret = 0;
+@@ -688,7 +688,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ fsnotify_get_mark(mark); /* for g_list */
+ spin_unlock(&mark->lock);
+
+- ret = fsnotify_add_mark_list(mark, connp, obj_type, allow_dups, fsid);
++ ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid);
+ if (ret)
+ goto err;
+
+@@ -708,14 +708,14 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ }
+
+ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
+- unsigned int obj_type, int allow_dups,
++ unsigned int obj_type, int add_flags,
+ __kernel_fsid_t *fsid)
+ {
+ int ret;
+ struct fsnotify_group *group = mark->group;
+
+ mutex_lock(&group->mark_mutex);
+- ret = fsnotify_add_mark_locked(mark, connp, obj_type, allow_dups, fsid);
++ ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
+ mutex_unlock(&group->mark_mutex);
+ return ret;
+ }
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index f0bf557af0091..dd440e6ff5285 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -211,6 +211,7 @@ struct fsnotify_group {
+ bool shutdown; /* group is being shut down, don't queue more events */
+
+ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
++#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */
+ int flags;
+
+ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
+@@ -641,26 +642,26 @@ extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
+ /* attach the mark to the object */
+ extern int fsnotify_add_mark(struct fsnotify_mark *mark,
+ fsnotify_connp_t *connp, unsigned int obj_type,
+- int allow_dups, __kernel_fsid_t *fsid);
++ int add_flags, __kernel_fsid_t *fsid);
+ extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+ fsnotify_connp_t *connp,
+- unsigned int obj_type, int allow_dups,
++ unsigned int obj_type, int add_flags,
+ __kernel_fsid_t *fsid);
+
+ /* attach the mark to the inode */
+ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
+ struct inode *inode,
+- int allow_dups)
++ int add_flags)
+ {
+ return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
+- FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL);
++ FSNOTIFY_OBJ_TYPE_INODE, add_flags, NULL);
+ }
+ static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
+ struct inode *inode,
+- int allow_dups)
++ int add_flags)
+ {
+ return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
+- FSNOTIFY_OBJ_TYPE_INODE, allow_dups,
++ FSNOTIFY_OBJ_TYPE_INODE, add_flags,
+ NULL);
+ }
+
+diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
+index 8dee00959eb3d..c565fbf66ac87 100644
+--- a/kernel/audit_fsnotify.c
++++ b/kernel/audit_fsnotify.c
+@@ -100,7 +100,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
+ audit_update_mark(audit_mark, dentry->d_inode);
+ audit_mark->rule = krule;
+
+- ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true);
++ ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
+ if (ret < 0) {
+ audit_mark->path = NULL;
+ fsnotify_put_mark(&audit_mark->mark);
+@@ -183,7 +183,7 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = {
+ static int __init audit_fsnotify_init(void)
+ {
+ audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops,
+- 0);
++ FSNOTIFY_GROUP_DUPS);
+ if (IS_ERR(audit_fsnotify_group)) {
+ audit_fsnotify_group = NULL;
+ audit_panic("cannot create audit fsnotify group");
+--
+2.43.0
+
--- /dev/null
+From 04d71f95a3b600f86e1b3a0fafb4676da714582c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Feb 2022 17:14:38 +0200
+Subject: fsnotify: optimize FS_MODIFY events with no ignored masks
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 04e317ba72d07901b03399b3d1525e83424df5b3 ]
+
+fsnotify() treats FS_MODIFY events specially - it does not skip them
+even if the FS_MODIFY event does not appear in the object's fsnotify
+mask. This is because send_to_group() checks if FS_MODIFY needs to
+clear ignored mask of marks.
+
+The common case is that an object does not have any mark with ignored
+mask and in particular, that it does not have a mark with ignored mask
+and without the FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY flag.
+
+Set FS_MODIFY in object's fsnotify mask during fsnotify_recalc_mask()
+if object has a mark with an ignored mask and without the
+FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY flag and remove the special
+treatment of FS_MODIFY in fsnotify(), so that FS_MODIFY events could
+be optimized in the common case.
+
+Call fsnotify_recalc_mask() from fanotify after adding or removing an
+ignored mask from a mark without FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY
+or when adding the FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY flag to a mark
+with ignored mask (the flag cannot be removed by fanotify uapi).
+
+Performance results for doing 10000000 write(2)s to tmpfs:
+
+ vanilla patched
+without notification mark 25.486+-1.054 24.965+-0.244
+with notification mark 30.111+-0.139 26.891+-1.355
+
+So we can see the overhead of notification subsystem has been
+drastically reduced.
+
+Link: https://lore.kernel.org/r/20220223151438.790268-3-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify_user.c | 32 +++++++++++++++++++++++-------
+ fs/notify/fsnotify.c | 8 +++++---
+ include/linux/fsnotify_backend.h | 4 ++++
+ 3 files changed, 34 insertions(+), 10 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 9ed9d7f6c2b50..4f607fd793f3a 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1077,8 +1077,28 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
+ flags, umask);
+ }
+
++static void fanotify_mark_add_ignored_mask(struct fsnotify_mark *fsn_mark,
++ __u32 mask, unsigned int flags,
++ __u32 *removed)
++{
++ fsn_mark->ignored_mask |= mask;
++
++ /*
++ * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to
++ * the removal of the FS_MODIFY bit in calculated mask if it was set
++ * because of an ignored mask that is now going to survive FS_MODIFY.
++ */
++ if ((flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
++ !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) {
++ fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
++ if (!(fsn_mark->mask & FS_MODIFY))
++ *removed = FS_MODIFY;
++ }
++}
++
+ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+- __u32 mask, unsigned int flags)
++ __u32 mask, unsigned int flags,
++ __u32 *removed)
+ {
+ __u32 oldmask, newmask;
+
+@@ -1087,9 +1107,7 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+ if (!(flags & FAN_MARK_IGNORED_MASK)) {
+ fsn_mark->mask |= mask;
+ } else {
+- fsn_mark->ignored_mask |= mask;
+- if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
+- fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
++ fanotify_mark_add_ignored_mask(fsn_mark, mask, flags, removed);
+ }
+ newmask = fsnotify_calc_mask(fsn_mark);
+ spin_unlock(&fsn_mark->lock);
+@@ -1152,7 +1170,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ __kernel_fsid_t *fsid)
+ {
+ struct fsnotify_mark *fsn_mark;
+- __u32 added;
++ __u32 added, removed = 0;
+ int ret = 0;
+
+ mutex_lock(&group->mark_mutex);
+@@ -1175,8 +1193,8 @@ static int fanotify_add_mark(struct fsnotify_group *group,
+ goto out;
+ }
+
+- added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
+- if (added & ~fsnotify_conn_mask(fsn_mark->connector))
++ added = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &removed);
++ if (removed || (added & ~fsnotify_conn_mask(fsn_mark->connector)))
+ fsnotify_recalc_mask(fsn_mark->connector);
+
+ out:
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index ab81a0776ece5..494f653efbc6e 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -531,11 +531,13 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+
+
+ /*
+- * if this is a modify event we may need to clear the ignored masks
+- * otherwise return if none of the marks care about this type of event.
++ * If this is a modify event we may need to clear some ignored masks.
++ * In that case, the object with ignored masks will have the FS_MODIFY
++ * event in its mask.
++ * Otherwise, return if none of the marks care about this type of event.
+ */
+ test_mask = (mask & ALL_FSNOTIFY_EVENTS);
+- if (!(mask & FS_MODIFY) && !(test_mask & marks_mask))
++ if (!(test_mask & marks_mask))
+ return 0;
+
+ iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 5f9c960049b07..0805b74cae441 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -609,6 +609,10 @@ static inline __u32 fsnotify_calc_mask(struct fsnotify_mark *mark)
+ if (!mark->ignored_mask)
+ return mask;
+
++ /* Interest in FS_MODIFY may be needed for clearing ignored mask */
++ if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
++ mask |= FS_MODIFY;
++
+ /*
+ * If mark is interested in ignoring events on children, the object must
+ * show interest in those events for fsnotify_parent() to notice it.
+--
+2.43.0
+
--- /dev/null
+From 143dd392da96d7b6050caf9f5ff01cfb8adeef19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:16 -0300
+Subject: fsnotify: pass data_type to fsnotify_name()
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 9baf93d68bcc3d0a6042283b82603c076e25e4f5 ]
+
+Align the arguments of fsnotify_name() to those of fsnotify().
+
+Link: https://lore.kernel.org/r/20211025192746.66445-2-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/fsnotify.h | 22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index a9477c14fad5c..298fd2bc29e49 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -26,20 +26,21 @@
+ * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only
+ * the child is interested and not the parent.
+ */
+-static inline void fsnotify_name(struct inode *dir, __u32 mask,
+- struct inode *child,
+- const struct qstr *name, u32 cookie)
++static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
++ struct inode *dir, const struct qstr *name,
++ u32 cookie)
+ {
+ if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
+- return;
++ return 0;
+
+- fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie);
++ return fsnotify(mask, data, data_type, dir, name, NULL, cookie);
+ }
+
+ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
+ __u32 mask)
+ {
+- fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0);
++ fsnotify_name(mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
++ dir, &dentry->d_name, 0);
+ }
+
+ static inline void fsnotify_inode(struct inode *inode, __u32 mask)
+@@ -154,8 +155,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
+ new_dir_mask |= FS_ISDIR;
+ }
+
+- fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie);
+- fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie);
++ fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE,
++ old_dir, old_name, fs_cookie);
++ fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE,
++ new_dir, new_name, fs_cookie);
+
+ if (target)
+ fsnotify_link_count(target);
+@@ -209,7 +212,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
+ fsnotify_link_count(inode);
+ audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE);
+
+- fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0);
++ fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE,
++ dir, &new_dentry->d_name, 0);
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From c7dd9fc9943ea7979a17474baeaf388036a9ec36 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:17 -0300
+Subject: fsnotify: pass dentry instead of inode data
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit fd5a3ff49a19aa69e2bc1e26e98037c2d778e61a ]
+
+Define a new data type to pass for event - FSNOTIFY_EVENT_DENTRY.
+Use it to pass the dentry instead of its ->d_inode where available.
+
+This is needed in preparation to the refactor to retrieve the super
+block from the data field. In some cases (i.e. mkdir in kernfs), the
+data inode comes from a negative dentry, such that no super block
+information would be available. By receiving the dentry itself, instead
+of the inode, fsnotify can derive the super block even in these cases.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-3-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+[Expand explanation in commit message]
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/fsnotify.h | 5 ++---
+ include/linux/fsnotify_backend.h | 16 ++++++++++++++++
+ 2 files changed, 18 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index 298fd2bc29e49..70e6b147a76ad 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -39,8 +39,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
+ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
+ __u32 mask)
+ {
+- fsnotify_name(mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
+- dir, &dentry->d_name, 0);
++ fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0);
+ }
+
+ static inline void fsnotify_inode(struct inode *inode, __u32 mask)
+@@ -87,7 +86,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
+ */
+ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
+ {
+- fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE);
++ fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
+ }
+
+ static inline int fsnotify_file(struct file *file, __u32 mask)
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 1ce66748a2d29..a2db821e8a8f2 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -248,6 +248,7 @@ enum fsnotify_data_type {
+ FSNOTIFY_EVENT_NONE,
+ FSNOTIFY_EVENT_PATH,
+ FSNOTIFY_EVENT_INODE,
++ FSNOTIFY_EVENT_DENTRY,
+ };
+
+ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
+@@ -255,6 +256,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
+ switch (data_type) {
+ case FSNOTIFY_EVENT_INODE:
+ return (struct inode *)data;
++ case FSNOTIFY_EVENT_DENTRY:
++ return d_inode(data);
+ case FSNOTIFY_EVENT_PATH:
+ return d_inode(((const struct path *)data)->dentry);
+ default:
+@@ -262,6 +265,19 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
+ }
+ }
+
++static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type)
++{
++ switch (data_type) {
++ case FSNOTIFY_EVENT_DENTRY:
++ /* Non const is needed for dget() */
++ return (struct dentry *)data;
++ case FSNOTIFY_EVENT_PATH:
++ return ((const struct path *)data)->dentry;
++ default:
++ return NULL;
++ }
++}
++
+ static inline const struct path *fsnotify_data_path(const void *data,
+ int data_type)
+ {
+--
+2.43.0
+
--- /dev/null
+From dada96c8582398e8d84ef13e5762873cf2a4d923 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:15 +0300
+Subject: fsnotify: pass flags argument to fsnotify_alloc_group()
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 867a448d587e7fa845bceaf4ee1c632448f2a9fa ]
+
+Add flags argument to fsnotify_alloc_group(), define and use the flag
+FSNOTIFY_GROUP_USER in inotify and fanotify instead of the helper
+fsnotify_alloc_user_group() to indicate user allocation.
+
+Although the flag FSNOTIFY_GROUP_USER is currently not used after group
+allocation, we store the flags argument in the group struct for future
+use of other group flags.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-5-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 3 ++-
+ fs/notify/dnotify/dnotify.c | 2 +-
+ fs/notify/fanotify/fanotify_user.c | 3 ++-
+ fs/notify/group.c | 21 +++++++++------------
+ fs/notify/inotify/inotify_user.c | 3 ++-
+ include/linux/fsnotify_backend.h | 8 ++++++--
+ kernel/audit_fsnotify.c | 3 ++-
+ kernel/audit_tree.c | 2 +-
+ kernel/audit_watch.c | 2 +-
+ 9 files changed, 26 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 429ae485ebbbe..97ca256a76323 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -671,7 +671,8 @@ nfsd_file_cache_init(void)
+ goto out_shrinker;
+ }
+
+- nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
++ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
++ 0);
+ if (IS_ERR(nfsd_file_fsnotify_group)) {
+ pr_err("nfsd: unable to create fsnotify group: %ld\n",
+ PTR_ERR(nfsd_file_fsnotify_group));
+diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
+index d5ebebb034ffe..6c586802c50e6 100644
+--- a/fs/notify/dnotify/dnotify.c
++++ b/fs/notify/dnotify/dnotify.c
+@@ -383,7 +383,7 @@ static int __init dnotify_init(void)
+ SLAB_PANIC|SLAB_ACCOUNT);
+ dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
+
+- dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
++ dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops, 0);
+ if (IS_ERR(dnotify_group))
+ panic("unable to allocate fsnotify group for dnotify\n");
+ return 0;
+diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
+index 336ccec2abed3..f23326be0d371 100644
+--- a/fs/notify/fanotify/fanotify_user.c
++++ b/fs/notify/fanotify/fanotify_user.c
+@@ -1351,7 +1351,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
+ f_flags |= O_NONBLOCK;
+
+ /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
+- group = fsnotify_alloc_user_group(&fanotify_fsnotify_ops);
++ group = fsnotify_alloc_group(&fanotify_fsnotify_ops,
++ FSNOTIFY_GROUP_USER);
+ if (IS_ERR(group)) {
+ return PTR_ERR(group);
+ }
+diff --git a/fs/notify/group.c b/fs/notify/group.c
+index b7d4d64f87c29..18446b7b0d495 100644
+--- a/fs/notify/group.c
++++ b/fs/notify/group.c
+@@ -112,7 +112,8 @@ void fsnotify_put_group(struct fsnotify_group *group)
+ EXPORT_SYMBOL_GPL(fsnotify_put_group);
+
+ static struct fsnotify_group *__fsnotify_alloc_group(
+- const struct fsnotify_ops *ops, gfp_t gfp)
++ const struct fsnotify_ops *ops,
++ int flags, gfp_t gfp)
+ {
+ struct fsnotify_group *group;
+
+@@ -133,6 +134,7 @@ static struct fsnotify_group *__fsnotify_alloc_group(
+ INIT_LIST_HEAD(&group->marks_list);
+
+ group->ops = ops;
++ group->flags = flags;
+
+ return group;
+ }
+@@ -140,20 +142,15 @@ static struct fsnotify_group *__fsnotify_alloc_group(
+ /*
+ * Create a new fsnotify_group and hold a reference for the group returned.
+ */
+-struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
++struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops,
++ int flags)
+ {
+- return __fsnotify_alloc_group(ops, GFP_KERNEL);
+-}
+-EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
++ gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT :
++ GFP_KERNEL;
+
+-/*
+- * Create a new fsnotify_group and hold a reference for the group returned.
+- */
+-struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops)
+-{
+- return __fsnotify_alloc_group(ops, GFP_KERNEL_ACCOUNT);
++ return __fsnotify_alloc_group(ops, flags, gfp);
+ }
+-EXPORT_SYMBOL_GPL(fsnotify_alloc_user_group);
++EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
+
+ int fsnotify_fasync(int fd, struct file *file, int on)
+ {
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index fdce87902b382..0d8e1bead23ea 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -648,7 +648,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
+ struct fsnotify_group *group;
+ struct inotify_event_info *oevent;
+
+- group = fsnotify_alloc_user_group(&inotify_fsnotify_ops);
++ group = fsnotify_alloc_group(&inotify_fsnotify_ops,
++ FSNOTIFY_GROUP_USER);
+ if (IS_ERR(group))
+ return group;
+
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index b1c72edd97845..f0bf557af0091 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -210,6 +210,9 @@ struct fsnotify_group {
+ unsigned int priority;
+ bool shutdown; /* group is being shut down, don't queue more events */
+
++#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
++ int flags;
++
+ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
+ struct mutex mark_mutex; /* protect marks_list */
+ atomic_t user_waits; /* Number of tasks waiting for user
+@@ -543,8 +546,9 @@ static inline void fsnotify_update_flags(struct dentry *dentry)
+ /* called from fsnotify listeners, such as fanotify or dnotify */
+
+ /* create a new group */
+-extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops);
+-extern struct fsnotify_group *fsnotify_alloc_user_group(const struct fsnotify_ops *ops);
++extern struct fsnotify_group *fsnotify_alloc_group(
++ const struct fsnotify_ops *ops,
++ int flags);
+ /* get reference to a group */
+ extern void fsnotify_get_group(struct fsnotify_group *group);
+ /* drop reference on a group from fsnotify_alloc_group */
+diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
+index 7a506b65e8630..8dee00959eb3d 100644
+--- a/kernel/audit_fsnotify.c
++++ b/kernel/audit_fsnotify.c
+@@ -182,7 +182,8 @@ static const struct fsnotify_ops audit_mark_fsnotify_ops = {
+
+ static int __init audit_fsnotify_init(void)
+ {
+- audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops);
++ audit_fsnotify_group = fsnotify_alloc_group(&audit_mark_fsnotify_ops,
++ 0);
+ if (IS_ERR(audit_fsnotify_group)) {
+ audit_fsnotify_group = NULL;
+ audit_panic("cannot create audit fsnotify group");
+diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
+index 2cd7b5694422d..18ab4575ae009 100644
+--- a/kernel/audit_tree.c
++++ b/kernel/audit_tree.c
+@@ -1073,7 +1073,7 @@ static int __init audit_tree_init(void)
+
+ audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
+- audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
++ audit_tree_group = fsnotify_alloc_group(&audit_tree_ops, 0);
+ if (IS_ERR(audit_tree_group))
+ audit_panic("cannot initialize fsnotify group for rectree watches");
+
+diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
+index fd7b30a2d9a4b..5cf22fe301493 100644
+--- a/kernel/audit_watch.c
++++ b/kernel/audit_watch.c
+@@ -492,7 +492,7 @@ static const struct fsnotify_ops audit_watch_fsnotify_ops = {
+
+ static int __init audit_watch_init(void)
+ {
+- audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops);
++ audit_watch_group = fsnotify_alloc_group(&audit_watch_fsnotify_ops, 0);
+ if (IS_ERR(audit_watch_group)) {
+ audit_watch_group = NULL;
+ audit_panic("cannot create audit fsnotify group");
+--
+2.43.0
+
--- /dev/null
+From 08c7bade36c4c3b8e5b28d8c47e03451aa1b88dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:27 -0300
+Subject: fsnotify: Pass group argument to free_event
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 330ae77d2a5b0af32c0f29e139bf28ec8591de59 ]
+
+For group-wide mempool backed events, like FS_ERROR, the free_event
+callback will need to reference the group's mempool to free the memory.
+Wire that argument into the current callers.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-13-krisman@collabora.com
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 3 ++-
+ fs/notify/group.c | 2 +-
+ fs/notify/inotify/inotify_fsnotify.c | 3 ++-
+ fs/notify/notification.c | 2 +-
+ include/linux/fsnotify_backend.h | 2 +-
+ 5 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index f82e20228999c..c620b4f6fe123 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -835,7 +835,8 @@ static void fanotify_free_name_event(struct fanotify_event *event)
+ kfree(FANOTIFY_NE(event));
+ }
+
+-static void fanotify_free_event(struct fsnotify_event *fsn_event)
++static void fanotify_free_event(struct fsnotify_group *group,
++ struct fsnotify_event *fsn_event)
+ {
+ struct fanotify_event *event;
+
+diff --git a/fs/notify/group.c b/fs/notify/group.c
+index fb89c351295d6..6a297efc47887 100644
+--- a/fs/notify/group.c
++++ b/fs/notify/group.c
+@@ -88,7 +88,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
+ * that deliberately ignores overflow events.
+ */
+ if (group->overflow_event)
+- group->ops->free_event(group->overflow_event);
++ group->ops->free_event(group, group->overflow_event);
+
+ fsnotify_put_group(group);
+ }
+diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
+index be3eb1cebdcce..8279827836399 100644
+--- a/fs/notify/inotify/inotify_fsnotify.c
++++ b/fs/notify/inotify/inotify_fsnotify.c
+@@ -184,7 +184,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
+ dec_inotify_instances(group->inotify_data.ucounts);
+ }
+
+-static void inotify_free_event(struct fsnotify_event *fsn_event)
++static void inotify_free_event(struct fsnotify_group *group,
++ struct fsnotify_event *fsn_event)
+ {
+ kfree(INOTIFY_E(fsn_event));
+ }
+diff --git a/fs/notify/notification.c b/fs/notify/notification.c
+index 44bb10f507153..9022ae650cf86 100644
+--- a/fs/notify/notification.c
++++ b/fs/notify/notification.c
+@@ -64,7 +64,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
+ WARN_ON(!list_empty(&event->list));
+ spin_unlock(&group->notification_lock);
+ }
+- group->ops->free_event(event);
++ group->ops->free_event(group, event);
+ }
+
+ /*
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index b71dc788018e4..3a7c314361824 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -156,7 +156,7 @@ struct fsnotify_ops {
+ const struct qstr *file_name, u32 cookie);
+ void (*free_group_priv)(struct fsnotify_group *group);
+ void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
+- void (*free_event)(struct fsnotify_event *event);
++ void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event);
+ /* called on final put+free to free memory */
+ void (*free_mark)(struct fsnotify_mark *mark);
+ };
+--
+2.43.0
+
--- /dev/null
+From f87ba5af7861b93908c0f4889f846c98f6c212e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:26 -0300
+Subject: fsnotify: Protect fsnotify_handle_inode_event from no-inode events
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 24dca90590509a7a6cbe0650100c90c5b8a3468a ]
+
+FAN_FS_ERROR allows events without inodes - i.e. for file system-wide
+errors. Even though fsnotify_handle_inode_event is not currently used
+by fanotify, this patch protects other backends from cases where neither
+inode nor dir is provided. Also document the constraints of the
+interface (inode and dir cannot both be NULL).
+
+Link: https://lore.kernel.org/r/20211025192746.66445-12-krisman@collabora.com
+Suggested-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 3 +++
+ fs/notify/fsnotify.c | 3 +++
+ include/linux/fsnotify_backend.h | 1 +
+ 3 files changed, 7 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 1e8c31ed6c7c4..fbc0628c599af 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -595,6 +595,9 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
+ struct inode *inode, struct inode *dir,
+ const struct qstr *name, u32 cookie)
+ {
++ if (WARN_ON_ONCE(!inode))
++ return 0;
++
+ trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+ /* Should be no marks on non-regular files */
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index fde3a1115a170..4034ca566f95c 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -252,6 +252,9 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group,
+ if (WARN_ON_ONCE(!ops->handle_inode_event))
+ return 0;
+
++ if (WARN_ON_ONCE(!inode && !dir))
++ return 0;
++
+ if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+ path && d_unlinked(path->dentry))
+ return 0;
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 035438fe4a435..b71dc788018e4 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -136,6 +136,7 @@ struct mem_cgroup;
+ * @dir: optional directory associated with event -
+ * if @file_name is not NULL, this is the directory that
+ * @file_name is relative to.
++ * Either @inode or @dir must be non-NULL.
+ * @file_name: optional file name associated with event
+ * @cookie: inotify rename cookie
+ *
+--
+2.43.0
+
--- /dev/null
+From 6d649d15d402782ef8dc92f85b2ad1da1c91f42a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Mar 2022 23:12:40 +0800
+Subject: fsnotify: remove redundant parameter judgment
+
+From: Bang Li <libang.linuxer@gmail.com>
+
+[ Upstream commit f92ca72b0263d601807bbd23ed25cbe6f4da89f4 ]
+
+iput() already checks its argument for NULL, so there is no need to
+repeat the check before calling it here.
+
+Link: https://lore.kernel.org/r/20220311151240.62045-1-libang.linuxer@gmail.com
+Signed-off-by: Bang Li <libang.linuxer@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fsnotify.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 494f653efbc6e..70a8516b78bc5 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -70,8 +70,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&sb->s_inode_list_lock);
+
+- if (iput_inode)
+- iput(iput_inode);
++ iput(iput_inode);
+
+ /* for each watch, send FS_UNMOUNT and then remove it */
+ fsnotify_inode(inode, FS_UNMOUNT);
+@@ -85,8 +84,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
+ }
+ spin_unlock(&sb->s_inode_list_lock);
+
+- if (iput_inode)
+- iput(iput_inode);
++ iput(iput_inode);
+ }
+
+ void fsnotify_sb_delete(struct super_block *sb)
+--
+2.43.0
+
--- /dev/null
+From 0ceae15dabe3549c1f99cf957bdf9847b4b901da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Sep 2022 11:38:28 +0800
+Subject: fsnotify: remove unused declaration
+
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+
+[ Upstream commit f847c74d6e89f10926db58649a05b99237258691 ]
+
+fsnotify_alloc_event_holder() and fsnotify_destroy_event_holder()
+have been removed since commit 7053aee26a35 ("fsnotify: do not share
+events between notification groups"), so remove their declarations.
+
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fsnotify.h | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
+index 87d8a50ee8038..fde74eb333cc9 100644
+--- a/fs/notify/fsnotify.h
++++ b/fs/notify/fsnotify.h
+@@ -76,10 +76,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
+ */
+ extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
+
+-/* allocate and destroy and event holder to attach events to notification/access queues */
+-extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
+-extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+-
+ extern struct kmem_cache *fsnotify_mark_connector_cachep;
+
+ #endif /* __FS_NOTIFY_FSNOTIFY_H_ */
+--
+2.43.0
+
--- /dev/null
+From 5c482e5af3f290f4305b852c64dac995a433830f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:25 -0300
+Subject: fsnotify: Retrieve super block from the data field
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 29335033c574a15334015d8c4e36862cff3d3384 ]
+
+Some file system events (i.e. FS_ERROR) might not be associated with an
+inode or directory. For these, we can retrieve the super block from the
+data field. But, since the super_block is available in the data field
+on every event type, simplify the code to always retrieve it from there,
+through a new helper.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-11-krisman@collabora.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fsnotify.c | 7 +++----
+ include/linux/fsnotify_backend.h | 15 +++++++++++++++
+ 2 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 963e6ce75b961..fde3a1115a170 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -455,16 +455,16 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
+ * @file_name is relative to
+ * @file_name: optional file name associated with event
+ * @inode: optional inode associated with event -
+- * either @dir or @inode must be non-NULL.
+- * if both are non-NULL event may be reported to both.
++ * If @dir and @inode are both non-NULL, event may be
++ * reported to both.
+ * @cookie: inotify rename cookie
+ */
+ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ const struct qstr *file_name, struct inode *inode, u32 cookie)
+ {
+ const struct path *path = fsnotify_data_path(data, data_type);
++ struct super_block *sb = fsnotify_data_sb(data, data_type);
+ struct fsnotify_iter_info iter_info = {};
+- struct super_block *sb;
+ struct mount *mnt = NULL;
+ struct inode *parent = NULL;
+ int ret = 0;
+@@ -483,7 +483,6 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ */
+ parent = dir;
+ }
+- sb = inode->i_sb;
+
+ /*
+ * Optimization: srcu_read_lock() has a memory barrier which can
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index b323d0c4b9671..035438fe4a435 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -289,6 +289,21 @@ static inline const struct path *fsnotify_data_path(const void *data,
+ }
+ }
+
++static inline struct super_block *fsnotify_data_sb(const void *data,
++ int data_type)
++{
++ switch (data_type) {
++ case FSNOTIFY_EVENT_INODE:
++ return ((struct inode *)data)->i_sb;
++ case FSNOTIFY_EVENT_DENTRY:
++ return ((struct dentry *)data)->d_sb;
++ case FSNOTIFY_EVENT_PATH:
++ return ((const struct path *)data)->dentry->d_sb;
++ default:
++ return NULL;
++ }
++}
++
+ enum fsnotify_obj_type {
+ FSNOTIFY_OBJ_TYPE_INODE,
+ FSNOTIFY_OBJ_TYPE_PARENT,
+--
+2.43.0
+
--- /dev/null
+From a306f5b339eec184e68bd207b06d55d73637f988 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 22:15:28 +0200
+Subject: fsnotify: separate mark iterator type from object type enum
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 1c9007d62bea6fd164285314f7553f73e5308863 ]
+
+They are two different types that use the same enum, so this is confusing.
+
+Use the object type to indicate the type of object mark is attached to
+and the iter type to indicate the type of watch.
+
+A group can have two different watches of the same object type (parent
+and child watches) that match the same event.
+
+Link: https://lore.kernel.org/r/20211129201537.1932819-3-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fanotify/fanotify.c | 6 ++---
+ fs/notify/fsnotify.c | 18 +++++++-------
+ fs/notify/mark.c | 4 ++--
+ include/linux/fsnotify_backend.h | 41 ++++++++++++++++++++++----------
+ 4 files changed, 42 insertions(+), 27 deletions(-)
+
+diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
+index b6091775aa6ef..652fe84cb8acd 100644
+--- a/fs/notify/fanotify/fanotify.c
++++ b/fs/notify/fanotify/fanotify.c
+@@ -299,7 +299,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ return 0;
+ }
+
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ if (!fsnotify_iter_should_report_type(iter_info, type))
+ continue;
+ mark = iter_info->marks[type];
+@@ -318,7 +318,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+ * If the event is on a child and this mark is on a parent not
+ * watching children, don't send it!
+ */
+- if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
++ if (type == FSNOTIFY_ITER_TYPE_PARENT &&
+ !(mark->mask & FS_EVENT_ON_CHILD))
+ continue;
+
+@@ -746,7 +746,7 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
+ int type;
+ __kernel_fsid_t fsid = {};
+
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ struct fsnotify_mark_connector *conn;
+
+ if (!fsnotify_iter_should_report_type(iter_info, type))
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 4034ca566f95c..0c94457c625e2 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -330,7 +330,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
+
+ /* clear ignored on inode modification */
+ if (mask & FS_MODIFY) {
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ if (!fsnotify_iter_should_report_type(iter_info, type))
+ continue;
+ mark = iter_info->marks[type];
+@@ -340,7 +340,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type,
+ }
+ }
+
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ if (!fsnotify_iter_should_report_type(iter_info, type))
+ continue;
+ mark = iter_info->marks[type];
+@@ -405,7 +405,7 @@ static unsigned int fsnotify_iter_select_report_types(
+ int type;
+
+ /* Choose max prio group among groups of all queue heads */
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ mark = iter_info->marks[type];
+ if (mark &&
+ fsnotify_compare_groups(max_prio_group, mark->group) > 0)
+@@ -417,7 +417,7 @@ static unsigned int fsnotify_iter_select_report_types(
+
+ /* Set the report mask for marks from same group as max prio group */
+ iter_info->report_mask = 0;
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ mark = iter_info->marks[type];
+ if (mark &&
+ fsnotify_compare_groups(max_prio_group, mark->group) == 0)
+@@ -435,7 +435,7 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
+ {
+ int type;
+
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ if (fsnotify_iter_should_report_type(iter_info, type))
+ iter_info->marks[type] =
+ fsnotify_next_mark(iter_info->marks[type]);
+@@ -519,18 +519,18 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+
+ iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
+
+- iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
++ iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
+ fsnotify_first_mark(&sb->s_fsnotify_marks);
+ if (mnt) {
+- iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
++ iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
+ fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
+ }
+ if (inode) {
+- iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
++ iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] =
+ fsnotify_first_mark(&inode->i_fsnotify_marks);
+ }
+ if (parent) {
+- iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
++ iter_info.marks[FSNOTIFY_ITER_TYPE_PARENT] =
+ fsnotify_first_mark(&parent->i_fsnotify_marks);
+ }
+
+diff --git a/fs/notify/mark.c b/fs/notify/mark.c
+index 7c0946e16918a..b42629d2fc1c6 100644
+--- a/fs/notify/mark.c
++++ b/fs/notify/mark.c
+@@ -353,7 +353,7 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+ {
+ int type;
+
+- fsnotify_foreach_obj_type(type) {
++ fsnotify_foreach_iter_type(type) {
+ /* This can fail if mark is being removed */
+ if (!fsnotify_get_mark_safe(iter_info->marks[type])) {
+ __release(&fsnotify_mark_srcu);
+@@ -382,7 +382,7 @@ void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
+ int type;
+
+ iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
+- fsnotify_foreach_obj_type(type)
++ fsnotify_foreach_iter_type(type)
+ fsnotify_put_mark_wake(iter_info->marks[type]);
+ }
+
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index b9c84b1dbcc8f..73739fee1710f 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -337,10 +337,25 @@ static inline struct fs_error_report *fsnotify_data_error_report(
+ }
+ }
+
++/*
++ * Index to merged marks iterator array that correlates to a type of watch.
++ * The type of watched object can be deduced from the iterator type, but not
++ * the other way around, because an event can match different watched objects
++ * of the same object type.
++ * For example, both parent and child are watching an object of type inode.
++ */
++enum fsnotify_iter_type {
++ FSNOTIFY_ITER_TYPE_INODE,
++ FSNOTIFY_ITER_TYPE_VFSMOUNT,
++ FSNOTIFY_ITER_TYPE_SB,
++ FSNOTIFY_ITER_TYPE_PARENT,
++ FSNOTIFY_ITER_TYPE_COUNT
++};
++
++/* The type of object that a mark is attached to */
+ enum fsnotify_obj_type {
+ FSNOTIFY_OBJ_TYPE_ANY = -1,
+ FSNOTIFY_OBJ_TYPE_INODE,
+- FSNOTIFY_OBJ_TYPE_PARENT,
+ FSNOTIFY_OBJ_TYPE_VFSMOUNT,
+ FSNOTIFY_OBJ_TYPE_SB,
+ FSNOTIFY_OBJ_TYPE_COUNT,
+@@ -353,37 +368,37 @@ static inline bool fsnotify_valid_obj_type(unsigned int obj_type)
+ }
+
+ struct fsnotify_iter_info {
+- struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT];
++ struct fsnotify_mark *marks[FSNOTIFY_ITER_TYPE_COUNT];
+ unsigned int report_mask;
+ int srcu_idx;
+ };
+
+ static inline bool fsnotify_iter_should_report_type(
+- struct fsnotify_iter_info *iter_info, int type)
++ struct fsnotify_iter_info *iter_info, int iter_type)
+ {
+- return (iter_info->report_mask & (1U << type));
++ return (iter_info->report_mask & (1U << iter_type));
+ }
+
+ static inline void fsnotify_iter_set_report_type(
+- struct fsnotify_iter_info *iter_info, int type)
++ struct fsnotify_iter_info *iter_info, int iter_type)
+ {
+- iter_info->report_mask |= (1U << type);
++ iter_info->report_mask |= (1U << iter_type);
+ }
+
+ static inline void fsnotify_iter_set_report_type_mark(
+- struct fsnotify_iter_info *iter_info, int type,
++ struct fsnotify_iter_info *iter_info, int iter_type,
+ struct fsnotify_mark *mark)
+ {
+- iter_info->marks[type] = mark;
+- iter_info->report_mask |= (1U << type);
++ iter_info->marks[iter_type] = mark;
++ iter_info->report_mask |= (1U << iter_type);
+ }
+
+ #define FSNOTIFY_ITER_FUNCS(name, NAME) \
+ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
+ struct fsnotify_iter_info *iter_info) \
+ { \
+- return (iter_info->report_mask & (1U << FSNOTIFY_OBJ_TYPE_##NAME)) ? \
+- iter_info->marks[FSNOTIFY_OBJ_TYPE_##NAME] : NULL; \
++ return (iter_info->report_mask & (1U << FSNOTIFY_ITER_TYPE_##NAME)) ? \
++ iter_info->marks[FSNOTIFY_ITER_TYPE_##NAME] : NULL; \
+ }
+
+ FSNOTIFY_ITER_FUNCS(inode, INODE)
+@@ -391,8 +406,8 @@ FSNOTIFY_ITER_FUNCS(parent, PARENT)
+ FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
+ FSNOTIFY_ITER_FUNCS(sb, SB)
+
+-#define fsnotify_foreach_obj_type(type) \
+- for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++)
++#define fsnotify_foreach_iter_type(type) \
++ for (type = 0; type < FSNOTIFY_ITER_TYPE_COUNT; type++)
+
+ /*
+ * fsnotify_connp_t is what we embed in objects which connector can be attached
+--
+2.43.0
+
--- /dev/null
+From c90dca329bd1916c470a12b15601027974d74f67 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:32 -0300
+Subject: fsnotify: Support FS_ERROR event type
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit 9daa811073fa19c08e8aad3b90f9235fed161acf ]
+
+Expose a new type of fsnotify event for filesystems to report errors for
+userspace monitoring tools. fanotify will send this type of
+notification for FAN_FS_ERROR events. This also introduces a helper for
+generating the new event.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-18-krisman@collabora.com
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/fsnotify.h | 13 +++++++++++++
+ include/linux/fsnotify_backend.h | 32 +++++++++++++++++++++++++++++++-
+ 2 files changed, 44 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
+index a327a95fa68f1..67d6db6c8df8f 100644
+--- a/include/linux/fsnotify.h
++++ b/include/linux/fsnotify.h
+@@ -375,4 +375,17 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
+ fsnotify_dentry(dentry, mask);
+ }
+
++static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
++ int error)
++{
++ struct fs_error_report report = {
++ .error = error,
++ .inode = inode,
++ .sb = sb,
++ };
++
++ return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR,
++ NULL, NULL, NULL, 0);
++}
++
+ #endif /* _LINUX_FS_NOTIFY_H */
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 3a7c314361824..00dbaafbcf953 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -42,6 +42,12 @@
+
+ #define FS_UNMOUNT 0x00002000 /* inode on umount fs */
+ #define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
++#define FS_ERROR 0x00008000 /* Filesystem Error (fanotify) */
++
++/*
++ * FS_IN_IGNORED overloads FS_ERROR. It is only used internally by inotify
++ * which does not support FS_ERROR.
++ */
+ #define FS_IN_IGNORED 0x00008000 /* last inotify event here */
+
+ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */
+@@ -95,7 +101,8 @@
+ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+ FS_EVENTS_POSS_ON_CHILD | \
+ FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \
+- FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED)
++ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
++ FS_ERROR)
+
+ /* Extra flags that may be reported with event or control handling of events */
+ #define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
+@@ -250,6 +257,13 @@ enum fsnotify_data_type {
+ FSNOTIFY_EVENT_PATH,
+ FSNOTIFY_EVENT_INODE,
+ FSNOTIFY_EVENT_DENTRY,
++ FSNOTIFY_EVENT_ERROR,
++};
++
++struct fs_error_report {
++ int error;
++ struct inode *inode;
++ struct super_block *sb;
+ };
+
+ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
+@@ -261,6 +275,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
+ return d_inode(data);
+ case FSNOTIFY_EVENT_PATH:
+ return d_inode(((const struct path *)data)->dentry);
++ case FSNOTIFY_EVENT_ERROR:
++ return ((struct fs_error_report *)data)->inode;
+ default:
+ return NULL;
+ }
+@@ -300,6 +316,20 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
+ return ((struct dentry *)data)->d_sb;
+ case FSNOTIFY_EVENT_PATH:
+ return ((const struct path *)data)->dentry->d_sb;
++ case FSNOTIFY_EVENT_ERROR:
++ return ((struct fs_error_report *) data)->sb;
++ default:
++ return NULL;
++ }
++}
++
++static inline struct fs_error_report *fsnotify_data_error_report(
++ const void *data,
++ int data_type)
++{
++ switch (data_type) {
++ case FSNOTIFY_EVENT_ERROR:
++ return (struct fs_error_report *) data;
+ default:
+ return NULL;
+ }
+--
+2.43.0
+
--- /dev/null
+From 1ad3d6c9bce459219b707e5bd60a2cf6bf41e826 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Oct 2021 16:27:22 -0300
+Subject: inotify: Don't force FS_IN_IGNORED
+
+From: Gabriel Krisman Bertazi <krisman@collabora.com>
+
+[ Upstream commit e0462f91d24756916fded4313d508e0fc52f39c9 ]
+
+According to Amir:
+
+"FS_IN_IGNORED is completely internal to inotify and there is no need
+to set it in i_fsnotify_mask at all, so if we remove the bit from the
+output of inotify_arg_to_mask() no functionality will change and we will
+be able to overload the event bit for FS_ERROR."
+
+This is done in preparation to overload FS_ERROR with the notification
+mechanism in fanotify.
+
+Link: https://lore.kernel.org/r/20211025192746.66445-8-krisman@collabora.com
+Suggested-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/inotify/inotify_user.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index 9fb7701d2f8a0..b87759b8402be 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -94,10 +94,10 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
+ __u32 mask;
+
+ /*
+- * Everything should accept their own ignored and should receive events
+- * when the inode is unmounted. All directories care about children.
++ * Everything should receive events when the inode is unmounted.
++ * All directories care about children.
+ */
+- mask = (FS_IN_IGNORED | FS_UNMOUNT);
++ mask = (FS_UNMOUNT);
+ if (S_ISDIR(inode->i_mode))
+ mask |= FS_EVENT_ON_CHILD;
+
+--
+2.43.0
+
--- /dev/null
+From 8b90e8675d00c05710f0deafa0a0e2fc85d8da29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:13 +0300
+Subject: inotify: move control flags from mask to mark flags
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 38035c04f5865c4ef9597d6beed6a7178f90f64a ]
+
+The inotify control flags in the mark mask (e.g. FS_IN_ONESHOT) are not
+relevant to object interest mask, so move them to the mark flags.
+
+This frees up some bits in the object interest mask.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-3-amir73il@gmail.com
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/fsnotify.c | 4 +--
+ fs/notify/inotify/inotify.h | 11 ++++++--
+ fs/notify/inotify/inotify_fsnotify.c | 2 +-
+ fs/notify/inotify/inotify_user.c | 38 ++++++++++++++++++----------
+ include/linux/fsnotify_backend.h | 16 +++++++-----
+ 5 files changed, 45 insertions(+), 26 deletions(-)
+
+diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
+index 70a8516b78bc5..6eee19d15e8cd 100644
+--- a/fs/notify/fsnotify.c
++++ b/fs/notify/fsnotify.c
+@@ -253,7 +253,7 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group,
+ if (WARN_ON_ONCE(!inode && !dir))
+ return 0;
+
+- if ((inode_mark->mask & FS_EXCL_UNLINK) &&
++ if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) &&
+ path && d_unlinked(path->dentry))
+ return 0;
+
+@@ -581,7 +581,7 @@ static __init int fsnotify_init(void)
+ {
+ int ret;
+
+- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
++ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
+
+ ret = init_srcu_struct(&fsnotify_mark_srcu);
+ if (ret)
+diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
+index 8f00151eb731f..7d5df7a215397 100644
+--- a/fs/notify/inotify/inotify.h
++++ b/fs/notify/inotify/inotify.h
+@@ -27,11 +27,18 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
+ * userspace. There is at least one bit (FS_EVENT_ON_CHILD) which is
+ * used only internally to the kernel.
+ */
+-#define INOTIFY_USER_MASK (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)
++#define INOTIFY_USER_MASK (IN_ALL_EVENTS)
+
+ static inline __u32 inotify_mark_user_mask(struct fsnotify_mark *fsn_mark)
+ {
+- return fsn_mark->mask & INOTIFY_USER_MASK;
++ __u32 mask = fsn_mark->mask & INOTIFY_USER_MASK;
++
++ if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK)
++ mask |= IN_EXCL_UNLINK;
++ if (fsn_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
++ mask |= IN_ONESHOT;
++
++ return mask;
+ }
+
+ extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
+diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
+index 8279827836399..993375f0db673 100644
+--- a/fs/notify/inotify/inotify_fsnotify.c
++++ b/fs/notify/inotify/inotify_fsnotify.c
+@@ -129,7 +129,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ fsnotify_destroy_event(group, fsn_event);
+ }
+
+- if (inode_mark->mask & IN_ONESHOT)
++ if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT)
+ fsnotify_destroy_mark(inode_mark, group);
+
+ return 0;
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index b87759b8402be..fdce87902b382 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -107,6 +107,21 @@ static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
+ return mask;
+ }
+
++#define INOTIFY_MARK_FLAGS \
++ (FSNOTIFY_MARK_FLAG_EXCL_UNLINK | FSNOTIFY_MARK_FLAG_IN_ONESHOT)
++
++static inline unsigned int inotify_arg_to_flags(u32 arg)
++{
++ unsigned int flags = 0;
++
++ if (arg & IN_EXCL_UNLINK)
++ flags |= FSNOTIFY_MARK_FLAG_EXCL_UNLINK;
++ if (arg & IN_ONESHOT)
++ flags |= FSNOTIFY_MARK_FLAG_IN_ONESHOT;
++
++ return flags;
++}
++
+ static inline u32 inotify_mask_to_arg(__u32 mask)
+ {
+ return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
+@@ -518,13 +533,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
+ struct fsnotify_mark *fsn_mark;
+ struct inotify_inode_mark *i_mark;
+ __u32 old_mask, new_mask;
+- __u32 mask;
+- int add = (arg & IN_MASK_ADD);
++ int replace = !(arg & IN_MASK_ADD);
+ int create = (arg & IN_MASK_CREATE);
+ int ret;
+
+- mask = inotify_arg_to_mask(inode, arg);
+-
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
+ if (!fsn_mark)
+ return -ENOENT;
+@@ -537,10 +549,12 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
+
+ spin_lock(&fsn_mark->lock);
+ old_mask = fsn_mark->mask;
+- if (add)
+- fsn_mark->mask |= mask;
+- else
+- fsn_mark->mask = mask;
++ if (replace) {
++ fsn_mark->mask = 0;
++ fsn_mark->flags &= ~INOTIFY_MARK_FLAGS;
++ }
++ fsn_mark->mask |= inotify_arg_to_mask(inode, arg);
++ fsn_mark->flags |= inotify_arg_to_flags(arg);
+ new_mask = fsn_mark->mask;
+ spin_unlock(&fsn_mark->lock);
+
+@@ -571,19 +585,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
+ u32 arg)
+ {
+ struct inotify_inode_mark *tmp_i_mark;
+- __u32 mask;
+ int ret;
+ struct idr *idr = &group->inotify_data.idr;
+ spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+
+- mask = inotify_arg_to_mask(inode, arg);
+-
+ tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+ if (unlikely(!tmp_i_mark))
+ return -ENOMEM;
+
+ fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
+- tmp_i_mark->fsn_mark.mask = mask;
++ tmp_i_mark->fsn_mark.mask = inotify_arg_to_mask(inode, arg);
++ tmp_i_mark->fsn_mark.flags = inotify_arg_to_flags(arg);
+ tmp_i_mark->wd = -1;
+
+ ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
+@@ -837,9 +849,7 @@ static int __init inotify_user_setup(void)
+ BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
+ BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
+ BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+- BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
+ BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
+- BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+
+ BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
+
+diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
+index 0805b74cae441..b1c72edd97845 100644
+--- a/include/linux/fsnotify_backend.h
++++ b/include/linux/fsnotify_backend.h
+@@ -55,7 +55,6 @@
+ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */
+ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */
+
+-#define FS_EXCL_UNLINK 0x04000000 /* do not send events if object is unlinked */
+ /*
+ * Set on inode mark that cares about things that happen to its children.
+ * Always set for dnotify and inotify.
+@@ -66,7 +65,6 @@
+ #define FS_RENAME 0x10000000 /* File was renamed */
+ #define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */
+ #define FS_ISDIR 0x40000000 /* event occurred against dir */
+-#define FS_IN_ONESHOT 0x80000000 /* only send event once */
+
+ #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO)
+
+@@ -106,8 +104,7 @@
+ FS_ERROR)
+
+ /* Extra flags that may be reported with event or control handling of events */
+-#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
+- FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
++#define ALL_FSNOTIFY_FLAGS (FS_ISDIR | FS_EVENT_ON_CHILD | FS_DN_MULTISHOT)
+
+ #define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS)
+
+@@ -473,9 +470,14 @@ struct fsnotify_mark {
+ struct fsnotify_mark_connector *connector;
+ /* Events types to ignore [mark->lock, group->mark_mutex] */
+ __u32 ignored_mask;
+-#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01
+-#define FSNOTIFY_MARK_FLAG_ALIVE 0x02
+-#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04
++ /* General fsnotify mark flags */
++#define FSNOTIFY_MARK_FLAG_ALIVE 0x0001
++#define FSNOTIFY_MARK_FLAG_ATTACHED 0x0002
++ /* inotify mark flags */
++#define FSNOTIFY_MARK_FLAG_EXCL_UNLINK 0x0010
++#define FSNOTIFY_MARK_FLAG_IN_ONESHOT 0x0020
++ /* fanotify mark flags */
++#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x0100
+ unsigned int flags; /* flags [mark->lock] */
+ };
+
+--
+2.43.0
+
--- /dev/null
+From d9b2bbfd1ecc309be7a20ae7d875648957c34dc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:18 +0300
+Subject: inotify: use fsnotify group lock helpers
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit 642054b87058019be36033f73c3e48ffff1915aa ]
+
+inotify inode marks pin the inode so there is no need to set the
+FSNOTIFY_GROUP_NOFS flag.
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-8-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220321112310.vpr7oxro2xkz5llh@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/notify/inotify/inotify_user.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
+index 0d8e1bead23ea..266b1302290ba 100644
+--- a/fs/notify/inotify/inotify_user.c
++++ b/fs/notify/inotify/inotify_user.c
+@@ -632,13 +632,13 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
+ {
+ int ret = 0;
+
+- mutex_lock(&group->mark_mutex);
++ fsnotify_group_lock(group);
+ /* try to update and existing watch with the new arg */
+ ret = inotify_update_existing_watch(group, inode, arg);
+ /* no mark present, try to add a new one */
+ if (ret == -ENOENT)
+ ret = inotify_new_watch(group, inode, arg);
+- mutex_unlock(&group->mark_mutex);
++ fsnotify_group_unlock(group);
+
+ return ret;
+ }
+--
+2.43.0
+
--- /dev/null
+From db946ae04b96fdba46d660cecc5fa0e490beebc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 3 Jun 2023 07:14:14 +1000
+Subject: lockd: drop inappropriate svc_get() from locked_get()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 665e89ab7c5af1f2d260834c861a74b01a30f95f ]
+
+The below-mentioned patch was intended to simplify refcounting on the
+svc_serv used by lockd. The goal was to only ever have a single
+reference from the single thread. To that end we dropped a call to
+lockd_start_svc() (except when creating thread) which would take a
+reference, and dropped the svc_put(serv) that would drop that reference.
+
+Unfortunately we didn't also remove the svc_get() from
+lockd_create_svc() in the case where the svc_serv already existed.
+So after the patch:
+ - on the first call the svc_serv was allocated and the one reference
+ was given to the thread, so there are no extra references
+ - on subsequent calls svc_get() was called so there is now an extra
+ reference.
+This is clearly not consistent.
+
+The inconsistency is also clear in the current code: lockd_get()
+takes *two* references, one on nlmsvc_serv and one by incrementing
+nlmsvc_users. This clearly does not match lockd_put().
+
+So: drop that svc_get() from lockd_get() (which used to be in
+lockd_create_svc()).
+
+Reported-by: Ido Schimmel <idosch@idosch.org>
+Closes: https://lore.kernel.org/linux-nfs/ZHsI%2FH16VX9kJQX1@shredder/T/#u
+Fixes: b73a2972041b ("lockd: move lockd_start_svc() call into lockd_create_svc()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Tested-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 59ef8a1f843f3..5579e67da17db 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -355,7 +355,6 @@ static int lockd_get(void)
+ int error;
+
+ if (nlmsvc_serv) {
+- svc_get(nlmsvc_serv);
+ nlmsvc_users++;
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 1b0690a97450e81016de393cd9ded0bd0154ba33 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Nov 2022 14:36:37 -0500
+Subject: lockd: ensure we use the correct file descriptor when unlocking
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 69efce009f7df888e1fede3cb2913690eb829f52 ]
+
+Shared locks are set on O_RDONLY descriptors and exclusive locks are set
+on O_WRONLY ones. nlmsvc_unlock however calls vfs_lock_file twice, once
+for each descriptor, but it doesn't reset fl_file. Ensure that it does.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svclock.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
+index 9c1aa75441e1c..9eae99e08e699 100644
+--- a/fs/lockd/svclock.c
++++ b/fs/lockd/svclock.c
+@@ -659,11 +659,13 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
+ nlmsvc_cancel_blocked(net, file, lock);
+
+ lock->fl.fl_type = F_UNLCK;
+- if (file->f_file[O_RDONLY])
+- error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
++ lock->fl.fl_file = file->f_file[O_RDONLY];
++ if (lock->fl.fl_file)
++ error = vfs_lock_file(lock->fl.fl_file, F_SETLK,
+ &lock->fl, NULL);
+- if (file->f_file[O_WRONLY])
+- error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
++ lock->fl.fl_file = file->f_file[O_WRONLY];
++ if (lock->fl.fl_file)
++ error |= vfs_lock_file(lock->fl.fl_file, F_SETLK,
+ &lock->fl, NULL);
+
+ return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
+--
+2.43.0
+
--- /dev/null
+From eb22a39bcdd7b072c084eb8fcb216e8c74345887 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Nov 2022 14:36:38 -0500
+Subject: lockd: fix file selection in nlmsvc_cancel_blocked
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 9f27783b4dd235ef3c8dbf69fc6322777450323c ]
+
+We currently do a lock_to_openmode call based on the arguments from the
+NLM_UNLOCK call, but that will always set the fl_type of the lock to
+F_UNLCK, and the O_RDONLY descriptor is always chosen.
+
+Fix it to use the file_lock from the block instead.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svclock.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
+index 9eae99e08e699..4e30f3c509701 100644
+--- a/fs/lockd/svclock.c
++++ b/fs/lockd/svclock.c
+@@ -699,9 +699,10 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
+ block = nlmsvc_lookup_block(file, lock);
+ mutex_unlock(&file->f_mutex);
+ if (block != NULL) {
+- mode = lock_to_openmode(&lock->fl);
+- vfs_cancel_lock(block->b_file->f_file[mode],
+- &block->b_call->a_args.lock.fl);
++ struct file_lock *fl = &block->b_call->a_args.lock.fl;
++
++ mode = lock_to_openmode(fl);
++ vfs_cancel_lock(block->b_file->f_file[mode], fl);
+ status = nlmsvc_unlink_block(block);
+ nlmsvc_release_block(block);
+ }
+--
+2.43.0
+
--- /dev/null
+From e8d8ea13f66775fba5b7841bccc08e45a87ff1e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: introduce lockd_put()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 865b674069e05e5779fcf8cf7a166d2acb7e930b ]
+
+There is some cleanup that is duplicated in lockd_down() and the failure
+path of lockd_up().
+Factor these out into a new lockd_put() and call it from both places.
+
+lockd_put() does *not* take the mutex - that must be held by the caller.
+It decrements nlmsvc_users and if that reaches zero, it cleans up.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 64 +++++++++++++++++++++-----------------------------
+ 1 file changed, 27 insertions(+), 37 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 9aa499a761591..7f12c280fd30d 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -351,14 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
+ };
+ #endif
+
+-static void lockd_unregister_notifiers(void)
+-{
+- unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
+-#if IS_ENABLED(CONFIG_IPV6)
+- unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+-#endif
+-}
+-
+ static int lockd_start_svc(struct svc_serv *serv)
+ {
+ int error;
+@@ -450,6 +442,27 @@ static int lockd_create_svc(void)
+ return 0;
+ }
+
++static void lockd_put(void)
++{
++ if (WARN(nlmsvc_users <= 0, "lockd_down: no users!\n"))
++ return;
++ if (--nlmsvc_users)
++ return;
++
++ unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
++#if IS_ENABLED(CONFIG_IPV6)
++ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
++#endif
++
++ if (nlmsvc_task) {
++ kthread_stop(nlmsvc_task);
++ dprintk("lockd_down: service stopped\n");
++ nlmsvc_task = NULL;
++ }
++ nlmsvc_serv = NULL;
++ dprintk("lockd_down: service destroyed\n");
++}
++
+ /*
+ * Bring up the lockd process if it's not already up.
+ */
+@@ -461,21 +474,16 @@ int lockd_up(struct net *net, const struct cred *cred)
+
+ error = lockd_create_svc();
+ if (error)
+- goto err_create;
++ goto err;
++ nlmsvc_users++;
+
+ error = lockd_up_net(nlmsvc_serv, net, cred);
+ if (error < 0) {
+- goto err_put;
++ lockd_put();
++ goto err;
+ }
+
+- nlmsvc_users++;
+-err_put:
+- if (nlmsvc_users == 0) {
+- lockd_unregister_notifiers();
+- kthread_stop(nlmsvc_task);
+- nlmsvc_serv = NULL;
+- }
+-err_create:
++err:
+ mutex_unlock(&nlmsvc_mutex);
+ return error;
+ }
+@@ -489,25 +497,7 @@ lockd_down(struct net *net)
+ {
+ mutex_lock(&nlmsvc_mutex);
+ lockd_down_net(nlmsvc_serv, net);
+- if (nlmsvc_users) {
+- if (--nlmsvc_users)
+- goto out;
+- } else {
+- printk(KERN_ERR "lockd_down: no users! task=%p\n",
+- nlmsvc_task);
+- BUG();
+- }
+-
+- if (!nlmsvc_task) {
+- printk(KERN_ERR "lockd_down: no lockd running.\n");
+- BUG();
+- }
+- lockd_unregister_notifiers();
+- kthread_stop(nlmsvc_task);
+- dprintk("lockd_down: service destroyed\n");
+- nlmsvc_serv = NULL;
+- nlmsvc_task = NULL;
+-out:
++ lockd_put();
+ mutex_unlock(&nlmsvc_mutex);
+ }
+ EXPORT_SYMBOL_GPL(lockd_down);
+--
+2.43.0
+
--- /dev/null
+From 98941d9f1448130df88ec7833aaec351bbdbf1a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: introduce nlmsvc_serv
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 2840fe864c91a0fe822169b1fbfddbcac9aeac43 ]
+
+lockd has two globals - nlmsvc_task and nlmsvc_rqst - but mostly it
+wants the 'struct svc_serv', and when it doesn't want it exactly it can
+get to what it wants from the serv.
+
+This patch is a first step to removing nlmsvc_task and nlmsvc_rqst. It
+introduces nlmsvc_serv to store the 'struct svc_serv*'. This is set as
+soon as the serv is created, and cleared only when it is destroyed.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 36 ++++++++++++++++++++----------------
+ 1 file changed, 20 insertions(+), 16 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index a9669b106dbde..83874878f41d8 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -54,6 +54,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops);
+
+ static DEFINE_MUTEX(nlmsvc_mutex);
+ static unsigned int nlmsvc_users;
++static struct svc_serv *nlmsvc_serv;
+ static struct task_struct *nlmsvc_task;
+ static struct svc_rqst *nlmsvc_rqst;
+ unsigned long nlmsvc_timeout;
+@@ -306,13 +307,12 @@ static int lockd_inetaddr_event(struct notifier_block *this,
+ !atomic_inc_not_zero(&nlm_ntf_refcnt))
+ goto out;
+
+- if (nlmsvc_rqst) {
++ if (nlmsvc_serv) {
+ dprintk("lockd_inetaddr_event: removed %pI4\n",
+ &ifa->ifa_local);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ifa->ifa_local;
+- svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
+- (struct sockaddr *)&sin);
++ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin);
+ }
+ atomic_dec(&nlm_ntf_refcnt);
+ wake_up(&nlm_ntf_wq);
+@@ -336,14 +336,13 @@ static int lockd_inet6addr_event(struct notifier_block *this,
+ !atomic_inc_not_zero(&nlm_ntf_refcnt))
+ goto out;
+
+- if (nlmsvc_rqst) {
++ if (nlmsvc_serv) {
+ dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
+- svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
+- (struct sockaddr *)&sin6);
++ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6);
+ }
+ atomic_dec(&nlm_ntf_refcnt);
+ wake_up(&nlm_ntf_wq);
+@@ -423,15 +422,17 @@ static const struct svc_serv_ops lockd_sv_ops = {
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
+ };
+
+-static struct svc_serv *lockd_create_svc(void)
++static int lockd_create_svc(void)
+ {
+ struct svc_serv *serv;
+
+ /*
+ * Check whether we're already up and running.
+ */
+- if (nlmsvc_rqst)
+- return svc_get(nlmsvc_rqst->rq_server);
++ if (nlmsvc_serv) {
++ svc_get(nlmsvc_serv);
++ return 0;
++ }
+
+ /*
+ * Sanity check: if there's no pid,
+@@ -448,14 +449,15 @@ static struct svc_serv *lockd_create_svc(void)
+ serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
+ if (!serv) {
+ printk(KERN_WARNING "lockd_up: create service failed\n");
+- return ERR_PTR(-ENOMEM);
++ return -ENOMEM;
+ }
++ nlmsvc_serv = serv;
+ register_inetaddr_notifier(&lockd_inetaddr_notifier);
+ #if IS_ENABLED(CONFIG_IPV6)
+ register_inet6addr_notifier(&lockd_inet6addr_notifier);
+ #endif
+ dprintk("lockd_up: service created\n");
+- return serv;
++ return 0;
+ }
+
+ /*
+@@ -468,11 +470,10 @@ int lockd_up(struct net *net, const struct cred *cred)
+
+ mutex_lock(&nlmsvc_mutex);
+
+- serv = lockd_create_svc();
+- if (IS_ERR(serv)) {
+- error = PTR_ERR(serv);
++ error = lockd_create_svc();
++ if (error)
+ goto err_create;
+- }
++ serv = nlmsvc_serv;
+
+ error = lockd_up_net(serv, net, cred);
+ if (error < 0) {
+@@ -487,6 +488,8 @@ int lockd_up(struct net *net, const struct cred *cred)
+ }
+ nlmsvc_users++;
+ err_put:
++ if (nlmsvc_users == 0)
++ nlmsvc_serv = NULL;
+ svc_put(serv);
+ err_create:
+ mutex_unlock(&nlmsvc_mutex);
+@@ -501,7 +504,7 @@ void
+ lockd_down(struct net *net)
+ {
+ mutex_lock(&nlmsvc_mutex);
+- lockd_down_net(nlmsvc_rqst->rq_server, net);
++ lockd_down_net(nlmsvc_serv, net);
+ if (nlmsvc_users) {
+ if (--nlmsvc_users)
+ goto out;
+@@ -519,6 +522,7 @@ lockd_down(struct net *net)
+ dprintk("lockd_down: service stopped\n");
+ lockd_svc_exit_thread();
+ dprintk("lockd_down: service destroyed\n");
++ nlmsvc_serv = NULL;
+ nlmsvc_task = NULL;
+ nlmsvc_rqst = NULL;
+ out:
+--
+2.43.0
+
--- /dev/null
+From 23069375a9d6af6db887f087de022c57f6ef36c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 17:53:18 -0400
+Subject: lockd: introduce safe async lock op
+
+From: Alexander Aring <aahringo@redhat.com>
+
+[ Upstream commit 2dd10de8e6bcbacf85ad758b904543c294820c63 ]
+
+This patch mostly reverts commit 40595cdc93ed ("nfs: block notification
+on fs with its own ->lock") and introduces an EXPORT_OP_ASYNC_LOCK
+export flag to signal that the "own ->lock" implementation supports
+async lock requests. The only main user is DLM, which is used by the GFS2
+and OCFS2 filesystems. Those implement their own lock() implementation and
+return FILE_LOCK_DEFERRED as return value. Since commit 40595cdc93ed
+("nfs: block notification on fs with its own ->lock") the DLM
+implementation was never updated. This patch prepares for DLM
+to set the EXPORT_OP_ASYNC_LOCK export flag and to update the DLM
+plock implementation accordingly.
+
+Acked-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Alexander Aring <aahringo@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ Documentation/filesystems/nfs/exporting.rst | 7 +++++++
+ fs/lockd/svclock.c | 4 +---
+ fs/nfsd/nfs4state.c | 10 +++++++---
+ include/linux/exportfs.h | 14 ++++++++++++++
+ 4 files changed, 29 insertions(+), 6 deletions(-)
+
+diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
+index 6f59a364f84cd..6a1cbd7de38df 100644
+--- a/Documentation/filesystems/nfs/exporting.rst
++++ b/Documentation/filesystems/nfs/exporting.rst
+@@ -241,3 +241,10 @@ following flags are defined:
+ all of an inode's dirty data on last close. Exports that behave this
+ way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
+ waiting for writeback when closing such files.
++
++ EXPORT_OP_ASYNC_LOCK - Indicates a capable filesystem to do async lock
++ requests from lockd. Only set EXPORT_OP_ASYNC_LOCK if the filesystem has
++ its own ->lock() functionality as core posix_lock_file() implementation
++ has no async lock request handling yet. For more information about how to
++ indicate an async lock request from a ->lock() file_operations struct, see
++ fs/locks.c and comment for the function vfs_lock_file().
+diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
+index 4e30f3c509701..55c0a03311884 100644
+--- a/fs/lockd/svclock.c
++++ b/fs/lockd/svclock.c
+@@ -470,9 +470,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
+ struct nlm_host *host, struct nlm_lock *lock, int wait,
+ struct nlm_cookie *cookie, int reclaim)
+ {
+-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ struct inode *inode = nlmsvc_file_inode(file);
+-#endif
+ struct nlm_block *block = NULL;
+ int error;
+ int mode;
+@@ -486,7 +484,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
+ (long long)lock->fl.fl_end,
+ wait);
+
+- if (nlmsvc_file_file(file)->f_op->lock) {
++ if (!exportfs_lock_op_is_async(inode->i_sb->s_export_op)) {
+ async_block = wait;
+ wait = 0;
+ }
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 69bc4622a95a4..4d95b2052c31a 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7424,6 +7424,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_blocked_lock *nbl = NULL;
+ struct file_lock *file_lock = NULL;
+ struct file_lock *conflock = NULL;
++ struct super_block *sb;
+ __be32 status = 0;
+ int lkflg;
+ int err;
+@@ -7445,6 +7446,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ return status;
+ }
++ sb = cstate->current_fh.fh_dentry->d_sb;
+
+ if (lock->lk_is_new) {
+ if (nfsd4_has_session(cstate))
+@@ -7496,7 +7498,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ fp = lock_stp->st_stid.sc_file;
+ switch (lock->lk_type) {
+ case NFS4_READW_LT:
+- if (nfsd4_has_session(cstate))
++ if (nfsd4_has_session(cstate) ||
++ exportfs_lock_op_is_async(sb->s_export_op))
+ fl_flags |= FL_SLEEP;
+ fallthrough;
+ case NFS4_READ_LT:
+@@ -7508,7 +7511,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ fl_type = F_RDLCK;
+ break;
+ case NFS4_WRITEW_LT:
+- if (nfsd4_has_session(cstate))
++ if (nfsd4_has_session(cstate) ||
++ exportfs_lock_op_is_async(sb->s_export_op))
+ fl_flags |= FL_SLEEP;
+ fallthrough;
+ case NFS4_WRITE_LT:
+@@ -7536,7 +7540,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ * for file locks), so don't attempt blocking lock notifications
+ * on those filesystems:
+ */
+- if (nf->nf_file->f_op->lock)
++ if (!exportfs_lock_op_is_async(sb->s_export_op))
+ fl_flags &= ~FL_SLEEP;
+
+ nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
+diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
+index 218fc5c54e901..6525f4b7eb97f 100644
+--- a/include/linux/exportfs.h
++++ b/include/linux/exportfs.h
+@@ -222,9 +222,23 @@ struct export_operations {
+ atomic attribute updates
+ */
+ #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
++#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */
+ unsigned long flags;
+ };
+
++/**
++ * exportfs_lock_op_is_async() - export op supports async lock operation
++ * @export_ops: the nfs export operations to check
++ *
++ * Returns true if the nfs export_operations structure has
++ * EXPORT_OP_ASYNC_LOCK in their flags set
++ */
++static inline bool
++exportfs_lock_op_is_async(const struct export_operations *export_ops)
++{
++ return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
++}
++
+ extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
+ int *max_len, struct inode *parent);
+ extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
+--
+2.43.0
+
--- /dev/null
+From 4739a3344625930d26de7d23a150a80c14a767e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Aug 2022 23:01:16 +0200
+Subject: lockd: move from strlcpy with unused retval to strscpy
+
+From: Wolfram Sang <wsa+renesas@sang-engineering.com>
+
+[ Upstream commit 97f8e62572555f8ad578d7b1739ba64d5d2cac0f ]
+
+Follow the advice of the below link and prefer 'strscpy' in this
+subsystem. Conversion is 1:1 because the return value is not used.
+Generated by a coccinelle script.
+
+Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/
+Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/host.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/lockd/host.c b/fs/lockd/host.c
+index f802223e71abe..cdc8e12cdac44 100644
+--- a/fs/lockd/host.c
++++ b/fs/lockd/host.c
+@@ -164,7 +164,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
+ host->h_addrbuf = nsm->sm_addrbuf;
+ host->net = ni->net;
+ host->h_cred = get_cred(ni->cred);
+- strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
++ strscpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
+
+ out:
+ return host;
+--
+2.43.0
+
--- /dev/null
+From d0f6937c5b19b23c4bff7b60dacef31ded0b1690 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: move lockd_start_svc() call into lockd_create_svc()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit b73a2972041bee70eb0cbbb25fa77828c63c916b ]
+
+lockd_start_svc() only needs to be called once, just after the svc is
+created. If the start fails, the svc is discarded too.
+
+It thus makes sense to call lockd_start_svc() from lockd_create_svc().
+This allows us to remove the test against nlmsvc_rqst at the start of
+lockd_start_svc() - it must always be NULL.
+
+lockd_up() only held an extra reference on the svc until a thread was
+created - then it dropped it. The thread - and thus the extra reference
+- will remain until kthread_stop() is called.
+Now that the thread is created in lockd_create_svc(), the extra
+reference can be dropped there. So the 'serv' variable is no longer
+needed in lockd_up().
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 22 ++++++++++------------
+ 1 file changed, 10 insertions(+), 12 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 20cebb191350f..91e7c839841ec 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -359,9 +359,6 @@ static int lockd_start_svc(struct svc_serv *serv)
+ {
+ int error;
+
+- if (nlmsvc_rqst)
+- return 0;
+-
+ /*
+ * Create the kernel thread and wait for it to start.
+ */
+@@ -406,6 +403,7 @@ static const struct svc_serv_ops lockd_sv_ops = {
+ static int lockd_create_svc(void)
+ {
+ struct svc_serv *serv;
++ int error;
+
+ /*
+ * Check whether we're already up and running.
+@@ -432,6 +430,13 @@ static int lockd_create_svc(void)
+ printk(KERN_WARNING "lockd_up: create service failed\n");
+ return -ENOMEM;
+ }
++
++ error = lockd_start_svc(serv);
++ /* The thread now holds the only reference */
++ svc_put(serv);
++ if (error < 0)
++ return error;
++
+ nlmsvc_serv = serv;
+ register_inetaddr_notifier(&lockd_inetaddr_notifier);
+ #if IS_ENABLED(CONFIG_IPV6)
+@@ -446,7 +451,6 @@ static int lockd_create_svc(void)
+ */
+ int lockd_up(struct net *net, const struct cred *cred)
+ {
+- struct svc_serv *serv;
+ int error;
+
+ mutex_lock(&nlmsvc_mutex);
+@@ -454,25 +458,19 @@ int lockd_up(struct net *net, const struct cred *cred)
+ error = lockd_create_svc();
+ if (error)
+ goto err_create;
+- serv = nlmsvc_serv;
+
+- error = lockd_up_net(serv, net, cred);
++ error = lockd_up_net(nlmsvc_serv, net, cred);
+ if (error < 0) {
+ goto err_put;
+ }
+
+- error = lockd_start_svc(serv);
+- if (error < 0) {
+- lockd_down_net(serv, net);
+- goto err_put;
+- }
+ nlmsvc_users++;
+ err_put:
+ if (nlmsvc_users == 0) {
+ lockd_unregister_notifiers();
++ kthread_stop(nlmsvc_task);
+ nlmsvc_serv = NULL;
+ }
+- svc_put(serv);
+ err_create:
+ mutex_unlock(&nlmsvc_mutex);
+ return error;
+--
+2.43.0
+
--- /dev/null
+From c1773402a25fc29736487d0d8cb2a3f44e1c7971 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: move svc_exit_thread() into the thread
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 6a4e2527a63620a820c4ebf3596b57176da26fb3 ]
+
+The normal place to call svc_exit_thread() is from the thread itself
+just before it exits.
+Do this for lockd.
+
+This means that nlmsvc_rqst is not used outside of lockd_start_svc(),
+so it can be made local to that function, and renamed to 'rqst'.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 91e7c839841ec..9aa499a761591 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -56,7 +56,6 @@ static DEFINE_MUTEX(nlmsvc_mutex);
+ static unsigned int nlmsvc_users;
+ static struct svc_serv *nlmsvc_serv;
+ static struct task_struct *nlmsvc_task;
+-static struct svc_rqst *nlmsvc_rqst;
+ unsigned long nlmsvc_timeout;
+
+ unsigned int lockd_net_id;
+@@ -182,6 +181,11 @@ lockd(void *vrqstp)
+ nlm_shutdown_hosts();
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ locks_end_grace(&ln->lockd_manager);
++
++ dprintk("lockd_down: service stopped\n");
++
++ svc_exit_thread(rqstp);
++
+ return 0;
+ }
+
+@@ -358,13 +362,14 @@ static void lockd_unregister_notifiers(void)
+ static int lockd_start_svc(struct svc_serv *serv)
+ {
+ int error;
++ struct svc_rqst *rqst;
+
+ /*
+ * Create the kernel thread and wait for it to start.
+ */
+- nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
+- if (IS_ERR(nlmsvc_rqst)) {
+- error = PTR_ERR(nlmsvc_rqst);
++ rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
++ if (IS_ERR(rqst)) {
++ error = PTR_ERR(rqst);
+ printk(KERN_WARNING
+ "lockd_up: svc_rqst allocation failed, error=%d\n",
+ error);
+@@ -374,24 +379,23 @@ static int lockd_start_svc(struct svc_serv *serv)
+ svc_sock_update_bufs(serv);
+ serv->sv_maxconn = nlm_max_connections;
+
+- nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
++ nlmsvc_task = kthread_create(lockd, rqst, "%s", serv->sv_name);
+ if (IS_ERR(nlmsvc_task)) {
+ error = PTR_ERR(nlmsvc_task);
+ printk(KERN_WARNING
+ "lockd_up: kthread_run failed, error=%d\n", error);
+ goto out_task;
+ }
+- nlmsvc_rqst->rq_task = nlmsvc_task;
++ rqst->rq_task = nlmsvc_task;
+ wake_up_process(nlmsvc_task);
+
+ dprintk("lockd_up: service started\n");
+ return 0;
+
+ out_task:
+- svc_exit_thread(nlmsvc_rqst);
++ svc_exit_thread(rqst);
+ nlmsvc_task = NULL;
+ out_rqst:
+- nlmsvc_rqst = NULL;
+ return error;
+ }
+
+@@ -500,9 +504,6 @@ lockd_down(struct net *net)
+ }
+ lockd_unregister_notifiers();
+ kthread_stop(nlmsvc_task);
+- dprintk("lockd_down: service stopped\n");
+- svc_exit_thread(nlmsvc_rqst);
+- nlmsvc_rqst = NULL;
+ dprintk("lockd_down: service destroyed\n");
+ nlmsvc_serv = NULL;
+ nlmsvc_task = NULL;
+--
+2.43.0
+
--- /dev/null
+From a1ae6cd53dcc5c6658653fe4c2d4c6ce2923c694 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: rename lockd_create_svc() to lockd_get()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit ecd3ad68d2c6d3ae178a63a2d9a02c392904fd36 ]
+
+lockd_create_svc() already does an svc_get() if the service already
+exists, so it is more like a "get" than a "create".
+
+So:
+ - Move the increment of nlmsvc_users into the function as well
+ - rename to lockd_get().
+
+It is now the inverse of lockd_put().
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 7f12c280fd30d..1a7c11118b320 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -396,16 +396,14 @@ static const struct svc_serv_ops lockd_sv_ops = {
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
+ };
+
+-static int lockd_create_svc(void)
++static int lockd_get(void)
+ {
+ struct svc_serv *serv;
+ int error;
+
+- /*
+- * Check whether we're already up and running.
+- */
+ if (nlmsvc_serv) {
+ svc_get(nlmsvc_serv);
++ nlmsvc_users++;
+ return 0;
+ }
+
+@@ -439,6 +437,7 @@ static int lockd_create_svc(void)
+ register_inet6addr_notifier(&lockd_inet6addr_notifier);
+ #endif
+ dprintk("lockd_up: service created\n");
++ nlmsvc_users++;
+ return 0;
+ }
+
+@@ -472,10 +471,9 @@ int lockd_up(struct net *net, const struct cred *cred)
+
+ mutex_lock(&nlmsvc_mutex);
+
+- error = lockd_create_svc();
++ error = lockd_get();
+ if (error)
+ goto err;
+- nlmsvc_users++;
+
+ error = lockd_up_net(nlmsvc_serv, net, cred);
+ if (error < 0) {
+--
+2.43.0
+
--- /dev/null
+From b2a4086d64456be0a19bd6828bcb7c01745103a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Nov 2022 14:36:36 -0500
+Subject: lockd: set missing fl_flags field when retrieving args
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 75c7940d2a86d3f1b60a0a265478cb8fc887b970 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc4proc.c | 1 +
+ fs/lockd/svcproc.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
+index 742b8d31d2fad..e318d55e4c0ef 100644
+--- a/fs/lockd/svc4proc.c
++++ b/fs/lockd/svc4proc.c
+@@ -52,6 +52,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
+ *filp = file;
+
+ /* Set up the missing parts of the file_lock structure */
++ lock->fl.fl_flags = FL_POSIX;
+ lock->fl.fl_file = file->f_file[mode];
+ lock->fl.fl_pid = current->tgid;
+ lock->fl.fl_start = (loff_t)lock->lock_start;
+diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
+index 7e25d5583c7d0..2a615032f5d0f 100644
+--- a/fs/lockd/svcproc.c
++++ b/fs/lockd/svcproc.c
+@@ -77,6 +77,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
+
+ /* Set up the missing parts of the file_lock structure */
+ mode = lock_to_openmode(&lock->fl);
++ lock->fl.fl_flags = FL_POSIX;
+ lock->fl.fl_file = file->f_file[mode];
+ lock->fl.fl_pid = current->tgid;
+ lock->fl.fl_lmops = &nlmsvc_lock_operations;
+--
+2.43.0
+
--- /dev/null
+From e48928f890a1acd1ffb050f7da1b1b53f8ec0ddf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: simplify management of network status notifiers
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 5a8a7ff57421b7de3ae72019938ffb5daaee36e7 ]
+
+Now that the network status notifiers use nlmsvc_serv rather than
+nlmsvc_rqst the management can be simplified.
+
+Notifier unregistration synchronises with any pending notifications so
+provided we unregister before nlmsvc_serv is freed no further interlock
+is required.
+
+So we move the unregister call to just before the thread is killed
+(which destroys the service) and just before the service is destroyed in
+the failure-path of lockd_up().
+
+Then nlm_ntf_refcnt and nlm_ntf_wq can be removed.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 35 +++++++++--------------------------
+ 1 file changed, 9 insertions(+), 26 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 83874878f41d8..20cebb191350f 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -59,9 +59,6 @@ static struct task_struct *nlmsvc_task;
+ static struct svc_rqst *nlmsvc_rqst;
+ unsigned long nlmsvc_timeout;
+
+-static atomic_t nlm_ntf_refcnt = ATOMIC_INIT(0);
+-static DECLARE_WAIT_QUEUE_HEAD(nlm_ntf_wq);
+-
+ unsigned int lockd_net_id;
+
+ /*
+@@ -303,8 +300,7 @@ static int lockd_inetaddr_event(struct notifier_block *this,
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct sockaddr_in sin;
+
+- if ((event != NETDEV_DOWN) ||
+- !atomic_inc_not_zero(&nlm_ntf_refcnt))
++ if (event != NETDEV_DOWN)
+ goto out;
+
+ if (nlmsvc_serv) {
+@@ -314,8 +310,6 @@ static int lockd_inetaddr_event(struct notifier_block *this,
+ sin.sin_addr.s_addr = ifa->ifa_local;
+ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin);
+ }
+- atomic_dec(&nlm_ntf_refcnt);
+- wake_up(&nlm_ntf_wq);
+
+ out:
+ return NOTIFY_DONE;
+@@ -332,8 +326,7 @@ static int lockd_inet6addr_event(struct notifier_block *this,
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ struct sockaddr_in6 sin6;
+
+- if ((event != NETDEV_DOWN) ||
+- !atomic_inc_not_zero(&nlm_ntf_refcnt))
++ if (event != NETDEV_DOWN)
+ goto out;
+
+ if (nlmsvc_serv) {
+@@ -344,8 +337,6 @@ static int lockd_inet6addr_event(struct notifier_block *this,
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
+ svc_age_temp_xprts_now(nlmsvc_serv, (struct sockaddr *)&sin6);
+ }
+- atomic_dec(&nlm_ntf_refcnt);
+- wake_up(&nlm_ntf_wq);
+
+ out:
+ return NOTIFY_DONE;
+@@ -362,14 +353,6 @@ static void lockd_unregister_notifiers(void)
+ #if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+ #endif
+- wait_event(nlm_ntf_wq, atomic_read(&nlm_ntf_refcnt) == 0);
+-}
+-
+-static void lockd_svc_exit_thread(void)
+-{
+- atomic_dec(&nlm_ntf_refcnt);
+- lockd_unregister_notifiers();
+- svc_exit_thread(nlmsvc_rqst);
+ }
+
+ static int lockd_start_svc(struct svc_serv *serv)
+@@ -388,11 +371,9 @@ static int lockd_start_svc(struct svc_serv *serv)
+ printk(KERN_WARNING
+ "lockd_up: svc_rqst allocation failed, error=%d\n",
+ error);
+- lockd_unregister_notifiers();
+ goto out_rqst;
+ }
+
+- atomic_inc(&nlm_ntf_refcnt);
+ svc_sock_update_bufs(serv);
+ serv->sv_maxconn = nlm_max_connections;
+
+@@ -410,7 +391,7 @@ static int lockd_start_svc(struct svc_serv *serv)
+ return 0;
+
+ out_task:
+- lockd_svc_exit_thread();
++ svc_exit_thread(nlmsvc_rqst);
+ nlmsvc_task = NULL;
+ out_rqst:
+ nlmsvc_rqst = NULL;
+@@ -477,7 +458,6 @@ int lockd_up(struct net *net, const struct cred *cred)
+
+ error = lockd_up_net(serv, net, cred);
+ if (error < 0) {
+- lockd_unregister_notifiers();
+ goto err_put;
+ }
+
+@@ -488,8 +468,10 @@ int lockd_up(struct net *net, const struct cred *cred)
+ }
+ nlmsvc_users++;
+ err_put:
+- if (nlmsvc_users == 0)
++ if (nlmsvc_users == 0) {
++ lockd_unregister_notifiers();
+ nlmsvc_serv = NULL;
++ }
+ svc_put(serv);
+ err_create:
+ mutex_unlock(&nlmsvc_mutex);
+@@ -518,13 +500,14 @@ lockd_down(struct net *net)
+ printk(KERN_ERR "lockd_down: no lockd running.\n");
+ BUG();
+ }
++ lockd_unregister_notifiers();
+ kthread_stop(nlmsvc_task);
+ dprintk("lockd_down: service stopped\n");
+- lockd_svc_exit_thread();
++ svc_exit_thread(nlmsvc_rqst);
++ nlmsvc_rqst = NULL;
+ dprintk("lockd_down: service destroyed\n");
+ nlmsvc_serv = NULL;
+ nlmsvc_task = NULL;
+- nlmsvc_rqst = NULL;
+ out:
+ mutex_unlock(&nlmsvc_mutex);
+ }
+--
+2.43.0
+
--- /dev/null
+From 55c8dc152df98b428aa863806d4b883e02367d94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 09:19:43 -0500
+Subject: lockd: use locks_inode_context helper
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 98b41ffe0afdfeaa1439a5d6bd2db4a94277e31b ]
+
+lockd currently doesn't access i_flctx safely. This requires a
+smp_load_acquire, as the pointer is set via cmpxchg (a release
+operation).
+
+Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
+Cc: Anna Schumaker <anna@kernel.org>
+Cc: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svcsubs.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
+index 3515f17eaf3fb..e3b6229e7ae5c 100644
+--- a/fs/lockd/svcsubs.c
++++ b/fs/lockd/svcsubs.c
+@@ -210,7 +210,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
+ {
+ struct inode *inode = nlmsvc_file_inode(file);
+ struct file_lock *fl;
+- struct file_lock_context *flctx = inode->i_flctx;
++ struct file_lock_context *flctx = locks_inode_context(inode);
+ struct nlm_host *lockhost;
+
+ if (!flctx || list_empty_careful(&flctx->flc_posix))
+@@ -265,7 +265,7 @@ nlm_file_inuse(struct nlm_file *file)
+ {
+ struct inode *inode = nlmsvc_file_inode(file);
+ struct file_lock *fl;
+- struct file_lock_context *flctx = inode->i_flctx;
++ struct file_lock_context *flctx = locks_inode_context(inode);
+
+ if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
+ return 1;
+--
+2.43.0
+
--- /dev/null
+From a77b89a09fd02f8933d4b30526c16016c086de02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: lockd: use svc_set_num_threads() for thread start and stop
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 6b044fbaab02292fedb17565dbb3f2528083b169 ]
+
+svc_set_num_threads() does everything that lockd_start_svc() does, except
+set sv_maxconn. It also (when passed 0) finds the threads and
+stops them with kthread_stop().
+
+So move the setting for sv_maxconn, and use svc_set_num_threads()
+
+We now don't need nlmsvc_task.
+
+Now that we use svc_set_num_threads() it makes sense to set svo_module.
+This requests that the thread exits with module_put_and_exit().
+Also fix the documentation for svo_module to make this explicit.
+
+svc_prepare_thread is now only used where it is defined, so it can be
+made static.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+[ cel: upstream, module_put_and_exit was replaced via a merge commit ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 58 ++++++--------------------------------
+ include/linux/sunrpc/svc.h | 6 ++--
+ net/sunrpc/svc.c | 3 +-
+ 3 files changed, 12 insertions(+), 55 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 1a7c11118b320..0475c5a5d061e 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -55,7 +55,6 @@ EXPORT_SYMBOL_GPL(nlmsvc_ops);
+ static DEFINE_MUTEX(nlmsvc_mutex);
+ static unsigned int nlmsvc_users;
+ static struct svc_serv *nlmsvc_serv;
+-static struct task_struct *nlmsvc_task;
+ unsigned long nlmsvc_timeout;
+
+ unsigned int lockd_net_id;
+@@ -186,7 +185,7 @@ lockd(void *vrqstp)
+
+ svc_exit_thread(rqstp);
+
+- return 0;
++ module_put_and_kthread_exit(0);
+ }
+
+ static int create_lockd_listener(struct svc_serv *serv, const char *name,
+@@ -292,8 +291,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
+ __func__, net->ns.inum);
+ }
+ } else {
+- pr_err("%s: no users! task=%p, net=%x\n",
+- __func__, nlmsvc_task, net->ns.inum);
++ pr_err("%s: no users! net=%x\n",
++ __func__, net->ns.inum);
+ BUG();
+ }
+ }
+@@ -351,49 +350,11 @@ static struct notifier_block lockd_inet6addr_notifier = {
+ };
+ #endif
+
+-static int lockd_start_svc(struct svc_serv *serv)
+-{
+- int error;
+- struct svc_rqst *rqst;
+-
+- /*
+- * Create the kernel thread and wait for it to start.
+- */
+- rqst = svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
+- if (IS_ERR(rqst)) {
+- error = PTR_ERR(rqst);
+- printk(KERN_WARNING
+- "lockd_up: svc_rqst allocation failed, error=%d\n",
+- error);
+- goto out_rqst;
+- }
+-
+- svc_sock_update_bufs(serv);
+- serv->sv_maxconn = nlm_max_connections;
+-
+- nlmsvc_task = kthread_create(lockd, rqst, "%s", serv->sv_name);
+- if (IS_ERR(nlmsvc_task)) {
+- error = PTR_ERR(nlmsvc_task);
+- printk(KERN_WARNING
+- "lockd_up: kthread_run failed, error=%d\n", error);
+- goto out_task;
+- }
+- rqst->rq_task = nlmsvc_task;
+- wake_up_process(nlmsvc_task);
+-
+- dprintk("lockd_up: service started\n");
+- return 0;
+-
+-out_task:
+- svc_exit_thread(rqst);
+- nlmsvc_task = NULL;
+-out_rqst:
+- return error;
+-}
+-
+ static const struct svc_serv_ops lockd_sv_ops = {
+ .svo_shutdown = svc_rpcb_cleanup,
++ .svo_function = lockd,
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
++ .svo_module = THIS_MODULE,
+ };
+
+ static int lockd_get(void)
+@@ -425,7 +386,8 @@ static int lockd_get(void)
+ return -ENOMEM;
+ }
+
+- error = lockd_start_svc(serv);
++ serv->sv_maxconn = nlm_max_connections;
++ error = svc_set_num_threads(serv, NULL, 1);
+ /* The thread now holds the only reference */
+ svc_put(serv);
+ if (error < 0)
+@@ -453,11 +415,7 @@ static void lockd_put(void)
+ unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
+ #endif
+
+- if (nlmsvc_task) {
+- kthread_stop(nlmsvc_task);
+- dprintk("lockd_down: service stopped\n");
+- nlmsvc_task = NULL;
+- }
++ svc_set_num_threads(nlmsvc_serv, NULL, 0);
+ nlmsvc_serv = NULL;
+ dprintk("lockd_down: service destroyed\n");
+ }
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 35bb1c4393400..be535cc4fea07 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -64,7 +64,9 @@ struct svc_serv_ops {
+ /* queue up a transport for servicing */
+ void (*svo_enqueue_xprt)(struct svc_xprt *);
+
+- /* optional module to count when adding threads (pooled svcs only) */
++ /* optional module to count when adding threads.
++ * Thread function must call module_put_and_kthread_exit() to exit.
++ */
+ struct module *svo_module;
+ };
+
+@@ -506,8 +508,6 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int,
+ const struct svc_serv_ops *);
+ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
+ struct svc_pool *pool, int node);
+-struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
+- struct svc_pool *pool, int node);
+ void svc_rqst_replace_page(struct svc_rqst *rqstp,
+ struct page *page);
+ void svc_rqst_free(struct svc_rqst *);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index fee7a22578b64..f53ff8f2602f2 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -652,7 +652,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
+ }
+ EXPORT_SYMBOL_GPL(svc_rqst_alloc);
+
+-struct svc_rqst *
++static struct svc_rqst *
+ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
+ {
+ struct svc_rqst *rqstp;
+@@ -672,7 +672,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
+ spin_unlock_bh(&pool->sp_lock);
+ return rqstp;
+ }
+-EXPORT_SYMBOL_GPL(svc_prepare_thread);
+
+ /*
+ * Choose a pool in which to create a new thread, for svc_set_num_threads
+--
+2.43.0
+
--- /dev/null
+From 7998a0fe0c6b7fc63283eae5bfe8c1d8596b153d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Dec 2021 12:20:13 -0500
+Subject: nfs: block notification on fs with its own ->lock
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+[ Upstream commit 40595cdc93edf4110c0f0c0b06f8d82008f23929 ]
+
+NFSv4.1 supports an optional lock notification feature which notifies
+the client when a lock comes available. (Normally NFSv4 clients just
+poll for locks if necessary.) To make that work, we need to request a
+blocking lock from the filesystem.
+
+We turned that off for NFS in commit f657f8eef3ff ("nfs: don't atempt
+blocking locks on nfs reexports") [sic] because it actually blocks the
+nfsd thread while waiting for the lock.
+
+Thanks to Vasily Averin for pointing out that NFS isn't the only
+filesystem with that problem.
+
+Any filesystem that leaves ->lock NULL will use posix_lock_file(), which
+does the right thing. Simplest is just to assume that any filesystem
+that defines its own ->lock is not safe to request a blocking lock from.
+
+So, this patch mostly reverts commit f657f8eef3ff ("nfs: don't atempt
+blocking locks on nfs reexports") [sic] and commit b840be2f00c0 ("lockd:
+don't attempt blocking locks on nfs reexports"), and instead uses a
+check of ->lock (Vasily's suggestion) to decide whether to support
+blocking lock notifications on a given filesystem. Also add a little
+documentation.
+
+Perhaps someday we could add back an export flag to allow
+filesystems with "good" ->lock methods to support blocking lock
+notifications.
+
+Reported-by: Vasily Averin <vvs@virtuozzo.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+[ cel: Description rewritten to address checkpatch nits ]
+[ cel: Fixed warning when SUNRPC debugging is disabled ]
+[ cel: Fixed NULL check ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Vasily Averin <vvs@virtuozzo.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svclock.c | 6 ++++--
+ fs/nfs/export.c | 2 +-
+ fs/nfsd/nfs4state.c | 18 ++++++++++++------
+ include/linux/exportfs.h | 2 --
+ include/linux/lockd/lockd.h | 9 +++++++--
+ 5 files changed, 24 insertions(+), 13 deletions(-)
+
+diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
+index e9b85d8fd5fe7..cb3658ab9b7ae 100644
+--- a/fs/lockd/svclock.c
++++ b/fs/lockd/svclock.c
+@@ -470,8 +470,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
+ struct nlm_host *host, struct nlm_lock *lock, int wait,
+ struct nlm_cookie *cookie, int reclaim)
+ {
+- struct nlm_block *block = NULL;
++#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ struct inode *inode = nlmsvc_file_inode(file);
++#endif
++ struct nlm_block *block = NULL;
+ int error;
+ int mode;
+ int async_block = 0;
+@@ -484,7 +486,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
+ (long long)lock->fl.fl_end,
+ wait);
+
+- if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
++ if (nlmsvc_file_file(file)->f_op->lock) {
+ async_block = wait;
+ wait = 0;
+ }
+diff --git a/fs/nfs/export.c b/fs/nfs/export.c
+index d772c20bbfd15..37a1a88df7717 100644
+--- a/fs/nfs/export.c
++++ b/fs/nfs/export.c
+@@ -180,5 +180,5 @@ const struct export_operations nfs_export_ops = {
+ .fetch_iversion = nfs_fetch_iversion,
+ .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
+ EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
+- EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
++ EXPORT_OP_NOATOMIC_ATTR,
+ };
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index f7e2beded6d7f..5ee11f0e24d3b 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -6874,7 +6874,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_blocked_lock *nbl = NULL;
+ struct file_lock *file_lock = NULL;
+ struct file_lock *conflock = NULL;
+- struct super_block *sb;
+ __be32 status = 0;
+ int lkflg;
+ int err;
+@@ -6896,7 +6895,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ return status;
+ }
+- sb = cstate->current_fh.fh_dentry->d_sb;
+
+ if (lock->lk_is_new) {
+ if (nfsd4_has_session(cstate))
+@@ -6948,8 +6946,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ fp = lock_stp->st_stid.sc_file;
+ switch (lock->lk_type) {
+ case NFS4_READW_LT:
+- if (nfsd4_has_session(cstate) &&
+- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
++ if (nfsd4_has_session(cstate))
+ fl_flags |= FL_SLEEP;
+ fallthrough;
+ case NFS4_READ_LT:
+@@ -6961,8 +6958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ fl_type = F_RDLCK;
+ break;
+ case NFS4_WRITEW_LT:
+- if (nfsd4_has_session(cstate) &&
+- !(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
++ if (nfsd4_has_session(cstate))
+ fl_flags |= FL_SLEEP;
+ fallthrough;
+ case NFS4_WRITE_LT:
+@@ -6983,6 +6979,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ goto out;
+ }
+
++ /*
++ * Most filesystems with their own ->lock operations will block
++ * the nfsd thread waiting to acquire the lock. That leads to
++ * deadlocks (we don't want every nfsd thread tied up waiting
++ * for file locks), so don't attempt blocking lock notifications
++ * on those filesystems:
++ */
++ if (nf->nf_file->f_op->lock)
++ fl_flags &= ~FL_SLEEP;
++
+ nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
+ if (!nbl) {
+ dprintk("NFSD: %s: unable to allocate block!\n", __func__);
+diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
+index 3260fe7148462..fe848901fcc3a 100644
+--- a/include/linux/exportfs.h
++++ b/include/linux/exportfs.h
+@@ -221,8 +221,6 @@ struct export_operations {
+ #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
+ atomic attribute updates
+ */
+-#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
+- asychronous blocking locks */
+ unsigned long flags;
+ };
+
+diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
+index c4ae6506b8b36..fcef192e5e45e 100644
+--- a/include/linux/lockd/lockd.h
++++ b/include/linux/lockd/lockd.h
+@@ -303,10 +303,15 @@ void nlmsvc_invalidate_all(void);
+ int nlmsvc_unlock_all_by_sb(struct super_block *sb);
+ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
+
++static inline struct file *nlmsvc_file_file(struct nlm_file *file)
++{
++ return file->f_file[O_RDONLY] ?
++ file->f_file[O_RDONLY] : file->f_file[O_WRONLY];
++}
++
+ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
+ {
+- return locks_inode(file->f_file[O_RDONLY] ?
+- file->f_file[O_RDONLY] : file->f_file[O_WRONLY]);
++ return locks_inode(nlmsvc_file_file(file));
+ }
+
+ static inline int __nlm_privileged_request4(const struct sockaddr *sap)
+--
+2.43.0
+
--- /dev/null
+From 92af5f0b0679e7627d19474edb656cd63b35f8de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Oct 2021 16:16:56 -0400
+Subject: NFS: Move generic FS show macros to global header
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 9d2d48bbbdabf7b2f029369c4f926d133c1d47ad ]
+
+Refactor: Surface useful show_ macros for use by other trace
+subsystems.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/nfs4trace.h | 67 ++++++---------------
+ fs/nfs/nfstrace.h | 80 +++++--------------------
+ include/trace/events/fs.h | 122 ++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 156 insertions(+), 113 deletions(-)
+ create mode 100644 include/trace/events/fs.h
+
+diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
+index 39a45bb7d4311..afbecdb8fa21b 100644
+--- a/fs/nfs/nfs4trace.h
++++ b/fs/nfs/nfs4trace.h
+@@ -11,6 +11,8 @@
+ #include <linux/tracepoint.h>
+ #include <trace/events/sunrpc_base.h>
+
++#include <trace/events/fs.h>
++
+ TRACE_DEFINE_ENUM(EPERM);
+ TRACE_DEFINE_ENUM(ENOENT);
+ TRACE_DEFINE_ENUM(EIO);
+@@ -314,19 +316,6 @@ TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
+
+-#define show_open_flags(flags) \
+- __print_flags(flags, "|", \
+- { O_CREAT, "O_CREAT" }, \
+- { O_EXCL, "O_EXCL" }, \
+- { O_TRUNC, "O_TRUNC" }, \
+- { O_DIRECT, "O_DIRECT" })
+-
+-#define show_fmode_flags(mode) \
+- __print_flags(mode, "|", \
+- { ((__force unsigned long)FMODE_READ), "READ" }, \
+- { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
+- { ((__force unsigned long)FMODE_EXEC), "EXEC" })
+-
+ #define show_nfs_fattr_flags(valid) \
+ __print_flags((unsigned long)valid, "|", \
+ { NFS_ATTR_FATTR_TYPE, "TYPE" }, \
+@@ -796,8 +785,8 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+- __field(unsigned int, flags)
+- __field(unsigned int, fmode)
++ __field(unsigned long, flags)
++ __field(unsigned long, fmode)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+@@ -815,7 +804,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
+
+ __entry->error = -error;
+ __entry->flags = flags;
+- __entry->fmode = (__force unsigned int)ctx->mode;
++ __entry->fmode = (__force unsigned long)ctx->mode;
+ __entry->dev = ctx->dentry->d_sb->s_dev;
+ if (!IS_ERR_OR_NULL(state)) {
+ inode = state->inode;
+@@ -845,15 +834,15 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
+ ),
+
+ TP_printk(
+- "error=%ld (%s) flags=%d (%s) fmode=%s "
++ "error=%ld (%s) flags=%lu (%s) fmode=%s "
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "name=%02x:%02x:%llu/%s stateid=%d:0x%08x "
+ "openstateid=%d:0x%08x",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+ __entry->flags,
+- show_open_flags(__entry->flags),
+- show_fmode_flags(__entry->fmode),
++ show_fs_fcntl_open_flags(__entry->flags),
++ show_fs_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -907,7 +896,7 @@ TRACE_EVENT(nfs4_cached_open,
+ TP_printk(
+ "fmode=%s fileid=%02x:%02x:%llu "
+ "fhandle=0x%08x stateid=%d:0x%08x",
+- __entry->fmode ? show_fmode_flags(__entry->fmode) :
++ __entry->fmode ? show_fs_fmode_flags(__entry->fmode) :
+ "closed",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+@@ -955,7 +944,7 @@ TRACE_EVENT(nfs4_close,
+ "fhandle=0x%08x openstateid=%d:0x%08x",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+- __entry->fmode ? show_fmode_flags(__entry->fmode) :
++ __entry->fmode ? show_fs_fmode_flags(__entry->fmode) :
+ "closed",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+@@ -964,24 +953,6 @@ TRACE_EVENT(nfs4_close,
+ )
+ );
+
+-TRACE_DEFINE_ENUM(F_GETLK);
+-TRACE_DEFINE_ENUM(F_SETLK);
+-TRACE_DEFINE_ENUM(F_SETLKW);
+-TRACE_DEFINE_ENUM(F_RDLCK);
+-TRACE_DEFINE_ENUM(F_WRLCK);
+-TRACE_DEFINE_ENUM(F_UNLCK);
+-
+-#define show_lock_cmd(type) \
+- __print_symbolic((int)type, \
+- { F_GETLK, "GETLK" }, \
+- { F_SETLK, "SETLK" }, \
+- { F_SETLKW, "SETLKW" })
+-#define show_lock_type(type) \
+- __print_symbolic((int)type, \
+- { F_RDLCK, "RDLCK" }, \
+- { F_WRLCK, "WRLCK" }, \
+- { F_UNLCK, "UNLCK" })
+-
+ DECLARE_EVENT_CLASS(nfs4_lock_event,
+ TP_PROTO(
+ const struct file_lock *request,
+@@ -994,8 +965,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+- __field(int, cmd)
+- __field(char, type)
++ __field(unsigned long, cmd)
++ __field(unsigned long, type)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(dev_t, dev)
+@@ -1028,8 +999,8 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
+ "stateid=%d:0x%08x",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+- show_lock_cmd(__entry->cmd),
+- show_lock_type(__entry->type),
++ show_fs_fcntl_cmd(__entry->cmd),
++ show_fs_fcntl_lock_type(__entry->type),
+ (long long)__entry->start,
+ (long long)__entry->end,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+@@ -1064,8 +1035,8 @@ TRACE_EVENT(nfs4_set_lock,
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+- __field(int, cmd)
+- __field(char, type)
++ __field(unsigned long, cmd)
++ __field(unsigned long, type)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(dev_t, dev)
+@@ -1104,8 +1075,8 @@ TRACE_EVENT(nfs4_set_lock,
+ "stateid=%d:0x%08x lockstateid=%d:0x%08x",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+- show_lock_cmd(__entry->cmd),
+- show_lock_type(__entry->type),
++ show_fs_fcntl_cmd(__entry->cmd),
++ show_fs_fcntl_lock_type(__entry->type),
+ (long long)__entry->start,
+ (long long)__entry->end,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+@@ -1222,7 +1193,7 @@ DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
+
+ TP_printk(
+ "fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x",
+- show_fmode_flags(__entry->fmode),
++ show_fs_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
+index 69fa637a4aba8..918237677a383 100644
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -11,20 +11,9 @@
+ #include <linux/tracepoint.h>
+ #include <linux/iversion.h>
+
++#include <trace/events/fs.h>
+ #include <trace/events/sunrpc_base.h>
+
+-#define nfs_show_file_type(ftype) \
+- __print_symbolic(ftype, \
+- { DT_UNKNOWN, "UNKNOWN" }, \
+- { DT_FIFO, "FIFO" }, \
+- { DT_CHR, "CHR" }, \
+- { DT_DIR, "DIR" }, \
+- { DT_BLK, "BLK" }, \
+- { DT_REG, "REG" }, \
+- { DT_LNK, "LNK" }, \
+- { DT_SOCK, "SOCK" }, \
+- { DT_WHT, "WHT" })
+-
+ #define nfs_show_cache_validity(v) \
+ __print_flags(v, "|", \
+ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \
+@@ -131,7 +120,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done,
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->type,
+- nfs_show_file_type(__entry->type),
++ show_fs_dirent_type(__entry->type),
+ (unsigned long long)__entry->version,
+ (long long)__entry->size,
+ __entry->cache_validity,
+@@ -222,7 +211,7 @@ TRACE_EVENT(nfs_access_exit,
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->type,
+- nfs_show_file_type(__entry->type),
++ show_fs_dirent_type(__entry->type),
+ (unsigned long long)__entry->version,
+ (long long)__entry->size,
+ __entry->cache_validity,
+@@ -233,21 +222,6 @@ TRACE_EVENT(nfs_access_exit,
+ )
+ );
+
+-#define show_lookup_flags(flags) \
+- __print_flags(flags, "|", \
+- { LOOKUP_FOLLOW, "FOLLOW" }, \
+- { LOOKUP_DIRECTORY, "DIRECTORY" }, \
+- { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
+- { LOOKUP_PARENT, "PARENT" }, \
+- { LOOKUP_REVAL, "REVAL" }, \
+- { LOOKUP_RCU, "RCU" }, \
+- { LOOKUP_OPEN, "OPEN" }, \
+- { LOOKUP_CREATE, "CREATE" }, \
+- { LOOKUP_EXCL, "EXCL" }, \
+- { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \
+- { LOOKUP_EMPTY, "EMPTY" }, \
+- { LOOKUP_DOWN, "DOWN" })
+-
+ DECLARE_EVENT_CLASS(nfs_lookup_event,
+ TP_PROTO(
+ const struct inode *dir,
+@@ -274,7 +248,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
+ TP_printk(
+ "flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+ __entry->flags,
+- show_lookup_flags(__entry->flags),
++ show_fs_lookup_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -320,7 +294,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
+ "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+ -__entry->error, nfs_show_status(__entry->error),
+ __entry->flags,
+- show_lookup_flags(__entry->flags),
++ show_fs_lookup_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -342,30 +316,6 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit);
+ DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter);
+ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit);
+
+-#define show_open_flags(flags) \
+- __print_flags(flags, "|", \
+- { O_WRONLY, "O_WRONLY" }, \
+- { O_RDWR, "O_RDWR" }, \
+- { O_CREAT, "O_CREAT" }, \
+- { O_EXCL, "O_EXCL" }, \
+- { O_NOCTTY, "O_NOCTTY" }, \
+- { O_TRUNC, "O_TRUNC" }, \
+- { O_APPEND, "O_APPEND" }, \
+- { O_NONBLOCK, "O_NONBLOCK" }, \
+- { O_DSYNC, "O_DSYNC" }, \
+- { O_DIRECT, "O_DIRECT" }, \
+- { O_LARGEFILE, "O_LARGEFILE" }, \
+- { O_DIRECTORY, "O_DIRECTORY" }, \
+- { O_NOFOLLOW, "O_NOFOLLOW" }, \
+- { O_NOATIME, "O_NOATIME" }, \
+- { O_CLOEXEC, "O_CLOEXEC" })
+-
+-#define show_fmode_flags(mode) \
+- __print_flags(mode, "|", \
+- { ((__force unsigned long)FMODE_READ), "READ" }, \
+- { ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
+- { ((__force unsigned long)FMODE_EXEC), "EXEC" })
+-
+ TRACE_EVENT(nfs_atomic_open_enter,
+ TP_PROTO(
+ const struct inode *dir,
+@@ -377,7 +327,7 @@ TRACE_EVENT(nfs_atomic_open_enter,
+
+ TP_STRUCT__entry(
+ __field(unsigned long, flags)
+- __field(unsigned int, fmode)
++ __field(unsigned long, fmode)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, ctx->dentry->d_name.name)
+@@ -387,15 +337,15 @@ TRACE_EVENT(nfs_atomic_open_enter,
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+- __entry->fmode = (__force unsigned int)ctx->mode;
++ __entry->fmode = (__force unsigned long)ctx->mode;
+ __assign_str(name, ctx->dentry->d_name.name);
+ ),
+
+ TP_printk(
+ "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s",
+ __entry->flags,
+- show_open_flags(__entry->flags),
+- show_fmode_flags(__entry->fmode),
++ show_fs_fcntl_open_flags(__entry->flags),
++ show_fs_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -415,7 +365,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(unsigned long, flags)
+- __field(unsigned int, fmode)
++ __field(unsigned long, fmode)
+ __field(dev_t, dev)
+ __field(u64, dir)
+ __string(name, ctx->dentry->d_name.name)
+@@ -426,7 +376,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->dir = NFS_FILEID(dir);
+ __entry->flags = flags;
+- __entry->fmode = (__force unsigned int)ctx->mode;
++ __entry->fmode = (__force unsigned long)ctx->mode;
+ __assign_str(name, ctx->dentry->d_name.name);
+ ),
+
+@@ -435,8 +385,8 @@ TRACE_EVENT(nfs_atomic_open_exit,
+ "name=%02x:%02x:%llu/%s",
+ -__entry->error, nfs_show_status(__entry->error),
+ __entry->flags,
+- show_open_flags(__entry->flags),
+- show_fmode_flags(__entry->fmode),
++ show_fs_fcntl_open_flags(__entry->flags),
++ show_fs_fmode_flags(__entry->fmode),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -469,7 +419,7 @@ TRACE_EVENT(nfs_create_enter,
+ TP_printk(
+ "flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+ __entry->flags,
+- show_open_flags(__entry->flags),
++ show_fs_fcntl_open_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -506,7 +456,7 @@ TRACE_EVENT(nfs_create_exit,
+ "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+ -__entry->error, nfs_show_status(__entry->error),
+ __entry->flags,
+- show_open_flags(__entry->flags),
++ show_fs_fcntl_open_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+diff --git a/include/trace/events/fs.h b/include/trace/events/fs.h
+new file mode 100644
+index 0000000000000..738b97f22f365
+--- /dev/null
++++ b/include/trace/events/fs.h
+@@ -0,0 +1,122 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Display helpers for generic filesystem items
++ *
++ * Author: Chuck Lever <chuck.lever@oracle.com>
++ *
++ * Copyright (c) 2020, Oracle and/or its affiliates.
++ */
++
++#include <linux/fs.h>
++
++#define show_fs_dirent_type(x) \
++ __print_symbolic(x, \
++ { DT_UNKNOWN, "UNKNOWN" }, \
++ { DT_FIFO, "FIFO" }, \
++ { DT_CHR, "CHR" }, \
++ { DT_DIR, "DIR" }, \
++ { DT_BLK, "BLK" }, \
++ { DT_REG, "REG" }, \
++ { DT_LNK, "LNK" }, \
++ { DT_SOCK, "SOCK" }, \
++ { DT_WHT, "WHT" })
++
++#define show_fs_fcntl_open_flags(x) \
++ __print_flags(x, "|", \
++ { O_WRONLY, "O_WRONLY" }, \
++ { O_RDWR, "O_RDWR" }, \
++ { O_CREAT, "O_CREAT" }, \
++ { O_EXCL, "O_EXCL" }, \
++ { O_NOCTTY, "O_NOCTTY" }, \
++ { O_TRUNC, "O_TRUNC" }, \
++ { O_APPEND, "O_APPEND" }, \
++ { O_NONBLOCK, "O_NONBLOCK" }, \
++ { O_DSYNC, "O_DSYNC" }, \
++ { O_DIRECT, "O_DIRECT" }, \
++ { O_LARGEFILE, "O_LARGEFILE" }, \
++ { O_DIRECTORY, "O_DIRECTORY" }, \
++ { O_NOFOLLOW, "O_NOFOLLOW" }, \
++ { O_NOATIME, "O_NOATIME" }, \
++ { O_CLOEXEC, "O_CLOEXEC" })
++
++#define __fmode_flag(x) { (__force unsigned long)FMODE_##x, #x }
++#define show_fs_fmode_flags(x) \
++ __print_flags(x, "|", \
++ __fmode_flag(READ), \
++ __fmode_flag(WRITE), \
++ __fmode_flag(EXEC))
++
++#ifdef CONFIG_64BIT
++#define show_fs_fcntl_cmd(x) \
++ __print_symbolic(x, \
++ { F_DUPFD, "DUPFD" }, \
++ { F_GETFD, "GETFD" }, \
++ { F_SETFD, "SETFD" }, \
++ { F_GETFL, "GETFL" }, \
++ { F_SETFL, "SETFL" }, \
++ { F_GETLK, "GETLK" }, \
++ { F_SETLK, "SETLK" }, \
++ { F_SETLKW, "SETLKW" }, \
++ { F_SETOWN, "SETOWN" }, \
++ { F_GETOWN, "GETOWN" }, \
++ { F_SETSIG, "SETSIG" }, \
++ { F_GETSIG, "GETSIG" }, \
++ { F_SETOWN_EX, "SETOWN_EX" }, \
++ { F_GETOWN_EX, "GETOWN_EX" }, \
++ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
++ { F_OFD_GETLK, "OFD_GETLK" }, \
++ { F_OFD_SETLK, "OFD_SETLK" }, \
++ { F_OFD_SETLKW, "OFD_SETLKW" })
++#else /* CONFIG_64BIT */
++#define show_fs_fcntl_cmd(x) \
++ __print_symbolic(x, \
++ { F_DUPFD, "DUPFD" }, \
++ { F_GETFD, "GETFD" }, \
++ { F_SETFD, "SETFD" }, \
++ { F_GETFL, "GETFL" }, \
++ { F_SETFL, "SETFL" }, \
++ { F_GETLK, "GETLK" }, \
++ { F_SETLK, "SETLK" }, \
++ { F_SETLKW, "SETLKW" }, \
++ { F_SETOWN, "SETOWN" }, \
++ { F_GETOWN, "GETOWN" }, \
++ { F_SETSIG, "SETSIG" }, \
++ { F_GETSIG, "GETSIG" }, \
++ { F_GETLK64, "GETLK64" }, \
++ { F_SETLK64, "SETLK64" }, \
++ { F_SETLKW64, "SETLKW64" }, \
++ { F_SETOWN_EX, "SETOWN_EX" }, \
++ { F_GETOWN_EX, "GETOWN_EX" }, \
++ { F_GETOWNER_UIDS, "GETOWNER_UIDS" }, \
++ { F_OFD_GETLK, "OFD_GETLK" }, \
++ { F_OFD_SETLK, "OFD_SETLK" }, \
++ { F_OFD_SETLKW, "OFD_SETLKW" })
++#endif /* CONFIG_64BIT */
++
++#define show_fs_fcntl_lock_type(x) \
++ __print_symbolic(x, \
++ { F_RDLCK, "RDLCK" }, \
++ { F_WRLCK, "WRLCK" }, \
++ { F_UNLCK, "UNLCK" })
++
++#define show_fs_lookup_flags(flags) \
++ __print_flags(flags, "|", \
++ { LOOKUP_FOLLOW, "FOLLOW" }, \
++ { LOOKUP_DIRECTORY, "DIRECTORY" }, \
++ { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
++ { LOOKUP_EMPTY, "EMPTY" }, \
++ { LOOKUP_DOWN, "DOWN" }, \
++ { LOOKUP_MOUNTPOINT, "MOUNTPOINT" }, \
++ { LOOKUP_REVAL, "REVAL" }, \
++ { LOOKUP_RCU, "RCU" }, \
++ { LOOKUP_OPEN, "OPEN" }, \
++ { LOOKUP_CREATE, "CREATE" }, \
++ { LOOKUP_EXCL, "EXCL" }, \
++ { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \
++ { LOOKUP_PARENT, "PARENT" }, \
++ { LOOKUP_NO_SYMLINKS, "NO_SYMLINKS" }, \
++ { LOOKUP_NO_MAGICLINKS, "NO_MAGICLINKS" }, \
++ { LOOKUP_NO_XDEV, "NO_XDEV" }, \
++ { LOOKUP_BENEATH, "BENEATH" }, \
++ { LOOKUP_IN_ROOT, "IN_ROOT" }, \
++ { LOOKUP_CACHED, "CACHED" })
+--
+2.43.0
+
--- /dev/null
+From b4ac3347df1477b8ff725fee5f09dbc5665d2318 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Oct 2021 16:17:03 -0400
+Subject: NFS: Move NFS protocol display macros to global header
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8791545eda52e8f3bc48e3cd902e38bf4ba4c9de ]
+
+Refactor: surface useful show_ macros so they can be shared between
+the client and server trace code.
+
+Additional clean up:
+- Housekeeping: ensure the correct #include files are pulled in
+ and add proper TRACE_DEFINE_ENUM where they are missing
+- Use a consistent naming scheme for the helpers
+- Store values to be displayed symbolically as unsigned long, as
+ that is the type that the __print_yada() functions take
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/nfs4trace.h | 403 ++++---------------------------------
+ fs/nfs/nfstrace.h | 117 ++---------
+ fs/nfs/pnfs.h | 4 -
+ fs/nfsd/trace.h | 1 +
+ include/linux/nfs4.h | 4 +
+ include/trace/events/nfs.h | 375 ++++++++++++++++++++++++++++++++++
+ 6 files changed, 433 insertions(+), 471 deletions(-)
+ create mode 100644 include/trace/events/nfs.h
+
+diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
+index afbecdb8fa21b..635f13a8d44aa 100644
+--- a/fs/nfs/nfs4trace.h
++++ b/fs/nfs/nfs4trace.h
+@@ -12,309 +12,7 @@
+ #include <trace/events/sunrpc_base.h>
+
+ #include <trace/events/fs.h>
+-
+-TRACE_DEFINE_ENUM(EPERM);
+-TRACE_DEFINE_ENUM(ENOENT);
+-TRACE_DEFINE_ENUM(EIO);
+-TRACE_DEFINE_ENUM(ENXIO);
+-TRACE_DEFINE_ENUM(EACCES);
+-TRACE_DEFINE_ENUM(EEXIST);
+-TRACE_DEFINE_ENUM(EXDEV);
+-TRACE_DEFINE_ENUM(ENOTDIR);
+-TRACE_DEFINE_ENUM(EISDIR);
+-TRACE_DEFINE_ENUM(EFBIG);
+-TRACE_DEFINE_ENUM(ENOSPC);
+-TRACE_DEFINE_ENUM(EROFS);
+-TRACE_DEFINE_ENUM(EMLINK);
+-TRACE_DEFINE_ENUM(ENAMETOOLONG);
+-TRACE_DEFINE_ENUM(ENOTEMPTY);
+-TRACE_DEFINE_ENUM(EDQUOT);
+-TRACE_DEFINE_ENUM(ESTALE);
+-TRACE_DEFINE_ENUM(EBADHANDLE);
+-TRACE_DEFINE_ENUM(EBADCOOKIE);
+-TRACE_DEFINE_ENUM(ENOTSUPP);
+-TRACE_DEFINE_ENUM(ETOOSMALL);
+-TRACE_DEFINE_ENUM(EREMOTEIO);
+-TRACE_DEFINE_ENUM(EBADTYPE);
+-TRACE_DEFINE_ENUM(EAGAIN);
+-TRACE_DEFINE_ENUM(ELOOP);
+-TRACE_DEFINE_ENUM(EOPNOTSUPP);
+-TRACE_DEFINE_ENUM(EDEADLK);
+-TRACE_DEFINE_ENUM(ENOMEM);
+-TRACE_DEFINE_ENUM(EKEYEXPIRED);
+-TRACE_DEFINE_ENUM(ETIMEDOUT);
+-TRACE_DEFINE_ENUM(ERESTARTSYS);
+-TRACE_DEFINE_ENUM(ECONNREFUSED);
+-TRACE_DEFINE_ENUM(ECONNRESET);
+-TRACE_DEFINE_ENUM(ENETUNREACH);
+-TRACE_DEFINE_ENUM(EHOSTUNREACH);
+-TRACE_DEFINE_ENUM(EHOSTDOWN);
+-TRACE_DEFINE_ENUM(EPIPE);
+-TRACE_DEFINE_ENUM(EPFNOSUPPORT);
+-TRACE_DEFINE_ENUM(EPROTONOSUPPORT);
+-
+-TRACE_DEFINE_ENUM(NFS4_OK);
+-TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
+-TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
+-TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
+-TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
+-TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
+-TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
+-TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
+-TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
+-TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
+-TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
+-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
+-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
+-TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
+-TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
+-TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
+-TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
+-TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
+-TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
+-TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
+-TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
+-TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
+-TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
+-TRACE_DEFINE_ENUM(NFS4ERR_IO);
+-TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
+-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
+-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
+-TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
+-TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
+-TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
+-TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
+-TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
+-TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
+-TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
+-TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
+-TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
+-TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
+-TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
+-TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
+-TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
+-TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
+-TRACE_DEFINE_ENUM(NFS4ERR_PERM);
+-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
+-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
+-TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
+-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
+-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
+-TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
+-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
+-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+-TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
+-TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
+-TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
+-TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
+-TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
+-TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
+-TRACE_DEFINE_ENUM(NFS4ERR_SAME);
+-TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
+-TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
+-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
+-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
+-TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
+-TRACE_DEFINE_ENUM(NFS4ERR_STALE);
+-TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
+-TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
+-TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
+-TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
+-TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
+-TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
+-TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
+-TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
+-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
+-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
+-TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+-
+-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+-
+-#define show_nfsv4_errors(error) \
+- __print_symbolic(error, \
+- { NFS4_OK, "OK" }, \
+- /* Mapped by nfs4_stat_to_errno() */ \
+- { EPERM, "EPERM" }, \
+- { ENOENT, "ENOENT" }, \
+- { EIO, "EIO" }, \
+- { ENXIO, "ENXIO" }, \
+- { EACCES, "EACCES" }, \
+- { EEXIST, "EEXIST" }, \
+- { EXDEV, "EXDEV" }, \
+- { ENOTDIR, "ENOTDIR" }, \
+- { EISDIR, "EISDIR" }, \
+- { EFBIG, "EFBIG" }, \
+- { ENOSPC, "ENOSPC" }, \
+- { EROFS, "EROFS" }, \
+- { EMLINK, "EMLINK" }, \
+- { ENAMETOOLONG, "ENAMETOOLONG" }, \
+- { ENOTEMPTY, "ENOTEMPTY" }, \
+- { EDQUOT, "EDQUOT" }, \
+- { ESTALE, "ESTALE" }, \
+- { EBADHANDLE, "EBADHANDLE" }, \
+- { EBADCOOKIE, "EBADCOOKIE" }, \
+- { ENOTSUPP, "ENOTSUPP" }, \
+- { ETOOSMALL, "ETOOSMALL" }, \
+- { EREMOTEIO, "EREMOTEIO" }, \
+- { EBADTYPE, "EBADTYPE" }, \
+- { EAGAIN, "EAGAIN" }, \
+- { ELOOP, "ELOOP" }, \
+- { EOPNOTSUPP, "EOPNOTSUPP" }, \
+- { EDEADLK, "EDEADLK" }, \
+- /* RPC errors */ \
+- { ENOMEM, "ENOMEM" }, \
+- { EKEYEXPIRED, "EKEYEXPIRED" }, \
+- { ETIMEDOUT, "ETIMEDOUT" }, \
+- { ERESTARTSYS, "ERESTARTSYS" }, \
+- { ECONNREFUSED, "ECONNREFUSED" }, \
+- { ECONNRESET, "ECONNRESET" }, \
+- { ENETUNREACH, "ENETUNREACH" }, \
+- { EHOSTUNREACH, "EHOSTUNREACH" }, \
+- { EHOSTDOWN, "EHOSTDOWN" }, \
+- { EPIPE, "EPIPE" }, \
+- { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
+- { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
+- /* NFSv4 native errors */ \
+- { NFS4ERR_ACCESS, "ACCESS" }, \
+- { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
+- { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
+- { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
+- { NFS4ERR_BADCHAR, "BADCHAR" }, \
+- { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
+- { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
+- { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
+- { NFS4ERR_BADLABEL, "BADLABEL" }, \
+- { NFS4ERR_BADNAME, "BADNAME" }, \
+- { NFS4ERR_BADOWNER, "BADOWNER" }, \
+- { NFS4ERR_BADSESSION, "BADSESSION" }, \
+- { NFS4ERR_BADSLOT, "BADSLOT" }, \
+- { NFS4ERR_BADTYPE, "BADTYPE" }, \
+- { NFS4ERR_BADXDR, "BADXDR" }, \
+- { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
+- { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
+- { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
+- { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
+- { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
+- { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
+- { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+- { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
+- { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
+- { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
+- { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \
+- "CONN_NOT_BOUND_TO_SESSION" }, \
+- { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
+- { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
+- { NFS4ERR_DELAY, "DELAY" }, \
+- { NFS4ERR_DELEG_ALREADY_WANTED, \
+- "DELEG_ALREADY_WANTED" }, \
+- { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
+- { NFS4ERR_DENIED, "DENIED" }, \
+- { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
+- { NFS4ERR_DQUOT, "DQUOT" }, \
+- { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
+- { NFS4ERR_EXIST, "EXIST" }, \
+- { NFS4ERR_EXPIRED, "EXPIRED" }, \
+- { NFS4ERR_FBIG, "FBIG" }, \
+- { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
+- { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
+- { NFS4ERR_GRACE, "GRACE" }, \
+- { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
+- { NFS4ERR_INVAL, "INVAL" }, \
+- { NFS4ERR_IO, "IO" }, \
+- { NFS4ERR_ISDIR, "ISDIR" }, \
+- { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
+- { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
+- { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
+- { NFS4ERR_LOCKED, "LOCKED" }, \
+- { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
+- { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
+- { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
+- { NFS4ERR_MLINK, "MLINK" }, \
+- { NFS4ERR_MOVED, "MOVED" }, \
+- { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
+- { NFS4ERR_NOENT, "NOENT" }, \
+- { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
+- { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
+- { NFS4ERR_NOSPC, "NOSPC" }, \
+- { NFS4ERR_NOTDIR, "NOTDIR" }, \
+- { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
+- { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
+- { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
+- { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
+- { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
+- { NFS4ERR_NXIO, "NXIO" }, \
+- { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
+- { NFS4ERR_OPENMODE, "OPENMODE" }, \
+- { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
+- { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
+- { NFS4ERR_PERM, "PERM" }, \
+- { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
+- { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
+- { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
+- { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
+- { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
+- { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
+- { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
+- { NFS4ERR_REP_TOO_BIG_TO_CACHE, \
+- "REP_TOO_BIG_TO_CACHE" }, \
+- { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
+- { NFS4ERR_RESOURCE, "RESOURCE" }, \
+- { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
+- { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
+- { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
+- { NFS4ERR_ROFS, "ROFS" }, \
+- { NFS4ERR_SAME, "SAME" }, \
+- { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
+- { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
+- { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
+- { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
+- { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
+- { NFS4ERR_STALE, "STALE" }, \
+- { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
+- { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
+- { NFS4ERR_SYMLINK, "SYMLINK" }, \
+- { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
+- { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
+- { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
+- { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
+- { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
+- { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
+- { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
+- { NFS4ERR_XDEV, "XDEV" }, \
+- /* ***** Internal to Linux NFS client ***** */ \
+- { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+- { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
++#include <trace/events/nfs.h>
+
+ #define show_nfs_fattr_flags(valid) \
+ __print_flags((unsigned long)valid, "|", \
+@@ -355,7 +53,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
+ TP_printk(
+ "error=%ld (%s) dstaddr=%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ __get_str(dstaddr)
+ )
+ );
+@@ -379,29 +77,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
+ DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
+ DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
+
+-#define show_nfs4_sequence_status_flags(status) \
+- __print_flags((unsigned long)status, "|", \
+- { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \
+- "CB_GSS_CONTEXTS_EXPIRING" }, \
+- { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \
+- "CB_GSS_CONTEXTS_EXPIRED" }, \
+- { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \
+- "EXPIRED_ALL_STATE_REVOKED" }, \
+- { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \
+- "EXPIRED_SOME_STATE_REVOKED" }, \
+- { SEQ4_STATUS_ADMIN_STATE_REVOKED, \
+- "ADMIN_STATE_REVOKED" }, \
+- { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, \
+- "RECALLABLE_STATE_REVOKED" }, \
+- { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
+- { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \
+- "RESTART_RECLAIM_NEEDED" }, \
+- { SEQ4_STATUS_CB_PATH_DOWN_SESSION, \
+- "CB_PATH_DOWN_SESSION" }, \
+- { SEQ4_STATUS_BACKCHANNEL_FAULT, \
+- "BACKCHANNEL_FAULT" })
+-
+ TRACE_EVENT(nfs4_sequence_done,
+ TP_PROTO(
+ const struct nfs4_session *session,
+@@ -415,7 +90,7 @@ TRACE_EVENT(nfs4_sequence_done,
+ __field(unsigned int, seq_nr)
+ __field(unsigned int, highest_slotid)
+ __field(unsigned int, target_highest_slotid)
+- __field(unsigned int, status_flags)
++ __field(unsigned long, status_flags)
+ __field(unsigned long, error)
+ ),
+
+@@ -434,16 +109,16 @@ TRACE_EVENT(nfs4_sequence_done,
+ TP_printk(
+ "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+ "highest_slotid=%u target_highest_slotid=%u "
+- "status_flags=%u (%s)",
++ "status_flags=0x%lx (%s)",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ __entry->session,
+ __entry->slot_nr,
+ __entry->seq_nr,
+ __entry->highest_slotid,
+ __entry->target_highest_slotid,
+ __entry->status_flags,
+- show_nfs4_sequence_status_flags(__entry->status_flags)
++ show_nfs4_seq4_status(__entry->status_flags)
+ )
+ );
+
+@@ -480,7 +155,7 @@ TRACE_EVENT(nfs4_cb_sequence,
+ "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+ "highest_slotid=%u",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ __entry->session,
+ __entry->slot_nr,
+ __entry->seq_nr,
+@@ -517,7 +192,7 @@ TRACE_EVENT(nfs4_cb_seqid_err,
+ "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+ "highest_slotid=%u",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ __entry->session,
+ __entry->slot_nr,
+ __entry->seq_nr,
+@@ -653,7 +328,7 @@ TRACE_EVENT(nfs4_state_mgr_failed,
+ "hostname=%s clp state=%s error=%ld (%s) section=%s",
+ __get_str(hostname),
+ show_nfs4_clp_state(__entry->state), -__entry->error,
+- show_nfsv4_errors(__entry->error), __get_str(section)
++ show_nfs4_status(__entry->error), __get_str(section)
+
+ )
+ )
+@@ -724,7 +399,7 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event,
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " xid=0x%08x error=%ld (%s) operation=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+- -__entry->error, show_nfsv4_errors(__entry->error),
++ -__entry->error, show_nfs4_status(__entry->error),
+ __entry->op
+ )
+ );
+@@ -839,7 +514,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
+ "name=%02x:%02x:%llu/%s stateid=%d:0x%08x "
+ "openstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ __entry->flags,
+ show_fs_fcntl_open_flags(__entry->flags),
+ show_fs_fmode_flags(__entry->fmode),
+@@ -943,7 +618,7 @@ TRACE_EVENT(nfs4_close,
+ "error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu "
+ "fhandle=0x%08x openstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ __entry->fmode ? show_fs_fmode_flags(__entry->fmode) :
+ "closed",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+@@ -998,7 +673,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ show_fs_fcntl_cmd(__entry->cmd),
+ show_fs_fcntl_lock_type(__entry->type),
+ (long long)__entry->start,
+@@ -1074,7 +749,7 @@ TRACE_EVENT(nfs4_set_lock,
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x lockstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ show_fs_fcntl_cmd(__entry->cmd),
+ show_fs_fcntl_lock_type(__entry->type),
+ (long long)__entry->start,
+@@ -1240,7 +915,7 @@ TRACE_EVENT(nfs4_delegreturn_exit,
+ "error=%ld (%s) dev=%02x:%02x fhandle=0x%08x "
+ "stateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash
+@@ -1283,7 +958,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1330,7 +1005,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event,
+ TP_printk(
+ "error=%ld (%s) name=%02x:%02x:%llu/%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -1377,7 +1052,7 @@ TRACE_EVENT(nfs4_lookupp,
+ TP_printk(
+ "error=%ld (%s) inode=%02x:%02x:%llu",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->ino
+ )
+@@ -1416,7 +1091,7 @@ TRACE_EVENT(nfs4_rename,
+ "error=%ld (%s) oldname=%02x:%02x:%llu/%s "
+ "newname=%02x:%02x:%llu/%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->olddir,
+ __get_str(oldname),
+@@ -1451,7 +1126,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_event,
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle
+@@ -1509,7 +1184,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1562,7 +1237,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "valid=%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1618,7 +1293,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "dstaddr=%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1679,7 +1354,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x dstaddr=%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1728,7 +1403,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event,
+
+ TP_printk(
+ "error=%ld (%s) id=%u name=%s",
+- -__entry->error, show_nfsv4_errors(__entry->error),
++ -__entry->error, show_nfs4_status(__entry->error),
+ __entry->id,
+ __get_str(name)
+ )
+@@ -1806,7 +1481,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
+ "offset=%lld count=%u res=%u stateid=%d:0x%08x "
+ "layoutstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1880,7 +1555,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
+ "offset=%lld count=%u res=%u stateid=%d:0x%08x "
+ "layoutstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1944,7 +1619,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u layoutstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -1964,16 +1639,6 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit);
+ #ifdef CONFIG_NFS_V4_1
+ DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds);
+
+-TRACE_DEFINE_ENUM(IOMODE_READ);
+-TRACE_DEFINE_ENUM(IOMODE_RW);
+-TRACE_DEFINE_ENUM(IOMODE_ANY);
+-
+-#define show_pnfs_iomode(iomode) \
+- __print_symbolic(iomode, \
+- { IOMODE_READ, "READ" }, \
+- { IOMODE_RW, "RW" }, \
+- { IOMODE_ANY, "ANY" })
+-
+ TRACE_EVENT(nfs4_layoutget,
+ TP_PROTO(
+ const struct nfs_open_context *ctx,
+@@ -2029,11 +1694,11 @@ TRACE_EVENT(nfs4_layoutget,
+ "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x "
+ "layoutstateid=%d:0x%08x",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+- show_pnfs_iomode(__entry->iomode),
++ show_pnfs_layout_iomode(__entry->iomode),
+ (unsigned long long)__entry->offset,
+ (unsigned long long)__entry->count,
+ __entry->stateid_seq, __entry->stateid_hash,
+@@ -2127,7 +1792,7 @@ TRACE_EVENT(pnfs_update_layout,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+- show_pnfs_iomode(__entry->iomode),
++ show_pnfs_layout_iomode(__entry->iomode),
+ (unsigned long long)__entry->pos,
+ (unsigned long long)__entry->count,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash,
+@@ -2181,7 +1846,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+- show_pnfs_iomode(__entry->iomode),
++ show_pnfs_layout_iomode(__entry->iomode),
+ (unsigned long long)__entry->pos,
+ (unsigned long long)__entry->count,
+ __entry->layoutstateid_seq, __entry->layoutstateid_hash,
+@@ -2326,7 +1991,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -2382,7 +2047,7 @@ TRACE_EVENT(ff_layout_commit_error,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%llu count=%u dstaddr=%s",
+ -__entry->error,
+- show_nfsv4_errors(__entry->error),
++ show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
+index 918237677a383..f6013d3b110b8 100644
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -12,6 +12,7 @@
+ #include <linux/iversion.h>
+
+ #include <trace/events/fs.h>
++#include <trace/events/nfs.h>
+ #include <trace/events/sunrpc_base.h>
+
+ #define nfs_show_cache_validity(v) \
+@@ -115,7 +116,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done,
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "type=%u (%s) version=%llu size=%lld "
+ "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -206,7 +207,7 @@ TRACE_EVENT(nfs_access_exit,
+ "type=%u (%s) version=%llu size=%lld "
+ "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) "
+ "mask=0x%x permitted=0x%x",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+@@ -292,7 +293,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
+
+ TP_printk(
+ "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ __entry->flags,
+ show_fs_lookup_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+@@ -383,7 +384,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
+ TP_printk(
+ "error=%ld (%s) flags=0x%lx (%s) fmode=%s "
+ "name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ __entry->flags,
+ show_fs_fcntl_open_flags(__entry->flags),
+ show_fs_fmode_flags(__entry->fmode),
+@@ -454,7 +455,7 @@ TRACE_EVENT(nfs_create_exit,
+
+ TP_printk(
+ "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ __entry->flags,
+ show_fs_fcntl_open_flags(__entry->flags),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+@@ -524,7 +525,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done,
+
+ TP_printk(
+ "error=%ld (%s) name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -614,7 +615,7 @@ TRACE_EVENT(nfs_link_exit,
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->fileid,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+@@ -701,7 +702,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
+ TP_printk(
+ "error=%ld (%s) old_name=%02x:%02x:%llu/%s "
+ "new_name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->old_dir,
+ __get_str(old_name),
+@@ -755,7 +756,7 @@ TRACE_EVENT(nfs_sillyrename_unlink,
+
+ TP_printk(
+ "error=%ld (%s) name=%02x:%02x:%llu/%s",
+- -__entry->error, nfs_show_status(__entry->error),
++ -__entry->error, show_nfs_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->dir,
+ __get_str(name)
+@@ -938,16 +939,6 @@ TRACE_EVENT(nfs_pgio_error,
+ )
+ );
+
+-TRACE_DEFINE_ENUM(NFS_UNSTABLE);
+-TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
+-TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
+-
+-#define nfs_show_stable(stable) \
+- __print_symbolic(stable, \
+- { NFS_UNSTABLE, "UNSTABLE" }, \
+- { NFS_DATA_SYNC, "DATA_SYNC" }, \
+- { NFS_FILE_SYNC, "FILE_SYNC" })
+-
+ TRACE_EVENT(nfs_initiate_write,
+ TP_PROTO(
+ const struct nfs_pgio_header *hdr
+@@ -961,7 +952,7 @@ TRACE_EVENT(nfs_initiate_write,
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+- __field(enum nfs3_stable_how, stable)
++ __field(unsigned long, stable)
+ ),
+
+ TP_fast_assign(
+@@ -985,7 +976,7 @@ TRACE_EVENT(nfs_initiate_write,
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset, __entry->count,
+- nfs_show_stable(__entry->stable)
++ show_nfs_stable_how(__entry->stable)
+ )
+ );
+
+@@ -1005,7 +996,7 @@ TRACE_EVENT(nfs_writeback_done,
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(int, status)
+- __field(enum nfs3_stable_how, stable)
++ __field(unsigned long, stable)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+
+@@ -1038,8 +1029,8 @@ TRACE_EVENT(nfs_writeback_done,
+ __entry->fhandle,
+ (long long)__entry->offset, __entry->arg_count,
+ __entry->res_count, __entry->status,
+- nfs_show_stable(__entry->stable),
+- __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
++ show_nfs_stable_how(__entry->stable),
++ show_nfs4_verifier(__entry->verifier)
+ )
+ );
+
+@@ -1140,7 +1131,7 @@ TRACE_EVENT(nfs_commit_done,
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(int, status)
+- __field(enum nfs3_stable_how, stable)
++ __field(unsigned long, stable)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+
+@@ -1169,8 +1160,8 @@ TRACE_EVENT(nfs_commit_done,
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset, __entry->status,
+- nfs_show_stable(__entry->stable),
+- __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
++ show_nfs_stable_how(__entry->stable),
++ show_nfs4_verifier(__entry->verifier)
+ )
+ );
+
+@@ -1207,76 +1198,6 @@ TRACE_EVENT(nfs_fh_to_dentry,
+ )
+ );
+
+-TRACE_DEFINE_ENUM(NFS_OK);
+-TRACE_DEFINE_ENUM(NFSERR_PERM);
+-TRACE_DEFINE_ENUM(NFSERR_NOENT);
+-TRACE_DEFINE_ENUM(NFSERR_IO);
+-TRACE_DEFINE_ENUM(NFSERR_NXIO);
+-TRACE_DEFINE_ENUM(ECHILD);
+-TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
+-TRACE_DEFINE_ENUM(NFSERR_ACCES);
+-TRACE_DEFINE_ENUM(NFSERR_EXIST);
+-TRACE_DEFINE_ENUM(NFSERR_XDEV);
+-TRACE_DEFINE_ENUM(NFSERR_NODEV);
+-TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
+-TRACE_DEFINE_ENUM(NFSERR_ISDIR);
+-TRACE_DEFINE_ENUM(NFSERR_INVAL);
+-TRACE_DEFINE_ENUM(NFSERR_FBIG);
+-TRACE_DEFINE_ENUM(NFSERR_NOSPC);
+-TRACE_DEFINE_ENUM(NFSERR_ROFS);
+-TRACE_DEFINE_ENUM(NFSERR_MLINK);
+-TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
+-TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
+-TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
+-TRACE_DEFINE_ENUM(NFSERR_DQUOT);
+-TRACE_DEFINE_ENUM(NFSERR_STALE);
+-TRACE_DEFINE_ENUM(NFSERR_REMOTE);
+-TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
+-TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
+-TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
+-TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
+-TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
+-TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
+-TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
+-TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
+-TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
+-
+-#define nfs_show_status(x) \
+- __print_symbolic(x, \
+- { NFS_OK, "OK" }, \
+- { NFSERR_PERM, "PERM" }, \
+- { NFSERR_NOENT, "NOENT" }, \
+- { NFSERR_IO, "IO" }, \
+- { NFSERR_NXIO, "NXIO" }, \
+- { ECHILD, "CHILD" }, \
+- { NFSERR_EAGAIN, "AGAIN" }, \
+- { NFSERR_ACCES, "ACCES" }, \
+- { NFSERR_EXIST, "EXIST" }, \
+- { NFSERR_XDEV, "XDEV" }, \
+- { NFSERR_NODEV, "NODEV" }, \
+- { NFSERR_NOTDIR, "NOTDIR" }, \
+- { NFSERR_ISDIR, "ISDIR" }, \
+- { NFSERR_INVAL, "INVAL" }, \
+- { NFSERR_FBIG, "FBIG" }, \
+- { NFSERR_NOSPC, "NOSPC" }, \
+- { NFSERR_ROFS, "ROFS" }, \
+- { NFSERR_MLINK, "MLINK" }, \
+- { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
+- { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
+- { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
+- { NFSERR_DQUOT, "DQUOT" }, \
+- { NFSERR_STALE, "STALE" }, \
+- { NFSERR_REMOTE, "REMOTE" }, \
+- { NFSERR_WFLUSH, "WFLUSH" }, \
+- { NFSERR_BADHANDLE, "BADHANDLE" }, \
+- { NFSERR_NOT_SYNC, "NOTSYNC" }, \
+- { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
+- { NFSERR_NOTSUPP, "NOTSUPP" }, \
+- { NFSERR_TOOSMALL, "TOOSMALL" }, \
+- { NFSERR_SERVERFAULT, "REMOTEIO" }, \
+- { NFSERR_BADTYPE, "BADTYPE" }, \
+- { NFSERR_JUKEBOX, "JUKEBOX" })
+-
+ DECLARE_EVENT_CLASS(nfs_xdr_event,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+@@ -1316,7 +1237,7 @@ DECLARE_EVENT_CLASS(nfs_xdr_event,
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(program), __entry->version,
+ __get_str(procedure), -__entry->error,
+- nfs_show_status(__entry->error)
++ show_nfs_status(__entry->error)
+ )
+ );
+ #define DEFINE_NFS_XDR_EVENT(name) \
+diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
+index 3307361c79560..f331f067691b0 100644
+--- a/fs/nfs/pnfs.h
++++ b/fs/nfs/pnfs.h
+@@ -82,10 +82,6 @@ enum pnfs_try_status {
+ PNFS_TRY_AGAIN = 2,
+ };
+
+-/* error codes for internal use */
+-#define NFS4ERR_RESET_TO_MDS 12001
+-#define NFS4ERR_RESET_TO_PNFS 12002
+-
+ #ifdef CONFIG_NFS_V4_1
+
+ #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 7e7d0f1705ed3..61943a629cdee 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -9,6 +9,7 @@
+ #define _NFSD_TRACE_H
+
+ #include <linux/tracepoint.h>
++
+ #include "export.h"
+ #include "nfsfh.h"
+
+diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
+index 15004c469807d..5662d8be04eb0 100644
+--- a/include/linux/nfs4.h
++++ b/include/linux/nfs4.h
+@@ -292,6 +292,10 @@ enum nfsstat4 {
+ NFS4ERR_XATTR2BIG = 10096,
+ };
+
++/* error codes for internal client use */
++#define NFS4ERR_RESET_TO_MDS 12001
++#define NFS4ERR_RESET_TO_PNFS 12002
++
+ static inline bool seqid_mutating_err(u32 err)
+ {
+ /* See RFC 7530, section 9.1.7 */
+diff --git a/include/trace/events/nfs.h b/include/trace/events/nfs.h
+new file mode 100644
+index 0000000000000..09ffdbb04134d
+--- /dev/null
++++ b/include/trace/events/nfs.h
+@@ -0,0 +1,375 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Display helpers for NFS protocol elements
++ *
++ * Author: Chuck Lever <chuck.lever@oracle.com>
++ *
++ * Copyright (c) 2020, Oracle and/or its affiliates.
++ */
++
++#include <linux/nfs.h>
++#include <linux/nfs4.h>
++#include <uapi/linux/nfs.h>
++
++TRACE_DEFINE_ENUM(NFS_OK);
++TRACE_DEFINE_ENUM(NFSERR_PERM);
++TRACE_DEFINE_ENUM(NFSERR_NOENT);
++TRACE_DEFINE_ENUM(NFSERR_IO);
++TRACE_DEFINE_ENUM(NFSERR_NXIO);
++TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
++TRACE_DEFINE_ENUM(NFSERR_ACCES);
++TRACE_DEFINE_ENUM(NFSERR_EXIST);
++TRACE_DEFINE_ENUM(NFSERR_XDEV);
++TRACE_DEFINE_ENUM(NFSERR_NODEV);
++TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
++TRACE_DEFINE_ENUM(NFSERR_ISDIR);
++TRACE_DEFINE_ENUM(NFSERR_INVAL);
++TRACE_DEFINE_ENUM(NFSERR_FBIG);
++TRACE_DEFINE_ENUM(NFSERR_NOSPC);
++TRACE_DEFINE_ENUM(NFSERR_ROFS);
++TRACE_DEFINE_ENUM(NFSERR_MLINK);
++TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
++TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
++TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
++TRACE_DEFINE_ENUM(NFSERR_DQUOT);
++TRACE_DEFINE_ENUM(NFSERR_STALE);
++TRACE_DEFINE_ENUM(NFSERR_REMOTE);
++TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
++TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
++TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
++TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
++TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
++TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
++TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
++TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
++TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
++
++#define show_nfs_status(x) \
++ __print_symbolic(x, \
++ { NFS_OK, "OK" }, \
++ { NFSERR_PERM, "PERM" }, \
++ { NFSERR_NOENT, "NOENT" }, \
++ { NFSERR_IO, "IO" }, \
++ { NFSERR_NXIO, "NXIO" }, \
++ { ECHILD, "CHILD" }, \
++ { NFSERR_EAGAIN, "AGAIN" }, \
++ { NFSERR_ACCES, "ACCES" }, \
++ { NFSERR_EXIST, "EXIST" }, \
++ { NFSERR_XDEV, "XDEV" }, \
++ { NFSERR_NODEV, "NODEV" }, \
++ { NFSERR_NOTDIR, "NOTDIR" }, \
++ { NFSERR_ISDIR, "ISDIR" }, \
++ { NFSERR_INVAL, "INVAL" }, \
++ { NFSERR_FBIG, "FBIG" }, \
++ { NFSERR_NOSPC, "NOSPC" }, \
++ { NFSERR_ROFS, "ROFS" }, \
++ { NFSERR_MLINK, "MLINK" }, \
++ { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
++ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
++ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
++ { NFSERR_DQUOT, "DQUOT" }, \
++ { NFSERR_STALE, "STALE" }, \
++ { NFSERR_REMOTE, "REMOTE" }, \
++ { NFSERR_WFLUSH, "WFLUSH" }, \
++ { NFSERR_BADHANDLE, "BADHANDLE" }, \
++ { NFSERR_NOT_SYNC, "NOTSYNC" }, \
++ { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
++ { NFSERR_NOTSUPP, "NOTSUPP" }, \
++ { NFSERR_TOOSMALL, "TOOSMALL" }, \
++ { NFSERR_SERVERFAULT, "REMOTEIO" }, \
++ { NFSERR_BADTYPE, "BADTYPE" }, \
++ { NFSERR_JUKEBOX, "JUKEBOX" })
++
++TRACE_DEFINE_ENUM(NFS_UNSTABLE);
++TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
++TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
++
++#define show_nfs_stable_how(x) \
++ __print_symbolic(x, \
++ { NFS_UNSTABLE, "UNSTABLE" }, \
++ { NFS_DATA_SYNC, "DATA_SYNC" }, \
++ { NFS_FILE_SYNC, "FILE_SYNC" })
++
++TRACE_DEFINE_ENUM(NFS4_OK);
++TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
++TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
++TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
++TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
++TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
++TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
++TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
++TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
++TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
++TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
++TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
++TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
++TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
++TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
++TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
++TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
++TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
++TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
++TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
++TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
++TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
++TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
++TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
++TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
++TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
++TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
++TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
++TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
++TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
++TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
++TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
++TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
++TRACE_DEFINE_ENUM(NFS4ERR_IO);
++TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
++TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
++TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
++TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
++TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
++TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
++TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
++TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
++TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
++TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
++TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
++TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
++TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
++TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
++TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
++TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
++TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
++TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
++TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
++TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
++TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
++TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
++TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
++TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
++TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
++TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
++TRACE_DEFINE_ENUM(NFS4ERR_PERM);
++TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
++TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
++TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
++TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
++TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
++TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
++TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
++TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
++TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
++TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
++TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
++TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
++TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
++TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
++TRACE_DEFINE_ENUM(NFS4ERR_SAME);
++TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
++TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
++TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
++TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
++TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
++TRACE_DEFINE_ENUM(NFS4ERR_STALE);
++TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
++TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
++TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
++TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
++TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
++TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
++TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
++TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
++TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
++TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
++TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
++
++TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
++TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
++
++#define show_nfs4_status(x) \
++ __print_symbolic(x, \
++ { NFS4_OK, "OK" }, \
++ { EPERM, "EPERM" }, \
++ { ENOENT, "ENOENT" }, \
++ { EIO, "EIO" }, \
++ { ENXIO, "ENXIO" }, \
++ { EACCES, "EACCES" }, \
++ { EEXIST, "EEXIST" }, \
++ { EXDEV, "EXDEV" }, \
++ { ENOTDIR, "ENOTDIR" }, \
++ { EISDIR, "EISDIR" }, \
++ { EFBIG, "EFBIG" }, \
++ { ENOSPC, "ENOSPC" }, \
++ { EROFS, "EROFS" }, \
++ { EMLINK, "EMLINK" }, \
++ { ENAMETOOLONG, "ENAMETOOLONG" }, \
++ { ENOTEMPTY, "ENOTEMPTY" }, \
++ { EDQUOT, "EDQUOT" }, \
++ { ESTALE, "ESTALE" }, \
++ { EBADHANDLE, "EBADHANDLE" }, \
++ { EBADCOOKIE, "EBADCOOKIE" }, \
++ { ENOTSUPP, "ENOTSUPP" }, \
++ { ETOOSMALL, "ETOOSMALL" }, \
++ { EREMOTEIO, "EREMOTEIO" }, \
++ { EBADTYPE, "EBADTYPE" }, \
++ { EAGAIN, "EAGAIN" }, \
++ { ELOOP, "ELOOP" }, \
++ { EOPNOTSUPP, "EOPNOTSUPP" }, \
++ { EDEADLK, "EDEADLK" }, \
++ { ENOMEM, "ENOMEM" }, \
++ { EKEYEXPIRED, "EKEYEXPIRED" }, \
++ { ETIMEDOUT, "ETIMEDOUT" }, \
++ { ERESTARTSYS, "ERESTARTSYS" }, \
++ { ECONNREFUSED, "ECONNREFUSED" }, \
++ { ECONNRESET, "ECONNRESET" }, \
++ { ENETUNREACH, "ENETUNREACH" }, \
++ { EHOSTUNREACH, "EHOSTUNREACH" }, \
++ { EHOSTDOWN, "EHOSTDOWN" }, \
++ { EPIPE, "EPIPE" }, \
++ { EPFNOSUPPORT, "EPFNOSUPPORT" }, \
++ { EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
++ { NFS4ERR_ACCESS, "ACCESS" }, \
++ { NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
++ { NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
++ { NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
++ { NFS4ERR_BADCHAR, "BADCHAR" }, \
++ { NFS4ERR_BADHANDLE, "BADHANDLE" }, \
++ { NFS4ERR_BADIOMODE, "BADIOMODE" }, \
++ { NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
++ { NFS4ERR_BADLABEL, "BADLABEL" }, \
++ { NFS4ERR_BADNAME, "BADNAME" }, \
++ { NFS4ERR_BADOWNER, "BADOWNER" }, \
++ { NFS4ERR_BADSESSION, "BADSESSION" }, \
++ { NFS4ERR_BADSLOT, "BADSLOT" }, \
++ { NFS4ERR_BADTYPE, "BADTYPE" }, \
++ { NFS4ERR_BADXDR, "BADXDR" }, \
++ { NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
++ { NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
++ { NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
++ { NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
++ { NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
++ { NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
++ { NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
++ { NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
++ { NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
++ { NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
++ { NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \
++ { NFS4ERR_DEADLOCK, "DEADLOCK" }, \
++ { NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
++ { NFS4ERR_DELAY, "DELAY" }, \
++ { NFS4ERR_DELEG_ALREADY_WANTED, "DELEG_ALREADY_WANTED" }, \
++ { NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
++ { NFS4ERR_DENIED, "DENIED" }, \
++ { NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
++ { NFS4ERR_DQUOT, "DQUOT" }, \
++ { NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
++ { NFS4ERR_EXIST, "EXIST" }, \
++ { NFS4ERR_EXPIRED, "EXPIRED" }, \
++ { NFS4ERR_FBIG, "FBIG" }, \
++ { NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
++ { NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
++ { NFS4ERR_GRACE, "GRACE" }, \
++ { NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
++ { NFS4ERR_INVAL, "INVAL" }, \
++ { NFS4ERR_IO, "IO" }, \
++ { NFS4ERR_ISDIR, "ISDIR" }, \
++ { NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
++ { NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
++ { NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
++ { NFS4ERR_LOCKED, "LOCKED" }, \
++ { NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
++ { NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
++ { NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
++ { NFS4ERR_MLINK, "MLINK" }, \
++ { NFS4ERR_MOVED, "MOVED" }, \
++ { NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
++ { NFS4ERR_NOENT, "NOENT" }, \
++ { NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
++ { NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
++ { NFS4ERR_NOSPC, "NOSPC" }, \
++ { NFS4ERR_NOTDIR, "NOTDIR" }, \
++ { NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
++ { NFS4ERR_NOTSUPP, "NOTSUPP" }, \
++ { NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
++ { NFS4ERR_NOT_SAME, "NOT_SAME" }, \
++ { NFS4ERR_NO_GRACE, "NO_GRACE" }, \
++ { NFS4ERR_NXIO, "NXIO" }, \
++ { NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
++ { NFS4ERR_OPENMODE, "OPENMODE" }, \
++ { NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
++ { NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
++ { NFS4ERR_PERM, "PERM" }, \
++ { NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
++ { NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
++ { NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
++ { NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
++ { NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
++ { NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
++ { NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
++ { NFS4ERR_REP_TOO_BIG_TO_CACHE, "REP_TOO_BIG_TO_CACHE" }, \
++ { NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
++ { NFS4ERR_RESOURCE, "RESOURCE" }, \
++ { NFS4ERR_RESTOREFH, "RESTOREFH" }, \
++ { NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
++ { NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
++ { NFS4ERR_ROFS, "ROFS" }, \
++ { NFS4ERR_SAME, "SAME" }, \
++ { NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
++ { NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
++ { NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
++ { NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
++ { NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
++ { NFS4ERR_STALE, "STALE" }, \
++ { NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
++ { NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
++ { NFS4ERR_SYMLINK, "SYMLINK" }, \
++ { NFS4ERR_TOOSMALL, "TOOSMALL" }, \
++ { NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
++ { NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
++ { NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
++ { NFS4ERR_WRONGSEC, "WRONGSEC" }, \
++ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
++ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
++ { NFS4ERR_XDEV, "XDEV" }, \
++ /* ***** Internal to Linux NFS client ***** */ \
++ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
++ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
++
++#define show_nfs4_verifier(x) \
++ __print_hex_str(x, NFS4_VERIFIER_SIZE)
++
++TRACE_DEFINE_ENUM(IOMODE_READ);
++TRACE_DEFINE_ENUM(IOMODE_RW);
++TRACE_DEFINE_ENUM(IOMODE_ANY);
++
++#define show_pnfs_layout_iomode(x) \
++ __print_symbolic(x, \
++ { IOMODE_READ, "READ" }, \
++ { IOMODE_RW, "RW" }, \
++ { IOMODE_ANY, "ANY" })
++
++#define show_nfs4_seq4_status(x) \
++ __print_flags(x, "|", \
++ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
++ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, "CB_GSS_CONTEXTS_EXPIRING" }, \
++ { SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, "CB_GSS_CONTEXTS_EXPIRED" }, \
++ { SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \
++ { SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \
++ { SEQ4_STATUS_ADMIN_STATE_REVOKED, "ADMIN_STATE_REVOKED" }, \
++ { SEQ4_STATUS_RECALLABLE_STATE_REVOKED, "RECALLABLE_STATE_REVOKED" }, \
++ { SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
++ { SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
++ { SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
++ { SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
+--
+2.43.0
+
--- /dev/null
+From 22d8f1115cda79ca69d597de3cc3c49ed4787192 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Oct 2021 10:09:57 -0400
+Subject: NFS: Remove unnecessary TRACE_DEFINE_ENUM()s
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8e09650f5ec68858f4b8b67cdef9e2ece9b208f3 ]
+
+Clean up: TRACE_DEFINE_ENUM is unnecessary because the target
+symbols are all C macros, not enums.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/nfstrace.h | 68 -----------------------------------------------
+ 1 file changed, 68 deletions(-)
+
+diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
+index 8a224871be74c..589f32fdbe637 100644
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -11,16 +11,6 @@
+ #include <linux/tracepoint.h>
+ #include <linux/iversion.h>
+
+-TRACE_DEFINE_ENUM(DT_UNKNOWN);
+-TRACE_DEFINE_ENUM(DT_FIFO);
+-TRACE_DEFINE_ENUM(DT_CHR);
+-TRACE_DEFINE_ENUM(DT_DIR);
+-TRACE_DEFINE_ENUM(DT_BLK);
+-TRACE_DEFINE_ENUM(DT_REG);
+-TRACE_DEFINE_ENUM(DT_LNK);
+-TRACE_DEFINE_ENUM(DT_SOCK);
+-TRACE_DEFINE_ENUM(DT_WHT);
+-
+ #define nfs_show_file_type(ftype) \
+ __print_symbolic(ftype, \
+ { DT_UNKNOWN, "UNKNOWN" }, \
+@@ -33,24 +23,6 @@ TRACE_DEFINE_ENUM(DT_WHT);
+ { DT_SOCK, "SOCK" }, \
+ { DT_WHT, "WHT" })
+
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL);
+-TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE);
+-TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
+-TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);
+-
+ #define nfs_show_cache_validity(v) \
+ __print_flags(v, "|", \
+ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \
+@@ -71,17 +43,6 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);
+ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
+ { NFS_INO_INVALID_MODE, "INVALID_MODE" })
+
+-TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
+-TRACE_DEFINE_ENUM(NFS_INO_STALE);
+-TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET);
+-TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING);
+-TRACE_DEFINE_ENUM(NFS_INO_FSCACHE);
+-TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK);
+-TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT);
+-TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING);
+-TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS);
+-TRACE_DEFINE_ENUM(NFS_INO_ODIRECT);
+-
+ #define nfs_show_nfsi_flags(v) \
+ __print_flags(v, "|", \
+ { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \
+@@ -270,19 +231,6 @@ TRACE_EVENT(nfs_access_exit,
+ )
+ );
+
+-TRACE_DEFINE_ENUM(LOOKUP_FOLLOW);
+-TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY);
+-TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT);
+-TRACE_DEFINE_ENUM(LOOKUP_PARENT);
+-TRACE_DEFINE_ENUM(LOOKUP_REVAL);
+-TRACE_DEFINE_ENUM(LOOKUP_RCU);
+-TRACE_DEFINE_ENUM(LOOKUP_OPEN);
+-TRACE_DEFINE_ENUM(LOOKUP_CREATE);
+-TRACE_DEFINE_ENUM(LOOKUP_EXCL);
+-TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET);
+-TRACE_DEFINE_ENUM(LOOKUP_EMPTY);
+-TRACE_DEFINE_ENUM(LOOKUP_DOWN);
+-
+ #define show_lookup_flags(flags) \
+ __print_flags(flags, "|", \
+ { LOOKUP_FOLLOW, "FOLLOW" }, \
+@@ -392,22 +340,6 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit);
+ DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter);
+ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit);
+
+-TRACE_DEFINE_ENUM(O_WRONLY);
+-TRACE_DEFINE_ENUM(O_RDWR);
+-TRACE_DEFINE_ENUM(O_CREAT);
+-TRACE_DEFINE_ENUM(O_EXCL);
+-TRACE_DEFINE_ENUM(O_NOCTTY);
+-TRACE_DEFINE_ENUM(O_TRUNC);
+-TRACE_DEFINE_ENUM(O_APPEND);
+-TRACE_DEFINE_ENUM(O_NONBLOCK);
+-TRACE_DEFINE_ENUM(O_DSYNC);
+-TRACE_DEFINE_ENUM(O_DIRECT);
+-TRACE_DEFINE_ENUM(O_LARGEFILE);
+-TRACE_DEFINE_ENUM(O_DIRECTORY);
+-TRACE_DEFINE_ENUM(O_NOFOLLOW);
+-TRACE_DEFINE_ENUM(O_NOATIME);
+-TRACE_DEFINE_ENUM(O_CLOEXEC);
+-
+ #define show_open_flags(flags) \
+ __print_flags(flags, "|", \
+ { O_WRONLY, "O_WRONLY" }, \
+--
+2.43.0
+
--- /dev/null
+From 3874f3f60b04816f701d748eded2435f7ac15115 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 14:47:34 +1000
+Subject: NFS: restore module put when manager exits.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 080abad71e99d2becf38c978572982130b927a28 ]
+
+Commit f49169c97fce ("NFSD: Remove svc_serv_ops::svo_module") removed
+calls to module_put_and_kthread_exit() from threads that acted as SUNRPC
+servers and had a related svc_serv_ops structure. This was correct.
+
+It ALSO removed the module_put_and_kthread_exit() call from
+nfs4_run_state_manager() which is NOT a SUNRPC service.
+
+Consequently every time the NFSv4 state manager runs the module count
+increments and won't be decremented. So the nfsv4 module cannot be
+unloaded.
+
+So restore the module_put_and_kthread_exit() call.
+
+Fixes: f49169c97fce ("NFSD: Remove svc_serv_ops::svo_module")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/nfs4state.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index 61050ffac93ef..d7868cc527805 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -2766,5 +2766,6 @@ static int nfs4_run_state_manager(void *ptr)
+ goto again;
+
+ nfs_put_client(clp);
++ module_put_and_kthread_exit(0);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 3e74773839445b1705b01c04f29e88b8cfe4db63 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: NFS: switch the callback service back to non-pooled.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 23a1a573c61ccb5e7829c1f5472d3e025293a031 ]
+
+Now that thread management is consistent there is no need for
+nfs-callback to use svc_create_pooled() as introduced in Commit
+df807fffaabd ("NFSv4.x/callback: Create the callback service through
+svc_create_pooled"). So switch back to svc_create().
+
+If service pools were configured, but the number of threads was left at
+'1', nfs callback may not work reliably when svc_create_pooled() is used.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/callback.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 422055a1092f0..054cc1255fac6 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -286,7 +286,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
+ printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
+ cb_info->users);
+
+- serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
++ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+ if (!serv) {
+ printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
+ return ERR_PTR(-ENOMEM);
+--
+2.43.0
+
--- /dev/null
+From eb378d8055358a76c395c512572d23a7f21a0c06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:14:00 -0400
+Subject: NFSD: Add a mechanism to wait for a DELEGRETURN
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c035362eb935fe9381d9d1cc453bc2a37460e24c ]
+
+Subsequent patches will use this mechanism to wake up an operation
+that is waiting for a client to return a delegation.
+
+The new tracepoint records whether the wait timed out or was
+properly awoken by the expected DELEGRETURN:
+
+ nfsd-1155 [002] 83799.493199: nfsd_delegret_wakeup: xid=0x14b7d6ef fh_hash=0xf6826792 (timed out)
+
+Suggested-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 30 ++++++++++++++++++++++++++++++
+ fs/nfsd/nfsd.h | 7 +++++++
+ fs/nfsd/trace.h | 23 +++++++++++++++++++++++
+ 3 files changed, 60 insertions(+)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index a0c5658599ead..6cb654e308787 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4734,6 +4734,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+ return ret;
+ }
+
++static bool nfsd4_deleg_present(const struct inode *inode)
++{
++ struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
++
++ return ctx && !list_empty_careful(&ctx->flc_lease);
++}
++
++/**
++ * nfsd_wait_for_delegreturn - wait for delegations to be returned
++ * @rqstp: the RPC transaction being executed
++ * @inode: in-core inode of the file being waited for
++ *
++ * The timeout prevents deadlock if all nfsd threads happen to be
++ * tied up waiting for returning delegations.
++ *
++ * Return values:
++ * %true: delegation was returned
++ * %false: timed out waiting for delegreturn
++ */
++bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
++{
++ long __maybe_unused timeo;
++
++ timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode),
++ NFSD_DELEGRETURN_TIMEOUT);
++ trace_nfsd_delegret_wakeup(rqstp, inode, timeo);
++ return timeo > 0;
++}
++
+ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
+ {
+ struct nfs4_delegation *dp = cb_to_delegation(cb);
+@@ -6797,6 +6826,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto put_stateid;
+
++ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
+ destroy_delegation(dp);
+ put_stateid:
+ nfs4_put_stid(&dp->dl_stid);
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 57a468ed85c35..6ab4ad41ae84e 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -164,6 +164,7 @@ char * nfs4_recoverydir(void);
+ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
+ int nfsd4_create_laundry_wq(void);
+ void nfsd4_destroy_laundry_wq(void);
++bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode);
+ #else
+ static inline int nfsd4_init_slabs(void) { return 0; }
+ static inline void nfsd4_free_slabs(void) { }
+@@ -179,6 +180,11 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
+ }
+ static inline int nfsd4_create_laundry_wq(void) { return 0; };
+ static inline void nfsd4_destroy_laundry_wq(void) {};
++static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp,
++ struct inode *inode)
++{
++ return false;
++}
+ #endif
+
+ /*
+@@ -343,6 +349,7 @@ void nfsd_lockd_shutdown(void);
+ #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
+ #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
+ #define NFS4_CLIENTS_PER_GB 1024
++#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
+
+ /*
+ * The following attributes are currently not supported by the NFSv4 server:
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index d832429e575e4..1229502b6e9e0 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -538,6 +538,29 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+ #include "filecache.h"
+ #include "vfs.h"
+
++TRACE_EVENT(nfsd_delegret_wakeup,
++ TP_PROTO(
++ const struct svc_rqst *rqstp,
++ const struct inode *inode,
++ long timeo
++ ),
++ TP_ARGS(rqstp, inode, timeo),
++ TP_STRUCT__entry(
++ __field(u32, xid)
++ __field(const void *, inode)
++ __field(long, timeo)
++ ),
++ TP_fast_assign(
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->inode = inode;
++ __entry->timeo = timeo;
++ ),
++ TP_printk("xid=0x%08x inode=%p%s",
++ __entry->xid, __entry->inode,
++ __entry->timeo == 0 ? " (timed out)" : ""
++ )
++);
++
+ DECLARE_EVENT_CLASS(nfsd_stateid_class,
+ TP_PROTO(stateid_t *stp),
+ TP_ARGS(stp),
+--
+2.43.0
+
--- /dev/null
+From e1131502c4829e4c82d493742be2758f84a815b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:34 -0400
+Subject: NFSD: Add a nfsd4_file_hash_remove() helper
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3341678f2fd6106055cead09e513fad6950a0d19 ]
+
+Refactor to relocate hash deletion operation to a helper function
+that is close to most other nfs4_file data structure operations.
+
+The "noinline" annotation will become useful in a moment when the
+hlist_del_rcu() is replaced with a more complex rhash remove
+operation. It also guarantees that hash remove operations can be
+traced with "-p function -l nfsd4_file_hash_remove".
+
+This also simplifies the organization of forward declarations: the
+to-be-added rhashtable and its param structure will be defined
+/after/ put_nfs4_file().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 1cb3ea90eb4ca..f723d7d5e1557 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -84,6 +84,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
+ void nfsd4_end_grace(struct nfsd_net *nn);
+ static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
++static void nfsd4_file_hash_remove(struct nfs4_file *fi);
+
+ /* Locking: */
+
+@@ -591,7 +592,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ might_lock(&state_lock);
+
+ if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
+- hlist_del_rcu(&fi->fi_hash);
++ nfsd4_file_hash_remove(fi);
+ spin_unlock(&state_lock);
+ WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
+ WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
+@@ -4749,6 +4750,11 @@ find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
+ return insert_file(new, fh, hashval);
+ }
+
++static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
++{
++ hlist_del_rcu(&fi->fi_hash);
++}
++
+ /*
+ * Called to check deny when READ with all zero stateid or
+ * WRITE with all zero or all one stateid
+--
+2.43.0
+
--- /dev/null
+From 483ae7b5942709aa771af316c966069b552d9b50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 18 Dec 2021 20:38:00 -0500
+Subject: nfsd: Add a tracepoint for errors in nfsd4_clone_file_range()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit a2f4c3fa4db94ba44d32a72201927cfd132a8e82 ]
+
+Since a clone error commit can cause the boot verifier to change,
+we should trace those errors.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+[ cel: Addressed a checkpatch.pl splat in fs/nfsd/vfs.h ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/trace.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++
+ fs/nfsd/vfs.c | 18 +++++++++++++++--
+ fs/nfsd/vfs.h | 3 ++-
+ 4 files changed, 69 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 002473c59fc6f..861af46ebc6cf 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1108,7 +1108,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto out;
+
+- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
++ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
+ dst, clone->cl_dst_pos, clone->cl_count,
+ EX_ISSYNC(cstate->current_fh.fh_export));
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 1c98a0f857498..52c4a4e001729 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -399,6 +399,56 @@ TRACE_EVENT(nfsd_dirent,
+ )
+ )
+
++DECLARE_EVENT_CLASS(nfsd_copy_err_class,
++ TP_PROTO(struct svc_rqst *rqstp,
++ struct svc_fh *src_fhp,
++ loff_t src_offset,
++ struct svc_fh *dst_fhp,
++ loff_t dst_offset,
++ u64 count,
++ int status),
++ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
++ TP_STRUCT__entry(
++ __field(u32, xid)
++ __field(u32, src_fh_hash)
++ __field(loff_t, src_offset)
++ __field(u32, dst_fh_hash)
++ __field(loff_t, dst_offset)
++ __field(u64, count)
++ __field(int, status)
++ ),
++ TP_fast_assign(
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
++ __entry->src_offset = src_offset;
++ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
++ __entry->dst_offset = dst_offset;
++ __entry->count = count;
++ __entry->status = status;
++ ),
++ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
++ "dst_fh_hash=0x%08x dst_offset=%lld "
++ "count=%llu status=%d",
++ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
++ __entry->dst_fh_hash, __entry->dst_offset,
++ (unsigned long long)__entry->count,
++ __entry->status)
++)
++
++#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
++DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
++ TP_PROTO(struct svc_rqst *rqstp, \
++ struct svc_fh *src_fhp, \
++ loff_t src_offset, \
++ struct svc_fh *dst_fhp, \
++ loff_t dst_offset, \
++ u64 count, \
++ int status), \
++ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
++ count, status))
++
++DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
++
+ #include "state.h"
+ #include "filecache.h"
+ #include "vfs.h"
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 17985d868887a..721cf315551ad 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -40,6 +40,7 @@
+ #include "../internal.h"
+ #include "acl.h"
+ #include "idmap.h"
++#include "xdr4.h"
+ #endif /* CONFIG_NFSD_V4 */
+
+ #include "nfsd.h"
+@@ -523,8 +524,15 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+ #endif
+
+-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+- struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
++static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
++{
++ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
++}
++
++__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
++ struct nfsd_file *nf_src, u64 src_pos,
++ struct nfsd_file *nf_dst, u64 dst_pos,
++ u64 count, bool sync)
+ {
+ struct file *src = nf_src->nf_file;
+ struct file *dst = nf_dst->nf_file;
+@@ -551,6 +559,12 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
++ trace_nfsd_clone_file_range_err(rqstp,
++ &nfsd4_get_cstate(rqstp)->save_fh,
++ src_pos,
++ &nfsd4_get_cstate(rqstp)->current_fh,
++ dst_pos,
++ count, status);
+ nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+ nfsd_net_id));
+ ret = nfserrno(status);
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 3cf5a8a13da50..2c43d10e3cab4 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -57,7 +57,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
+ struct xdr_netobj *);
+ __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
+ struct file *, loff_t, loff_t, int);
+-__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
++__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
++ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync);
+ #endif /* CONFIG_NFSD_V4 */
+--
+2.43.0
+
--- /dev/null
+From 26c25eb6fcc4f2c4ec6364b404c29b5584ea8203 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Jun 2023 10:13:39 -0400
+Subject: NFSD: Add an nfsd4_encode_nfstime4() helper
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 262176798b18b12fd8ab84c94cfece0a6a652476 ]
+
+Clean up: de-duplicate some common code.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Tom Talpey <tom@talpey.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 26 insertions(+), 20 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 30c64c3f5fa05..c40876daf60c0 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2542,6 +2542,20 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+ return p;
+ }
+
++static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
++ struct timespec64 *tv)
++{
++ __be32 *p;
++
++ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
++ if (!p)
++ return nfserr_resource;
++
++ p = xdr_encode_hyper(p, (s64)tv->tv_sec);
++ *p = cpu_to_be32(tv->tv_nsec);
++ return nfs_ok;
++}
++
+ /*
+ * ctime (in NFSv4, time_metadata) is not writeable, and the client
+ * doesn't really care what resolution could theoretically be stored by
+@@ -3347,11 +3361,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ p = xdr_encode_hyper(p, dummy64);
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
+- *p++ = cpu_to_be32(stat.atime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.atime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+ p = xdr_reserve_space(xdr, 12);
+@@ -3360,25 +3372,19 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ p = encode_time_delta(p, d_inode(dentry));
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
+- *p++ = cpu_to_be32(stat.ctime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.ctime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+- *p++ = cpu_to_be32(stat.mtime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.mtime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+- p = xdr_reserve_space(xdr, 12);
+- if (!p)
+- goto out_resource;
+- p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
+- *p++ = cpu_to_be32(stat.btime.tv_nsec);
++ status = nfsd4_encode_nfstime4(xdr, &stat.btime);
++ if (status)
++ goto out;
+ }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+ u64 ino = stat.ino;
+--
+2.43.0
+
--- /dev/null
+From 77dbd1bfe85dd88160f2e5b550384003ef32a3d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:08:57 -0400
+Subject: NFSD: Add an nfsd4_read::rd_eof field
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 24c7fb85498eda1d4c6b42cc4886328429814990 ]
+
+Refactor: Make the EOF result available in the entire NFSv4 READ
+path.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 11 +++++------
+ fs/nfsd/xdr4.h | 5 +++--
+ 2 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index afc8a51cf60f1..b31103221fee9 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3887,7 +3887,6 @@ static __be32 nfsd4_encode_splice_read(
+ struct xdr_stream *xdr = resp->xdr;
+ struct xdr_buf *buf = xdr->buf;
+ int status, space_left;
+- u32 eof;
+ __be32 nfserr;
+ __be32 *p = xdr->p - 2;
+
+@@ -3896,7 +3895,8 @@ static __be32 nfsd4_encode_splice_read(
+ return nfserr_resource;
+
+ nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
+- file, read->rd_offset, &maxcount, &eof);
++ file, read->rd_offset, &maxcount,
++ &read->rd_eof);
+ read->rd_length = maxcount;
+ if (nfserr)
+ goto out_err;
+@@ -3907,7 +3907,7 @@ static __be32 nfsd4_encode_splice_read(
+ goto out_err;
+ }
+
+- *(p++) = htonl(eof);
++ *(p++) = htonl(read->rd_eof);
+ *(p++) = htonl(maxcount);
+
+ buf->page_len = maxcount;
+@@ -3951,7 +3951,6 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ struct file *file, unsigned long maxcount)
+ {
+ struct xdr_stream *xdr = resp->xdr;
+- u32 eof;
+ int starting_len = xdr->buf->len - 8;
+ __be32 nfserr;
+ __be32 tmp;
+@@ -3963,7 +3962,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+
+ nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+ resp->rqstp->rq_vec, read->rd_vlen, &maxcount,
+- &eof);
++ &read->rd_eof);
+ read->rd_length = maxcount;
+ if (nfserr)
+ return nfserr;
+@@ -3971,7 +3970,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ return nfserr_io;
+ xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount));
+
+- tmp = htonl(eof);
++ tmp = htonl(read->rd_eof);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
+ tmp = htonl(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 77286e8c9ab02..0737f81c1004e 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -302,9 +302,10 @@ struct nfsd4_read {
+ u32 rd_length; /* request */
+ int rd_vlen;
+ struct nfsd_file *rd_nf;
+-
++
+ struct svc_rqst *rd_rqstp; /* response */
+- struct svc_fh *rd_fhp; /* response */
++ struct svc_fh *rd_fhp; /* response */
++ u32 rd_eof; /* response */
+ };
+
+ struct nfsd4_readdir {
+--
+2.43.0
+
--- /dev/null
+From c0629d6013d038d0c6d484c0c5905274a4fe775f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Nov 2022 16:22:48 -0400
+Subject: NFSD: Add an nfsd_file_fsync tracepoint
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit d7064eaf688cfe454c50db9f59298463d80d403c ]
+
+Add a tracepoint to capture the number of filecache-triggered fsync
+calls and which files needed it. Also, record when an fsync triggers
+a write verifier reset.
+
+Examples:
+
+<...>-97 [007] 262.505611: nfsd_file_free: inode=0xffff888171e08140 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d2400
+<...>-97 [007] 262.505612: nfsd_file_fsync: inode=0xffff888171e08140 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d2400 ret=0
+<...>-97 [007] 262.505623: nfsd_file_free: inode=0xffff888171e08dc0 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d1e00
+<...>-97 [007] 262.505624: nfsd_file_fsync: inode=0xffff888171e08dc0 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d1e00 ret=0
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 ++++-
+ fs/nfsd/trace.h | 31 +++++++++++++++++++++++++++++++
+ 2 files changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 522e900a88605..6b8873b0c2c38 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -335,10 +335,13 @@ static void
+ nfsd_file_fsync(struct nfsd_file *nf)
+ {
+ struct file *file = nf->nf_file;
++ int ret;
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return;
+- if (vfs_fsync(file, 1) != 0)
++ ret = vfs_fsync(file, 1);
++ trace_nfsd_file_fsync(nf, ret);
++ if (ret)
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ }
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 2c72a666aa9c2..eb155e98a9dc2 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -1237,6 +1237,37 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
+ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
+ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
+
++TRACE_EVENT(nfsd_file_fsync,
++ TP_PROTO(
++ const struct nfsd_file *nf,
++ int ret
++ ),
++ TP_ARGS(nf, ret),
++ TP_STRUCT__entry(
++ __field(void *, nf_inode)
++ __field(int, nf_ref)
++ __field(int, ret)
++ __field(unsigned long, nf_flags)
++ __field(unsigned char, nf_may)
++ __field(struct file *, nf_file)
++ ),
++ TP_fast_assign(
++ __entry->nf_inode = nf->nf_inode;
++ __entry->nf_ref = refcount_read(&nf->nf_ref);
++ __entry->ret = ret;
++ __entry->nf_flags = nf->nf_flags;
++ __entry->nf_may = nf->nf_may;
++ __entry->nf_file = nf->nf_file;
++ ),
++ TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
++ __entry->nf_inode,
++ __entry->nf_ref,
++ show_nf_flags(__entry->nf_flags),
++ show_nfsd_may_flags(__entry->nf_may),
++ __entry->nf_file, __entry->ret
++ )
++);
++
+ #include "cache.h"
+
+ TRACE_DEFINE_ENUM(RC_DROPIT);
+--
+2.43.0
+
--- /dev/null
+From 83d0824afa6d264a1964f906b3036c5cf1db21fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:46:51 -0400
+Subject: NFSD: Add an NFSD_FILE_GC flag to enable nfsd_file garbage collection
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 4d1ea8455716ca070e3cd85767e6f6a562a58b1b ]
+
+NFSv4 operations manage the lifetime of nfsd_file items they use by
+means of NFSv4 OPEN and CLOSE. Hence there's no need for them to be
+garbage collected.
+
+Introduce a mechanism to enable garbage collection for nfsd_file
+items used only by NFSv2/3 callers.
+
+Note that the change in nfsd_file_put() ensures that both CLOSE and
+DELEGRETURN will actually close out and free an nfsd_file on last
+reference of a non-garbage-collected file.
+
+Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=394
+Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 63 +++++++++++++++++++++++++++++++++++++++------
+ fs/nfsd/filecache.h | 3 +++
+ fs/nfsd/nfs3proc.c | 4 +--
+ fs/nfsd/trace.h | 3 ++-
+ fs/nfsd/vfs.c | 4 +--
+ 5 files changed, 64 insertions(+), 13 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index e429fce894316..13a25503b80e1 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -62,6 +62,7 @@ struct nfsd_file_lookup_key {
+ struct net *net;
+ const struct cred *cred;
+ unsigned char need;
++ bool gc;
+ enum nfsd_file_lookup_type type;
+ };
+
+@@ -161,6 +162,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ return 1;
+ if (!nfsd_match_cred(nf->nf_cred, key->cred))
+ return 1;
++ if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++ return 1;
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+ return 1;
+ break;
+@@ -296,6 +299,8 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+ nf->nf_flags = 0;
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
++ if (key->gc)
++ __set_bit(NFSD_FILE_GC, &nf->nf_flags);
+ nf->nf_inode = key->inode;
+ /* nf_ref is pre-incremented for hash table */
+ refcount_set(&nf->nf_ref, 2);
+@@ -427,16 +432,27 @@ nfsd_file_put_noref(struct nfsd_file *nf)
+ }
+ }
+
++static void
++nfsd_file_unhash_and_put(struct nfsd_file *nf)
++{
++ if (nfsd_file_unhash(nf))
++ nfsd_file_put_noref(nf);
++}
++
+ void
+ nfsd_file_put(struct nfsd_file *nf)
+ {
+ might_sleep();
+
+- nfsd_file_lru_add(nf);
+- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags))
++ nfsd_file_lru_add(nf);
++ else if (refcount_read(&nf->nf_ref) == 2)
++ nfsd_file_unhash_and_put(nf);
++
++ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_flush(nf);
+ nfsd_file_put_noref(nf);
+- } else if (nf->nf_file) {
++ } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
+ nfsd_file_put_noref(nf);
+ nfsd_file_schedule_laundrette();
+ } else
+@@ -1015,12 +1031,14 @@ nfsd_file_is_cached(struct inode *inode)
+
+ static __be32
+ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- unsigned int may_flags, struct nfsd_file **pnf, bool open)
++ unsigned int may_flags, struct nfsd_file **pnf,
++ bool open, bool want_gc)
+ {
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_FULL,
+ .need = may_flags & NFSD_FILE_MAY_MASK,
+ .net = SVC_NET(rqstp),
++ .gc = want_gc,
+ };
+ bool open_retry = true;
+ struct nfsd_file *nf;
+@@ -1116,14 +1134,35 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * then unhash.
+ */
+ if (status != nfs_ok || key.inode->i_nlink == 0)
+- if (nfsd_file_unhash(nf))
+- nfsd_file_put_noref(nf);
++ nfsd_file_unhash_and_put(nf);
+ clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ goto out;
+ }
+
++/**
++ * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
++ * @rqstp: the RPC transaction being executed
++ * @fhp: the NFS filehandle of the file to be opened
++ * @may_flags: NFSD_MAY_ settings for the file
++ * @pnf: OUT: new or found "struct nfsd_file" object
++ *
++ * The nfsd_file object returned by this API is reference-counted
++ * and garbage-collected. The object is retained for a few
++ * seconds after the final nfsd_file_put() in case the caller
++ * wants to re-use it.
++ *
++ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
++ * network byte order is returned.
++ */
++__be32
++nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct nfsd_file **pnf)
++{
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
++}
++
+ /**
+ * nfsd_file_acquire - Get a struct nfsd_file with an open file
+ * @rqstp: the RPC transaction being executed
+@@ -1131,6 +1170,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
++ * The nfsd_file_object returned by this API is reference-counted
++ * but not garbage-collected. The object is unhashed after the
++ * final nfsd_file_put().
++ *
+ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+ * network byte order is returned.
+ */
+@@ -1138,7 +1181,7 @@ __be32
+ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+ {
+- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
+ }
+
+ /**
+@@ -1148,6 +1191,10 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
++ * The nfsd_file_object returned by this API is reference-counted
++ * but not garbage-collected. The object is released immediately
++ * one RCU grace period after the final nfsd_file_put().
++ *
+ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+ * network byte order is returned.
+ */
+@@ -1155,7 +1202,7 @@ __be32
+ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+ {
+- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
+ }
+
+ /*
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 6b012ea4bd9da..b7efb2c3ddb18 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -38,6 +38,7 @@ struct nfsd_file {
+ #define NFSD_FILE_HASHED (0)
+ #define NFSD_FILE_PENDING (1)
+ #define NFSD_FILE_REFERENCED (2)
++#define NFSD_FILE_GC (3)
+ unsigned long nf_flags;
+ struct inode *nf_inode; /* don't deref */
+ refcount_t nf_ref;
+@@ -55,6 +56,8 @@ void nfsd_file_put(struct nfsd_file *nf);
+ struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+ void nfsd_file_close_inode_sync(struct inode *inode);
+ bool nfsd_file_is_cached(struct inode *inode);
++__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct nfsd_file **nfp);
+ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index ff29205463332..d01b29aba6623 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -772,8 +772,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
+ (unsigned long long) argp->offset);
+
+ fh_copy(&resp->fh, &argp->fh);
+- resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE |
+- NFSD_MAY_NOT_BREAK_LEASE, &nf);
++ resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE |
++ NFSD_MAY_NOT_BREAK_LEASE, &nf);
+ if (resp->status)
+ goto out;
+ resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset,
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 72aa7435d55bd..d449c364cc76b 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -816,7 +816,8 @@ DEFINE_CLID_EVENT(confirmed_r);
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+- { 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
++ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \
++ { 1 << NFSD_FILE_GC, "GC"})
+
+ DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 3c43a51e17865..db7f0119433cf 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1149,7 +1149,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ __be32 err;
+
+ trace_nfsd_read_start(rqstp, fhp, offset, *count);
+- err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
++ err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf);
+ if (err)
+ return err;
+
+@@ -1181,7 +1181,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+
+ trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
+
+- err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf);
++ err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf);
+ if (err)
+ goto out;
+
+--
+2.43.0
+
--- /dev/null
+From ef90fcef9433e76ac576c6c795c67e5bb08eab2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 19:44:48 -0800
+Subject: NFSD: add CB_RECALL_ANY tracepoints
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 638593be55c0b37a1930038460a9918215d5c24b ]
+
+Add tracepoints to trace start and end of CB_RECALL_ANY operation.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+[ cel: added show_rca_mask() macro ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 2 ++
+ fs/nfsd/trace.h | 50 ++++++++++++++++++++++++++++++++++++++++
+ include/trace/misc/nfs.h | 12 ++++++++++
+ 3 files changed, 64 insertions(+)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 1705aeb2a1b8e..8bb75adbd4e6a 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2876,6 +2876,7 @@ static int
+ nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+ {
++ trace_nfsd_cb_recall_any_done(cb, task);
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+@@ -6234,6 +6235,7 @@ deleg_reaper(struct nfsd_net *nn)
+ list_del_init(&clp->cl_ra_cblist);
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
++ trace_nfsd_cb_recall_any(clp->cl_ra);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+ }
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index eb155e98a9dc2..f8eaef5b319eb 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -9,9 +9,12 @@
+ #define _NFSD_TRACE_H
+
+ #include <linux/tracepoint.h>
++#include <linux/sunrpc/xprt.h>
++#include <trace/misc/nfs.h>
+
+ #include "export.h"
+ #include "nfsfh.h"
++#include "xdr4.h"
+
+ #define NFSD_TRACE_PROC_RES_FIELDS \
+ __field(unsigned int, netns_ino) \
+@@ -1563,6 +1566,32 @@ TRACE_EVENT(nfsd_cb_offload,
+ __entry->fh_hash, __entry->count, __entry->status)
+ );
+
++TRACE_EVENT(nfsd_cb_recall_any,
++ TP_PROTO(
++ const struct nfsd4_cb_recall_any *ra
++ ),
++ TP_ARGS(ra),
++ TP_STRUCT__entry(
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(u32, keep)
++ __field(unsigned long, bmval0)
++ __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen)
++ ),
++ TP_fast_assign(
++ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
++ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
++ __entry->keep = ra->ra_keep;
++ __entry->bmval0 = ra->ra_bmval[0];
++ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
++ ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen);
++ ),
++ TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s",
++ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
++ __entry->keep, show_rca_mask(__entry->bmval0)
++ )
++);
++
+ DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+ TP_PROTO(
+ const stateid_t *stp,
+@@ -1602,6 +1631,27 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+
++TRACE_EVENT(nfsd_cb_recall_any_done,
++ TP_PROTO(
++ const struct nfsd4_callback *cb,
++ const struct rpc_task *task
++ ),
++ TP_ARGS(cb, task),
++ TP_STRUCT__entry(
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(int, status)
++ ),
++ TP_fast_assign(
++ __entry->status = task->tk_status;
++ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
++ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
++ ),
++ TP_printk("client %08x:%08x status=%d",
++ __entry->cl_boot, __entry->cl_id, __entry->status
++ )
++);
++
+ #endif /* _NFSD_TRACE_H */
+
+ #undef TRACE_INCLUDE_PATH
+diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h
+index 09ffdbb04134d..0d9d48dca38a8 100644
+--- a/include/trace/misc/nfs.h
++++ b/include/trace/misc/nfs.h
+@@ -360,6 +360,18 @@ TRACE_DEFINE_ENUM(IOMODE_ANY);
+ { IOMODE_RW, "RW" }, \
+ { IOMODE_ANY, "ANY" })
+
++#define show_rca_mask(x) \
++ __print_flags(x, "|", \
++ { BIT(RCA4_TYPE_MASK_RDATA_DLG), "RDATA_DLG" }, \
++ { BIT(RCA4_TYPE_MASK_WDATA_DLG), "WDATA_DLG" }, \
++ { BIT(RCA4_TYPE_MASK_DIR_DLG), "DIR_DLG" }, \
++ { BIT(RCA4_TYPE_MASK_FILE_LAYOUT), "FILE_LAYOUT" }, \
++ { BIT(RCA4_TYPE_MASK_BLK_LAYOUT), "BLK_LAYOUT" }, \
++ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MIN), "OBJ_LAYOUT_MIN" }, \
++ { BIT(RCA4_TYPE_MASK_OBJ_LAYOUT_MAX), "OBJ_LAYOUT_MAX" }, \
++ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MIN), "OTHER_LAYOUT_MIN" }, \
++ { BIT(RCA4_TYPE_MASK_OTHER_LAYOUT_MAX), "OTHER_LAYOUT_MAX" })
++
+ #define show_nfs4_seq4_status(x) \
+ __print_flags(x, "|", \
+ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+--
+2.43.0
+
--- /dev/null
+From 80d7f6a30b69d0e7407f521dd8250fe4f8fc2a5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:21 -0700
+Subject: NFSD: add courteous server support for thread with only delegation
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 66af25799940b26efd41ea6e648f75c41a48a2c2 ]
+
+This patch provides courteous server support for delegation only.
+Only expired client with delegation but no conflict and no open
+or lock state is allowed to be in COURTESY state.
+
+Delegation conflict with COURTESY/EXPIRABLE client is resolved by
+setting it to EXPIRABLE, queue work for the laundromat and return
+delay to the caller. Conflict is resolved when the laundromat runs
+and expires the EXPIRABLE client while the NFS client retries the
+OPEN request. Local thread request that gets conflict is doing the
+retry in _break_lease.
+
+Client in COURTESY or EXPIRABLE state is allowed to reconnect and
+continues to have access to its state. Access to the nfs4_client by
+the reconnecting thread and the laundromat is serialized via the
+client_lock.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 82 ++++++++++++++++++++++++++++++++++++---------
+ fs/nfsd/nfsd.h | 1 +
+ fs/nfsd/state.h | 31 +++++++++++++++++
+ 3 files changed, 99 insertions(+), 15 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 5f3adb59c1ffd..1e15bb2d8382b 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -125,6 +125,8 @@ static void free_session(struct nfsd4_session *);
+ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
+ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+
++static struct workqueue_struct *laundry_wq;
++
+ static bool is_session_dead(struct nfsd4_session *ses)
+ {
+ return ses->se_flags & NFS4_SESSION_DEAD;
+@@ -152,6 +154,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
+ if (is_client_expired(clp))
+ return nfserr_expired;
+ atomic_inc(&clp->cl_rpc_users);
++ clp->cl_state = NFSD4_ACTIVE;
+ return nfs_ok;
+ }
+
+@@ -172,6 +175,7 @@ renew_client_locked(struct nfs4_client *clp)
+
+ list_move_tail(&clp->cl_lru, &nn->client_lru);
+ clp->cl_time = ktime_get_boottime_seconds();
++ clp->cl_state = NFSD4_ACTIVE;
+ }
+
+ static void put_client_renew_locked(struct nfs4_client *clp)
+@@ -1102,6 +1106,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+ get_clnt_odstate(odstate);
+ dp->dl_type = NFS4_OPEN_DELEGATE_READ;
+ dp->dl_retries = 1;
++ dp->dl_recalled = false;
+ nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
+ &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ get_nfs4_file(fp);
+@@ -2017,6 +2022,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
+ idr_init(&clp->cl_stateids);
+ atomic_set(&clp->cl_rpc_users, 0);
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
++ clp->cl_state = NFSD4_ACTIVE;
++ atomic_set(&clp->cl_delegs_in_recall, 0);
+ INIT_LIST_HEAD(&clp->cl_idhash);
+ INIT_LIST_HEAD(&clp->cl_openowners);
+ INIT_LIST_HEAD(&clp->cl_delegations);
+@@ -4711,9 +4718,18 @@ nfsd_break_deleg_cb(struct file_lock *fl)
+ bool ret = false;
+ struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
++ struct nfs4_client *clp = dp->dl_stid.sc_client;
++ struct nfsd_net *nn;
+
+ trace_nfsd_cb_recall(&dp->dl_stid);
+
++ dp->dl_recalled = true;
++ atomic_inc(&clp->cl_delegs_in_recall);
++ if (try_to_expire_client(clp)) {
++ nn = net_generic(clp->net, nfsd_net_id);
++ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
++ }
++
+ /*
+ * We don't want the locks code to timeout the lease for us;
+ * we'll remove it ourself if a delegation isn't returned
+@@ -4756,9 +4772,14 @@ static int
+ nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
+ struct list_head *dispose)
+ {
+- if (arg & F_UNLCK)
++ struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner;
++ struct nfs4_client *clp = dp->dl_stid.sc_client;
++
++ if (arg & F_UNLCK) {
++ if (dp->dl_recalled)
++ atomic_dec(&clp->cl_delegs_in_recall);
+ return lease_modify(onlist, arg, dispose);
+- else
++ } else
+ return -EAGAIN;
+ }
+
+@@ -5622,6 +5643,49 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+ }
+ #endif
+
++/*
++ * place holder for now, no check for lock blockers yet
++ */
++static bool
++nfs4_anylock_blockers(struct nfs4_client *clp)
++{
++ if (atomic_read(&clp->cl_delegs_in_recall) ||
++ client_has_openowners(clp) ||
++ !list_empty(&clp->async_copies))
++ return true;
++ return false;
++}
++
++static void
++nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
++ struct laundry_time *lt)
++{
++ struct list_head *pos, *next;
++ struct nfs4_client *clp;
++
++ INIT_LIST_HEAD(reaplist);
++ spin_lock(&nn->client_lock);
++ list_for_each_safe(pos, next, &nn->client_lru) {
++ clp = list_entry(pos, struct nfs4_client, cl_lru);
++ if (clp->cl_state == NFSD4_EXPIRABLE)
++ goto exp_client;
++ if (!state_expired(lt, clp->cl_time))
++ break;
++ if (!atomic_read(&clp->cl_rpc_users))
++ clp->cl_state = NFSD4_COURTESY;
++ if (!client_has_state(clp) ||
++ ktime_get_boottime_seconds() >=
++ (clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
++ goto exp_client;
++ if (nfs4_anylock_blockers(clp)) {
++exp_client:
++ if (!mark_client_expired_locked(clp))
++ list_add(&clp->cl_lru, reaplist);
++ }
++ }
++ spin_unlock(&nn->client_lock);
++}
++
+ static time64_t
+ nfs4_laundromat(struct nfsd_net *nn)
+ {
+@@ -5644,7 +5708,6 @@ nfs4_laundromat(struct nfsd_net *nn)
+ goto out;
+ }
+ nfsd4_end_grace(nn);
+- INIT_LIST_HEAD(&reaplist);
+
+ spin_lock(&nn->s2s_cp_lock);
+ idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+@@ -5654,17 +5717,7 @@ nfs4_laundromat(struct nfsd_net *nn)
+ _free_cpntf_state_locked(nn, cps);
+ }
+ spin_unlock(&nn->s2s_cp_lock);
+-
+- spin_lock(&nn->client_lock);
+- list_for_each_safe(pos, next, &nn->client_lru) {
+- clp = list_entry(pos, struct nfs4_client, cl_lru);
+- if (!state_expired(&lt, clp->cl_time))
+- break;
+- if (mark_client_expired_locked(clp))
+- continue;
+- list_add(&clp->cl_lru, &reaplist);
+- }
+- spin_unlock(&nn->client_lock);
++ nfs4_get_client_reaplist(nn, &reaplist, &lt);
+ list_for_each_safe(pos, next, &reaplist) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ trace_nfsd_clid_purged(&clp->cl_clientid);
+@@ -5739,7 +5792,6 @@ nfs4_laundromat(struct nfsd_net *nn)
+ return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
+ }
+
+-static struct workqueue_struct *laundry_wq;
+ static void laundromat_main(struct work_struct *);
+
+ static void
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 4fc1fd639527a..23996c6ca75e3 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -336,6 +336,7 @@ void nfsd_lockd_shutdown(void);
+ #define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
+
+ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
++#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
+
+ /*
+ * The following attributes are currently not supported by the NFSv4 server:
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index 95457cfd37fc0..f3d6313914ed0 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -149,6 +149,7 @@ struct nfs4_delegation {
+ /* For recall: */
+ int dl_retries;
+ struct nfsd4_callback dl_recall;
++ bool dl_recalled;
+ };
+
+ #define cb_to_delegation(cb) \
+@@ -282,6 +283,28 @@ struct nfsd4_sessionid {
+
+ #define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+
++/*
++ * State Meaning Where set
++ * --------------------------------------------------------------------------
++ * | NFSD4_ACTIVE | Confirmed, active | Default |
++ * |------------------- ----------------------------------------------------|
++ * | NFSD4_COURTESY | Courtesy state. | nfs4_get_client_reaplist |
++ * | | Lease/lock/share | |
++ * | | reservation conflict | |
++ * | | can cause Courtesy | |
++ * | | client to be expired | |
++ * |------------------------------------------------------------------------|
++ * | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat |
++ * | | expired by Laundromat| try_to_expire_client |
++ * | | due to conflict | |
++ * |------------------------------------------------------------------------|
++ */
++enum {
++ NFSD4_ACTIVE = 0,
++ NFSD4_COURTESY,
++ NFSD4_EXPIRABLE,
++};
++
+ /*
+ * struct nfs4_client - one per client. Clientids live here.
+ *
+@@ -385,6 +408,9 @@ struct nfs4_client {
+ struct list_head async_copies; /* list of async copies */
+ spinlock_t async_lock; /* lock for async copies */
+ atomic_t cl_cb_inflight; /* Outstanding callbacks */
++
++ unsigned int cl_state;
++ atomic_t cl_delegs_in_recall;
+ };
+
+ /* struct nfs4_client_reset
+@@ -702,4 +728,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
+ extern int nfsd4_client_record_check(struct nfs4_client *clp);
+ extern void nfsd4_record_grace_done(struct nfsd_net *nn);
+
++static inline bool try_to_expire_client(struct nfs4_client *clp)
++{
++ cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
++ return clp->cl_state == NFSD4_EXPIRABLE;
++}
+ #endif /* NFSD4_STATE_H */
+--
+2.43.0
+
--- /dev/null
+From 326afb41ad11a736ac432a4485b9b58ab9e16ae6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 19:44:47 -0800
+Subject: NFSD: add delegation reaper to react to low memory condition
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 44df6f439a1790a5f602e3842879efa88f346672 ]
+
+The delegation reaper is called by the nfsd memory shrinker's
+'count' callback. It scans the client list and sends the
+courtesy CB_RECALL_ANY to the clients that hold delegations.
+
+To avoid flooding the clients with CB_RECALL_ANY requests, the
+delegation reaper sends only one CB_RECALL_ANY request to each
+client per 5 seconds.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+[ cel: moved definition of RCA4_TYPE_MASK_RDATA_DLG ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 88 ++++++++++++++++++++++++++++++++++++++++++--
+ fs/nfsd/state.h | 5 +++
+ include/linux/nfs4.h | 13 +++++++
+ 3 files changed, 102 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 34ae4a3d86f3e..1705aeb2a1b8e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2144,6 +2144,7 @@ static void __free_client(struct kref *k)
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
++ kfree(clp->cl_ra);
+ kmem_cache_free(client_slab, clp);
+ }
+
+@@ -2871,6 +2872,36 @@ static const struct tree_descr client_files[] = {
+ [3] = {""},
+ };
+
++static int
++nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
++ struct rpc_task *task)
++{
++ switch (task->tk_status) {
++ case -NFS4ERR_DELAY:
++ rpc_delay(task, 2 * HZ);
++ return 0;
++ default:
++ return 1;
++ }
++}
++
++static void
++nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
++{
++ struct nfs4_client *clp = cb->cb_clp;
++ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
++
++ spin_lock(&nn->client_lock);
++ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
++ put_client_renew_locked(clp);
++ spin_unlock(&nn->client_lock);
++}
++
++static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
++ .done = nfsd4_cb_recall_any_done,
++ .release = nfsd4_cb_recall_any_release,
++};
++
+ static struct nfs4_client *create_client(struct xdr_netobj name,
+ struct svc_rqst *rqstp, nfs4_verifier *verf)
+ {
+@@ -2908,6 +2939,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
+ free_client(clp);
+ return NULL;
+ }
++ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
++ if (!clp->cl_ra) {
++ free_client(clp);
++ return NULL;
++ }
++ clp->cl_ra_time = 0;
++ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
++ NFSPROC4_CLNT_CB_RECALL_ANY);
+ return clp;
+ }
+
+@@ -4363,14 +4402,16 @@ nfsd4_init_slabs(void)
+ static unsigned long
+ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+ {
+- int cnt;
++ int count;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_client_shrinker);
+
+- cnt = atomic_read(&nn->nfsd_courtesy_clients);
+- if (cnt > 0)
++ count = atomic_read(&nn->nfsd_courtesy_clients);
++ if (!count)
++ count = atomic_long_read(&num_delegations);
++ if (count)
+ mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+- return (unsigned long)cnt;
++ return (unsigned long)count;
+ }
+
+ static unsigned long
+@@ -6159,6 +6200,44 @@ courtesy_client_reaper(struct nfsd_net *nn)
+ nfs4_process_client_reaplist(&reaplist);
+ }
+
++static void
++deleg_reaper(struct nfsd_net *nn)
++{
++ struct list_head *pos, *next;
++ struct nfs4_client *clp;
++ struct list_head cblist;
++
++ INIT_LIST_HEAD(&cblist);
++ spin_lock(&nn->client_lock);
++ list_for_each_safe(pos, next, &nn->client_lru) {
++ clp = list_entry(pos, struct nfs4_client, cl_lru);
++ if (clp->cl_state != NFSD4_ACTIVE ||
++ list_empty(&clp->cl_delegations) ||
++ atomic_read(&clp->cl_delegs_in_recall) ||
++ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
++ (ktime_get_boottime_seconds() -
++ clp->cl_ra_time < 5)) {
++ continue;
++ }
++ list_add(&clp->cl_ra_cblist, &cblist);
++
++ /* release in nfsd4_cb_recall_any_release */
++ atomic_inc(&clp->cl_rpc_users);
++ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
++ clp->cl_ra_time = ktime_get_boottime_seconds();
++ }
++ spin_unlock(&nn->client_lock);
++
++ while (!list_empty(&cblist)) {
++ clp = list_first_entry(&cblist, struct nfs4_client,
++ cl_ra_cblist);
++ list_del_init(&clp->cl_ra_cblist);
++ clp->cl_ra->ra_keep = 0;
++ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
++ nfsd4_run_cb(&clp->cl_ra->ra_cb);
++ }
++}
++
+ static void
+ nfsd4_state_shrinker_worker(struct work_struct *work)
+ {
+@@ -6167,6 +6246,7 @@ nfsd4_state_shrinker_worker(struct work_struct *work)
+ nfsd_shrinker_work);
+
+ courtesy_client_reaper(nn);
++ deleg_reaper(nn);
+ }
+
+ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index e30882f8b8516..e94634d305912 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -368,6 +368,7 @@ struct nfs4_client {
+ #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
+ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
+ 1 << NFSD4_CLIENT_CB_KILL)
++#define NFSD4_CLIENT_CB_RECALL_ANY (6)
+ unsigned long cl_flags;
+ const struct cred *cl_cb_cred;
+ struct rpc_clnt *cl_cb_client;
+@@ -411,6 +412,10 @@ struct nfs4_client {
+
+ unsigned int cl_state;
+ atomic_t cl_delegs_in_recall;
++
++ struct nfsd4_cb_recall_any *cl_ra;
++ time64_t cl_ra_time;
++ struct list_head cl_ra_cblist;
+ };
+
+ /* struct nfs4_client_reset
+diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
+index 5662d8be04eb0..8e2b532919e0d 100644
+--- a/include/linux/nfs4.h
++++ b/include/linux/nfs4.h
+@@ -730,4 +730,17 @@ enum nfs4_setxattr_options {
+ SETXATTR4_CREATE = 1,
+ SETXATTR4_REPLACE = 2,
+ };
++
++enum {
++ RCA4_TYPE_MASK_RDATA_DLG = 0,
++ RCA4_TYPE_MASK_WDATA_DLG = 1,
++ RCA4_TYPE_MASK_DIR_DLG = 2,
++ RCA4_TYPE_MASK_FILE_LAYOUT = 3,
++ RCA4_TYPE_MASK_BLK_LAYOUT = 4,
++ RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8,
++ RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9,
++ RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12,
++ RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15,
++};
++
+ #endif
+--
+2.43.0
+
--- /dev/null
+From a3c8df261fac3a0ad96b610433cab67473b9ef70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 12:34:38 -0400
+Subject: NFSD: Add documenting comment for nfsd4_release_lockowner()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 043862b09cc00273e35e6c3a6389957953a34207 ]
+
+And return explicit nfserr values that match what is documented in the
+new comment / API contract.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 23 ++++++++++++++++++++---
+ 1 file changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 2d52656095340..08700b6acba31 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7542,6 +7542,23 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ return status;
+ }
+
++/**
++ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations
++ * @rqstp: RPC transaction
++ * @cstate: NFSv4 COMPOUND state
++ * @u: RELEASE_LOCKOWNER arguments
++ *
++ * The lockowner's so_count is bumped when a lock record is added
++ * or when copying a conflicting lock. The latter case is brief,
++ * but can lead to fleeting false positives when looking for
++ * locks-in-use.
++ *
++ * Return values:
++ * %nfs_ok: lockowner released or not found
++ * %nfserr_locks_held: lockowner still in use
++ * %nfserr_stale_clientid: clientid no longer active
++ * %nfserr_expired: clientid not recognized
++ */
+ __be32
+ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+@@ -7568,7 +7585,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
+ if (!lo) {
+ spin_unlock(&clp->cl_lock);
+- return status;
++ return nfs_ok;
+ }
+ if (atomic_read(&lo->lo_owner.so_count) != 2) {
+ spin_unlock(&clp->cl_lock);
+@@ -7584,11 +7601,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ put_ol_stateid_locked(stp, &reaplist);
+ }
+ spin_unlock(&clp->cl_lock);
++
+ free_ol_stateid_reaplist(&reaplist);
+ remove_blocked_locks(lo);
+ nfs4_put_stateowner(&lo->lo_owner);
+-
+- return status;
++ return nfs_ok;
+ }
+
+ static inline struct nfs4_client_reclaim *
+--
+2.43.0
+
--- /dev/null
+From 2af8dff028895625403b46a491a3925c9d104c4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 18 Dec 2021 20:37:55 -0500
+Subject: nfsd: Add errno mapping for EREMOTEIO
+
+From: Jeff Layton <jeff.layton@primarydata.com>
+
+[ Upstream commit a2694e51f60c5a18c7e43d1a9feaa46d7f153e65 ]
+
+The NFS client can occasionally return EREMOTEIO when signalling issues
+with the server. ...map to NFSERR_IO.
+
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsproc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 3c5e87805cc8d..406dc50fea7ba 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -874,6 +874,7 @@ nfserrno (int errno)
+ { nfserr_toosmall, -ETOOSMALL },
+ { nfserr_serverfault, -ESERVERFAULT },
+ { nfserr_serverfault, -ENFILE },
++ { nfserr_io, -EREMOTEIO },
+ { nfserr_io, -EUCLEAN },
+ { nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
+--
+2.43.0
+
--- /dev/null
+From 31e4d0bfcafd1a8f322ebff8e6cd37862195287e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:41:12 -0400
+Subject: NFSD: Add nfsd4_send_cb_offload()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit e72f9bc006c08841c46d27747a4debc747a8fe13 ]
+
+Refactor for legibility.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 37 ++++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 15 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index f63c3c4c10ca7..be51338deda46 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1755,6 +1755,27 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ nfs4_put_copy(copy);
+ }
+
++static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
++{
++ struct nfsd4_copy *cb_copy;
++
++ cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
++ if (!cb_copy)
++ return;
++
++ refcount_set(&cb_copy->refcount, 1);
++ memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
++ cb_copy->cp_clp = copy->cp_clp;
++ cb_copy->nfserr = copy->nfserr;
++ memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh));
++
++ nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
++ &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
++ trace_nfsd_cb_offload(copy->cp_clp, &copy->cp_res.cb_stateid,
++ &copy->fh, copy->cp_count, copy->nfserr);
++ nfsd4_run_cb(&cb_copy->cp_cb);
++}
++
+ /**
+ * nfsd4_do_async_copy - kthread function for background server-side COPY
+ * @data: arguments for COPY operation
+@@ -1765,7 +1786,6 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ static int nfsd4_do_async_copy(void *data)
+ {
+ struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
+- struct nfsd4_copy *cb_copy;
+
+ if (nfsd4_ssc_is_inter(copy)) {
+ struct file *filp;
+@@ -1787,20 +1807,7 @@ static int nfsd4_do_async_copy(void *data)
+ }
+
+ do_callback:
+- cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+- if (!cb_copy)
+- goto out;
+- refcount_set(&cb_copy->refcount, 1);
+- memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
+- cb_copy->cp_clp = copy->cp_clp;
+- cb_copy->nfserr = copy->nfserr;
+- memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh));
+- nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
+- &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
+- trace_nfsd_cb_offload(copy->cp_clp, &copy->cp_res.cb_stateid,
+- &copy->fh, copy->cp_count, copy->nfserr);
+- nfsd4_run_cb(&cb_copy->cp_cb);
+-out:
++ nfsd4_send_cb_offload(copy);
+ cleanup_async_copy(copy);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 121774be20e2c50e96d998bee6db044ab81a0f62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:18 -0400
+Subject: NFSD: Add nfsd_file_lru_dispose_list() helper
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0bac5a264d9a923f5b01f3521e1519a8d0358342 ]
+
+Refactor the invariant part of nfsd_file_lru_walk_list() into a
+separate helper function.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 29 ++++++++++++++++++++++-------
+ 1 file changed, 22 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 0cd72c20fc12d..ffe46f3f33495 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -450,11 +450,31 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ return LRU_SKIP;
+ }
+
++/*
++ * Unhash items on @dispose immediately, then queue them on the
++ * disposal workqueue to finish releasing them in the background.
++ *
++ * cel: Note that between the time list_lru_shrink_walk runs and
++ * now, these items are in the hash table but marked unhashed.
++ * Why release these outside of lru_cb ? There's no lock ordering
++ * problem since lru_cb currently takes no lock.
++ */
++static void nfsd_file_gc_dispose_list(struct list_head *dispose)
++{
++ struct nfsd_file *nf;
++
++ list_for_each_entry(nf, dispose, nf_lru) {
++ spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
++ nfsd_file_do_unhash(nf);
++ spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
++ }
++ nfsd_file_dispose_list_delayed(dispose);
++}
++
+ static unsigned long
+ nfsd_file_lru_walk_list(struct shrink_control *sc)
+ {
+ LIST_HEAD(head);
+- struct nfsd_file *nf;
+ unsigned long ret;
+
+ if (sc)
+@@ -464,12 +484,7 @@ nfsd_file_lru_walk_list(struct shrink_control *sc)
+ ret = list_lru_walk(&nfsd_file_lru,
+ nfsd_file_lru_cb,
+ &head, LONG_MAX);
+- list_for_each_entry(nf, &head, nf_lru) {
+- spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+- nfsd_file_do_unhash(nf);
+- spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+- }
+- nfsd_file_dispose_list_delayed(&head);
++ nfsd_file_gc_dispose_list(&head);
+ return ret;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 2018f63501c99bb933f42c6e79527e59aa6a6ad2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: add posix ACLs to struct nfsd_attrs
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit c0cbe70742f4a70893cd6e5f6b10b6e89b6db95b ]
+
+pacl and dpacl pointers are added to struct nfsd_attrs, which requires
+that we have an nfsd_attrs_free() function to free them.
+Those nfsv4 functions that can set ACLs now set up these pointers
+based on the passed in NFSv4 ACL.
+
+nfsd_setattr() sets the acls as appropriate.
+
+Errors are handled as with security labels.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/acl.h | 6 ++++--
+ fs/nfsd/nfs4acl.c | 46 +++++++---------------------------------------
+ fs/nfsd/nfs4proc.c | 46 ++++++++++++++++------------------------------
+ fs/nfsd/vfs.c | 9 +++++++++
+ fs/nfsd/vfs.h | 11 +++++++++++
+ 5 files changed, 47 insertions(+), 71 deletions(-)
+
+diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
+index ba14d2f4b64f4..4b7324458a94e 100644
+--- a/fs/nfsd/acl.h
++++ b/fs/nfsd/acl.h
+@@ -38,6 +38,8 @@
+ struct nfs4_acl;
+ struct svc_fh;
+ struct svc_rqst;
++struct nfsd_attrs;
++enum nfs_ftype4;
+
+ int nfs4_acl_bytes(int entries);
+ int nfs4_acl_get_whotype(char *, u32);
+@@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
+
+ int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
+ struct nfs4_acl **acl);
+-__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct nfs4_acl *acl);
++__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
++ struct nfsd_attrs *attr);
+
+ #endif /* LINUX_NFS4_ACL_H */
+diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
+index eaa3a0cf38f14..bb8e2f6d7d03c 100644
+--- a/fs/nfsd/nfs4acl.c
++++ b/fs/nfsd/nfs4acl.c
+@@ -751,58 +751,26 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl,
+ return ret;
+ }
+
+-__be32
+-nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct nfs4_acl *acl)
++__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
++ struct nfsd_attrs *attr)
+ {
+- __be32 error;
+ int host_error;
+- struct dentry *dentry;
+- struct inode *inode;
+- struct posix_acl *pacl = NULL, *dpacl = NULL;
+ unsigned int flags = 0;
+
+- /* Get inode */
+- error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
+- if (error)
+- return error;
+-
+- dentry = fhp->fh_dentry;
+- inode = d_inode(dentry);
++ if (!acl)
++ return nfs_ok;
+
+- if (S_ISDIR(inode->i_mode))
++ if (type == NF4DIR)
+ flags = NFS4_ACL_DIR;
+
+- host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
++ host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl,
++ &attr->na_dpacl, flags);
+ if (host_error == -EINVAL)
+ return nfserr_attrnotsupp;
+- if (host_error < 0)
+- goto out_nfserr;
+-
+- fh_lock(fhp);
+-
+- host_error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, pacl);
+- if (host_error < 0)
+- goto out_drop_lock;
+-
+- if (S_ISDIR(inode->i_mode)) {
+- host_error = set_posix_acl(&init_user_ns, inode,
+- ACL_TYPE_DEFAULT, dpacl);
+- }
+-
+-out_drop_lock:
+- fh_unlock(fhp);
+-
+- posix_acl_release(pacl);
+- posix_acl_release(dpacl);
+-out_nfserr:
+- if (host_error == -EOPNOTSUPP)
+- return nfserr_attrnotsupp;
+ else
+ return nfserrno(host_error);
+ }
+
+-
+ static short
+ ace2type(struct nfs4_ace *ace)
+ {
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 7ebf807f33d98..ffa2806fd5d3b 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -128,26 +128,6 @@ is_create_with_attrs(struct nfsd4_open *open)
+ || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
+ }
+
+-/*
+- * if error occurs when setting the acl, just clear the acl bit
+- * in the returned attr bitmap.
+- */
+-static void
+-do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct nfs4_acl *acl, u32 *bmval)
+-{
+- __be32 status;
+-
+- status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
+- if (status)
+- /*
+- * We should probably fail the whole open at this point,
+- * but we've already created the file, so it's too late;
+- * So this seems the least of evils:
+- */
+- bmval[0] &= ~FATTR4_WORD0_ACL;
+-}
+-
+ static inline void
+ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
+ {
+@@ -281,6 +261,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (host_err)
+ return nfserrno(host_err);
+
++ if (is_create_with_attrs(open))
++ nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs);
++
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
+
+ child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+@@ -382,8 +365,11 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ if (attrs.na_labelerr)
+ open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
++ if (attrs.na_aclerr)
++ open->op_bmval[0] &= ~FATTR4_WORD0_ACL;
+ out:
+ fh_unlock(fhp);
++ nfsd_attrs_free(&attrs);
+ if (child && !IS_ERR(child))
+ dput(child);
+ fh_drop_write(fhp);
+@@ -446,9 +432,6 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
+ if (status)
+ goto out;
+
+- if (is_create_with_attrs(open) && open->op_acl != NULL)
+- do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval);
+-
+ nfsd4_set_open_owner_reply_cache(cstate, open, *resfh);
+ accmode = NFSD_MAY_NOP;
+ if (open->op_created ||
+@@ -779,6 +762,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ return status;
+
++ status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs);
+ current->fs->umask = create->cr_umask;
+ switch (create->cr_type) {
+ case NF4LNK:
+@@ -837,10 +821,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ if (attrs.na_labelerr)
+ create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+-
+- if (create->cr_acl != NULL)
+- do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
+- create->cr_bmval);
++ if (attrs.na_aclerr)
++ create->cr_bmval[0] &= ~FATTR4_WORD0_ACL;
+
+ fh_unlock(&cstate->current_fh);
+ set_change_info(&create->cr_cinfo, &cstate->current_fh);
+@@ -849,6 +831,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ fh_put(&resfh);
+ out_umask:
+ current->fs->umask = 0;
++ nfsd_attrs_free(&attrs);
+ return status;
+ }
+
+@@ -1123,6 +1106,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ .na_iattr = &setattr->sa_iattr,
+ .na_seclabel = &setattr->sa_label,
+ };
++ struct inode *inode;
+ __be32 status = nfs_ok;
+ int err;
+
+@@ -1145,9 +1129,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto out;
+
+- if (setattr->sa_acl != NULL)
+- status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
+- setattr->sa_acl);
++ inode = cstate->current_fh.fh_dentry->d_inode;
++ status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG,
++ setattr->sa_acl, &attrs);
++
+ if (status)
+ goto out;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+@@ -1155,6 +1140,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (!status)
+ status = nfserrno(attrs.na_labelerr);
+ out:
++ nfsd_attrs_free(&attrs);
+ fh_drop_write(&cstate->current_fh);
+ return status;
+ }
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index f9f62282d91f8..e91ac3bc68764 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -462,6 +462,15 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (attr->na_seclabel && attr->na_seclabel->len)
+ attr->na_labelerr = security_inode_setsecctx(dentry,
+ attr->na_seclabel->data, attr->na_seclabel->len);
++ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
++ attr->na_aclerr = set_posix_acl(&init_user_ns,
++ inode, ACL_TYPE_ACCESS,
++ attr->na_pacl);
++ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
++ !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
++ attr->na_aclerr = set_posix_acl(&init_user_ns,
++ inode, ACL_TYPE_DEFAULT,
++ attr->na_dpacl);
+ fh_unlock(fhp);
+ if (size_change)
+ put_write_access(inode);
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index d5d4cfe37c933..c95cd414b4bb0 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -6,6 +6,8 @@
+ #ifndef LINUX_NFSD_VFS_H
+ #define LINUX_NFSD_VFS_H
+
++#include <linux/fs.h>
++#include <linux/posix_acl.h>
+ #include "nfsfh.h"
+ #include "nfsd.h"
+
+@@ -45,10 +47,19 @@ typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
+ struct nfsd_attrs {
+ struct iattr *na_iattr; /* input */
+ struct xdr_netobj *na_seclabel; /* input */
++ struct posix_acl *na_pacl; /* input */
++ struct posix_acl *na_dpacl; /* input */
+
+ int na_labelerr; /* output */
++ int na_aclerr; /* output */
+ };
+
++static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
++{
++ posix_acl_release(attrs->na_pacl);
++ posix_acl_release(attrs->na_dpacl);
++}
++
+ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
+ __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+--
+2.43.0
+
--- /dev/null
+From a7b310372860e3b9f1a41177a010799ad4e97614 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: add security label to struct nfsd_attrs
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit d6a97d3f589a3a46a16183e03f3774daee251317 ]
+
+nfsd_setattr() now sets a security label if provided, and nfsv4 provides
+it in the 'open' and 'create' paths and the 'setattr' path.
+If setting the label failed (including because the kernel doesn't
+support labels), an error field in 'struct nfsd_attrs' is set, and the
+caller can respond. The open/create callers clear
+FATTR4_WORD2_SECURITY_LABEL in the returned attr set in this case.
+The setattr caller returns the error.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 49 +++++++++-------------------------------------
+ fs/nfsd/vfs.c | 29 +++------------------------
+ fs/nfsd/vfs.h | 5 +++--
+ 3 files changed, 15 insertions(+), 68 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 96f6fe4f86fd8..7ebf807f33d98 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -64,36 +64,6 @@ MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
+ "idle msecs before unmount export from source server");
+ #endif
+
+-#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+-#include <linux/security.h>
+-
+-static inline void
+-nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
+-{
+- struct inode *inode = d_inode(resfh->fh_dentry);
+- int status;
+-
+- inode_lock(inode);
+- status = security_inode_setsecctx(resfh->fh_dentry,
+- label->data, label->len);
+- inode_unlock(inode);
+-
+- if (status)
+- /*
+- * XXX: We should really fail the whole open, but we may
+- * already have created a new file, so it may be too
+- * late. For now this seems the least of evils:
+- */
+- bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+-
+- return;
+-}
+-#else
+-static inline void
+-nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
+-{ }
+-#endif
+-
+ #define NFSDDBG_FACILITY NFSDDBG_PROC
+
+ static u32 nfsd_attrmask[] = {
+@@ -288,6 +258,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct iattr *iap = &open->op_iattr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = iap,
++ .na_seclabel = &open->op_label,
+ };
+ struct dentry *parent, *child;
+ __u32 v_mtime, v_atime;
+@@ -409,6 +380,8 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ set_attr:
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
+
++ if (attrs.na_labelerr)
++ open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ out:
+ fh_unlock(fhp);
+ if (child && !IS_ERR(child))
+@@ -450,9 +423,6 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
+ status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
+ current->fs->umask = 0;
+
+- if (!status && open->op_label.len)
+- nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
+-
+ /*
+ * Following rfc 3530 14.2.16, and rfc 5661 18.16.4
+ * use the returned bitmask to indicate which attributes
+@@ -792,6 +762,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_create *create = &u->create;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &create->cr_iattr,
++ .na_seclabel = &create->cr_label,
+ };
+ struct svc_fh resfh;
+ __be32 status;
+@@ -864,8 +835,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto out;
+
+- if (create->cr_label.len)
+- nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval);
++ if (attrs.na_labelerr)
++ create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
+ if (create->cr_acl != NULL)
+ do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
+@@ -1150,6 +1121,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_setattr *setattr = &u->setattr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &setattr->sa_iattr,
++ .na_seclabel = &setattr->sa_label,
+ };
+ __be32 status = nfs_ok;
+ int err;
+@@ -1178,13 +1150,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ setattr->sa_acl);
+ if (status)
+ goto out;
+- if (setattr->sa_label.len)
+- status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh,
+- &setattr->sa_label);
+- if (status)
+- goto out;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+ 0, (time64_t)0);
++ if (!status)
++ status = nfserrno(attrs.na_labelerr);
+ out:
+ fh_drop_write(&cstate->current_fh);
+ return status;
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index bfdb42aa23a01..f9f62282d91f8 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -459,6 +459,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+
+ out_unlock:
++ if (attr->na_seclabel && attr->na_seclabel->len)
++ attr->na_labelerr = security_inode_setsecctx(dentry,
++ attr->na_seclabel->data, attr->na_seclabel->len);
+ fh_unlock(fhp);
+ if (size_change)
+ put_write_access(inode);
+@@ -497,32 +500,6 @@ int nfsd4_is_junction(struct dentry *dentry)
+ return 0;
+ return 1;
+ }
+-#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+-__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct xdr_netobj *label)
+-{
+- __be32 error;
+- int host_error;
+- struct dentry *dentry;
+-
+- error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
+- if (error)
+- return error;
+-
+- dentry = fhp->fh_dentry;
+-
+- inode_lock(d_inode(dentry));
+- host_error = security_inode_setsecctx(dentry, label->data, label->len);
+- inode_unlock(d_inode(dentry));
+- return nfserrno(host_error);
+-}
+-#else
+-__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct xdr_netobj *label)
+-{
+- return nfserr_notsupp;
+-}
+-#endif
+
+ static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
+ {
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 5047cec4c423c..d5d4cfe37c933 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -44,6 +44,9 @@ typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
+ /* nfsd/vfs.c */
+ struct nfsd_attrs {
+ struct iattr *na_iattr; /* input */
++ struct xdr_netobj *na_seclabel; /* input */
++
++ int na_labelerr; /* output */
+ };
+
+ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+@@ -57,8 +60,6 @@ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+ struct nfsd_attrs *, int, time64_t);
+ int nfsd_mountpoint(struct dentry *, struct svc_export *);
+ #ifdef CONFIG_NFSD_V4
+-__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
+- struct xdr_netobj *);
+ __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
+ struct file *, loff_t, loff_t, int);
+ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+--
+2.43.0
+
--- /dev/null
+From b8bedb849ee8d6c726b6493838a2de26cfad9f37 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Sep 2022 08:54:26 -0700
+Subject: NFSD: add shrinker to reap courtesy clients on low memory condition
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 7746b32f467b3813fb61faaab3258de35806a7ac ]
+
+Add courtesy_client_reaper to react to low memory condition triggered
+by the system memory shrinker.
+
+The delayed_work for the courtesy_client_reaper is scheduled on
+the shrinker's count callback using the laundry_wq.
+
+The shrinker's scan callback is not used for expiring the courtesy
+clients due to potential deadlocks.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 2 +
+ fs/nfsd/nfs4state.c | 94 +++++++++++++++++++++++++++++++++++++++++----
+ fs/nfsd/nfsctl.c | 6 ++-
+ fs/nfsd/nfsd.h | 6 ++-
+ 4 files changed, 96 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 55c7006d6109a..8c854ba3285bb 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -194,6 +194,8 @@ struct nfsd_net {
+ int nfs4_max_clients;
+
+ atomic_t nfsd_courtesy_clients;
++ struct shrinker nfsd_client_shrinker;
++ struct delayed_work nfsd_shrinker_work;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 6a7a99511111d..8cbb66b07d519 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4366,7 +4366,27 @@ nfsd4_init_slabs(void)
+ return -ENOMEM;
+ }
+
+-void nfsd4_init_leases_net(struct nfsd_net *nn)
++static unsigned long
++nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
++{
++ int cnt;
++ struct nfsd_net *nn = container_of(shrink,
++ struct nfsd_net, nfsd_client_shrinker);
++
++ cnt = atomic_read(&nn->nfsd_courtesy_clients);
++ if (cnt > 0)
++ mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
++ return (unsigned long)cnt;
++}
++
++static unsigned long
++nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
++{
++ return SHRINK_STOP;
++}
++
++int
++nfsd4_init_leases_net(struct nfsd_net *nn)
+ {
+ struct sysinfo si;
+ u64 max_clients;
+@@ -4387,6 +4407,16 @@ void nfsd4_init_leases_net(struct nfsd_net *nn)
+ nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
+
+ atomic_set(&nn->nfsd_courtesy_clients, 0);
++ nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
++ nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
++ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
++ return register_shrinker(&nn->nfsd_client_shrinker);
++}
++
++void
++nfsd4_leases_net_shutdown(struct nfsd_net *nn)
++{
++ unregister_shrinker(&nn->nfsd_client_shrinker);
+ }
+
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+@@ -5959,10 +5989,49 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ spin_unlock(&nn->client_lock);
+ }
+
++static void
++nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn,
++ struct list_head *reaplist)
++{
++ unsigned int maxreap = 0, reapcnt = 0;
++ struct list_head *pos, *next;
++ struct nfs4_client *clp;
++
++ maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN;
++ INIT_LIST_HEAD(reaplist);
++
++ spin_lock(&nn->client_lock);
++ list_for_each_safe(pos, next, &nn->client_lru) {
++ clp = list_entry(pos, struct nfs4_client, cl_lru);
++ if (clp->cl_state == NFSD4_ACTIVE)
++ break;
++ if (reapcnt >= maxreap)
++ break;
++ if (!mark_client_expired_locked(clp)) {
++ list_add(&clp->cl_lru, reaplist);
++ reapcnt++;
++ }
++ }
++ spin_unlock(&nn->client_lock);
++}
++
++static void
++nfs4_process_client_reaplist(struct list_head *reaplist)
++{
++ struct list_head *pos, *next;
++ struct nfs4_client *clp;
++
++ list_for_each_safe(pos, next, reaplist) {
++ clp = list_entry(pos, struct nfs4_client, cl_lru);
++ trace_nfsd_clid_purged(&clp->cl_clientid);
++ list_del_init(&clp->cl_lru);
++ expire_client(clp);
++ }
++}
++
+ static time64_t
+ nfs4_laundromat(struct nfsd_net *nn)
+ {
+- struct nfs4_client *clp;
+ struct nfs4_openowner *oo;
+ struct nfs4_delegation *dp;
+ struct nfs4_ol_stateid *stp;
+@@ -5991,12 +6060,8 @@ nfs4_laundromat(struct nfsd_net *nn)
+ }
+ spin_unlock(&nn->s2s_cp_lock);
+ nfs4_get_client_reaplist(nn, &reaplist, &lt);
+- list_for_each_safe(pos, next, &reaplist) {
+- clp = list_entry(pos, struct nfs4_client, cl_lru);
+- trace_nfsd_clid_purged(&clp->cl_clientid);
+- list_del_init(&clp->cl_lru);
+- expire_client(clp);
+- }
++ nfs4_process_client_reaplist(&reaplist);
++
+ spin_lock(&state_lock);
+ list_for_each_safe(pos, next, &nn->del_recall_lru) {
+ dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+@@ -6079,6 +6144,18 @@ laundromat_main(struct work_struct *laundry)
+ queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
+ }
+
++static void
++courtesy_client_reaper(struct work_struct *reaper)
++{
++ struct list_head reaplist;
++ struct delayed_work *dwork = to_delayed_work(reaper);
++ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
++ nfsd_shrinker_work);
++
++ nfs4_get_courtesy_client_reaplist(nn, &reaplist);
++ nfs4_process_client_reaplist(&reaplist);
++}
++
+ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+ {
+ if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
+@@ -7897,6 +7974,7 @@ static int nfs4_state_create_net(struct net *net)
+ INIT_LIST_HEAD(&nn->blocked_locks_lru);
+
+ INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
++ INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ get_net(net);
+
+ return 0;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 917fa1892fd2d..597a26ad4183f 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1481,11 +1481,12 @@ static __net_init int nfsd_init_net(struct net *net)
+ goto out_idmap_error;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
++ retval = nfsd4_init_leases_net(nn);
++ if (retval)
++ goto out_drc_error;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
+- nfsd4_init_leases_net(nn);
+-
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
+
+@@ -1507,6 +1508,7 @@ static __net_exit void nfsd_exit_net(struct net *net)
+ nfsd_idmap_shutdown(net);
+ nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
++ nfsd4_leases_net_shutdown(nn);
+ }
+
+ static struct pernet_operations nfsd_net_ops = {
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 6ab4ad41ae84e..09726c5b9a317 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -505,7 +505,8 @@ extern void unregister_cld_notifier(void);
+ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+ #endif
+
+-extern void nfsd4_init_leases_net(struct nfsd_net *nn);
++extern int nfsd4_init_leases_net(struct nfsd_net *nn);
++extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
+
+ #else /* CONFIG_NFSD_V4 */
+ static inline int nfsd4_is_junction(struct dentry *dentry)
+@@ -513,7 +514,8 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
+ return 0;
+ }
+
+-static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {};
++static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
++static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
+
+ #define register_cld_notifier() 0
+ #define unregister_cld_notifier() do { } while(0)
+--
+2.43.0
+
--- /dev/null
+From e66920294bcd2d442771ef8f15fdfeeff628e18e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:12 -0500
+Subject: nfsd: add some comments to nfsd_file_do_acquire
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b680cb9b737331aad271feebbedafb865504e234 ]
+
+David Howells mentioned that he found this bit of code confusing, so
+sprinkle in some comments to clarify.
+
+Reported-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 43bb2fd47cf58..faa0c7d0253eb 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1093,6 +1093,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ rcu_read_unlock();
+
+ if (nf) {
++ /*
++ * If the nf is on the LRU then it holds an extra reference
++ * that must be put if it's removed. It had better not be
++ * the last one however, since we should hold another.
++ */
+ if (nfsd_file_lru_remove(nf))
+ WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+ goto wait_for_construction;
+--
+2.43.0
+
--- /dev/null
+From ff37a4d344eca7f819cb5977b8aaafc566459af4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:26 -0700
+Subject: NFSD: add support for lock conflict to courteous server
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 27431affb0dbc259ac6ffe6071243a576c8f38f1 ]
+
+This patch allows expired client with lock state to be in COURTESY
+state. Lock conflict with COURTESY client is resolved by the fs/lock
+code using the lm_lock_expirable and lm_expire_lock callback in the
+struct lock_manager_operations.
+
+If conflict client is in COURTESY state, set it to EXPIRABLE and
+schedule the laundromat to run immediately to expire the client. The
+callback lm_expire_lock waits for the laundromat to flush its work
+queue before returning to caller.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 54 insertions(+), 16 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 582c9c7ba60a8..447faa4348227 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5731,39 +5731,51 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+ }
+ #endif
+
++/* Check if any lock belonging to this lockowner has any blockers */
+ static bool
+-nfs4_has_any_locks(struct nfs4_client *clp)
++nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
++{
++ struct file_lock_context *ctx;
++ struct nfs4_ol_stateid *stp;
++ struct nfs4_file *nf;
++
++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++ nf = stp->st_stid.sc_file;
++ ctx = nf->fi_inode->i_flctx;
++ if (!ctx)
++ continue;
++ if (locks_owner_has_blockers(ctx, lo))
++ return true;
++ }
++ return false;
++}
++
++static bool
++nfs4_anylock_blockers(struct nfs4_client *clp)
+ {
+ int i;
+ struct nfs4_stateowner *so;
++ struct nfs4_lockowner *lo;
+
++ if (atomic_read(&clp->cl_delegs_in_recall))
++ return true;
+ spin_lock(&clp->cl_lock);
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
+ so_strhash) {
+ if (so->so_is_open_owner)
+ continue;
+- spin_unlock(&clp->cl_lock);
+- return true;
++ lo = lockowner(so);
++ if (nfs4_lockowner_has_blockers(lo)) {
++ spin_unlock(&clp->cl_lock);
++ return true;
++ }
+ }
+ }
+ spin_unlock(&clp->cl_lock);
+ return false;
+ }
+
+-/*
+- * place holder for now, no check for lock blockers yet
+- */
+-static bool
+-nfs4_anylock_blockers(struct nfs4_client *clp)
+-{
+- if (atomic_read(&clp->cl_delegs_in_recall) ||
+- !list_empty(&clp->async_copies) ||
+- nfs4_has_any_locks(clp))
+- return true;
+- return false;
+-}
+-
+ static void
+ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ struct laundry_time *lt)
+@@ -6729,6 +6741,29 @@ nfsd4_lm_put_owner(fl_owner_t owner)
+ nfs4_put_stateowner(&lo->lo_owner);
+ }
+
++/* return pointer to struct nfs4_client if client is expirable */
++static bool
++nfsd4_lm_lock_expirable(struct file_lock *cfl)
++{
++ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner;
++ struct nfs4_client *clp = lo->lo_owner.so_client;
++ struct nfsd_net *nn;
++
++ if (try_to_expire_client(clp)) {
++ nn = net_generic(clp->net, nfsd_net_id);
++ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
++ return true;
++ }
++ return false;
++}
++
++/* schedule laundromat to run immediately and wait for it to complete */
++static void
++nfsd4_lm_expire_lock(void)
++{
++ flush_workqueue(laundry_wq);
++}
++
+ static void
+ nfsd4_lm_notify(struct file_lock *fl)
+ {
+@@ -6755,9 +6790,12 @@ nfsd4_lm_notify(struct file_lock *fl)
+ }
+
+ static const struct lock_manager_operations nfsd_posix_mng_ops = {
++ .lm_mod_owner = THIS_MODULE,
+ .lm_notify = nfsd4_lm_notify,
+ .lm_get_owner = nfsd4_lm_get_owner,
+ .lm_put_owner = nfsd4_lm_put_owner,
++ .lm_lock_expirable = nfsd4_lm_lock_expirable,
++ .lm_expire_lock = nfsd4_lm_expire_lock,
+ };
+
+ static inline void
+--
+2.43.0
+
--- /dev/null
+From 2ef20ef14c852ecdb34be0df1ac97d826becc9df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 19:44:46 -0800
+Subject: NFSD: add support for sending CB_RECALL_ANY
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 3959066b697b5dfbb7141124ae9665337d4bc638 ]
+
+Add XDR encode and decode function for CB_RECALL_ANY.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4callback.c | 72 ++++++++++++++++++++++++++++++++++++++++++
+ fs/nfsd/state.h | 1 +
+ fs/nfsd/xdr4.h | 5 +++
+ fs/nfsd/xdr4cb.h | 6 ++++
+ 4 files changed, 84 insertions(+)
+
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 39989c14c8a1e..4eae2c5af2edf 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -76,6 +76,17 @@ static __be32 *xdr_encode_empty_array(__be32 *p)
+ * 1 Protocol"
+ */
+
++static void encode_uint32(struct xdr_stream *xdr, u32 n)
++{
++ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
++}
++
++static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
++ size_t len)
++{
++ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
++}
++
+ /*
+ * nfs_cb_opnum4
+ *
+@@ -328,6 +339,24 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
+ hdr->nops++;
+ }
+
++/*
++ * CB_RECALLANY4args
++ *
++ * struct CB_RECALLANY4args {
++ * uint32_t craa_objects_to_keep;
++ * bitmap4 craa_type_mask;
++ * };
++ */
++static void
++encode_cb_recallany4args(struct xdr_stream *xdr,
++ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
++{
++ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
++ encode_uint32(xdr, ra->ra_keep);
++ encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval));
++ hdr->nops++;
++}
++
+ /*
+ * CB_SEQUENCE4args
+ *
+@@ -482,6 +511,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
+ encode_cb_nops(&hdr);
+ }
+
++/*
++ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
++ */
++static void
++nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
++ struct xdr_stream *xdr, const void *data)
++{
++ const struct nfsd4_callback *cb = data;
++ struct nfsd4_cb_recall_any *ra;
++ struct nfs4_cb_compound_hdr hdr = {
++ .ident = cb->cb_clp->cl_cb_ident,
++ .minorversion = cb->cb_clp->cl_minorversion,
++ };
++
++ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
++ encode_cb_compound4args(xdr, &hdr);
++ encode_cb_sequence4args(xdr, cb, &hdr);
++ encode_cb_recallany4args(xdr, &hdr, ra);
++ encode_cb_nops(&hdr);
++}
+
+ /*
+ * NFSv4.0 and NFSv4.1 XDR decode functions
+@@ -520,6 +569,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
+ return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
+ }
+
++/*
++ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
++ */
++static int
++nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
++ struct xdr_stream *xdr,
++ void *data)
++{
++ struct nfsd4_callback *cb = data;
++ struct nfs4_cb_compound_hdr hdr;
++ int status;
++
++ status = decode_cb_compound4res(xdr, &hdr);
++ if (unlikely(status))
++ return status;
++ status = decode_cb_sequence4res(xdr, cb);
++ if (unlikely(status || cb->cb_seq_status))
++ return status;
++ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
++ return status;
++}
++
+ #ifdef CONFIG_NFSD_PNFS
+ /*
+ * CB_LAYOUTRECALL4args
+@@ -783,6 +854,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
+ #endif
+ PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
+ PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
++ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
+ };
+
+ static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index eadd7f465bf52..e30882f8b8516 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -636,6 +636,7 @@ enum nfsd4_cb_op {
+ NFSPROC4_CLNT_CB_OFFLOAD,
+ NFSPROC4_CLNT_CB_SEQUENCE,
+ NFSPROC4_CLNT_CB_NOTIFY_LOCK,
++ NFSPROC4_CLNT_CB_RECALL_ANY,
+ };
+
+ /* Returns true iff a is later than b: */
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 0eb00105d845b..4fd2cf6d1d2dc 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -896,5 +896,10 @@ struct nfsd4_operation {
+ union nfsd4_op_u *);
+ };
+
++struct nfsd4_cb_recall_any {
++ struct nfsd4_callback ra_cb;
++ u32 ra_keep;
++ u32 ra_bmval[1];
++};
+
+ #endif
+diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
+index 547cf07cf4e08..0d39af1b00a0f 100644
+--- a/fs/nfsd/xdr4cb.h
++++ b/fs/nfsd/xdr4cb.h
+@@ -48,3 +48,9 @@
+ #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
++#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
++ cb_sequence_enc_sz + \
++ 1 + 1 + 1)
++#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
++ cb_sequence_dec_sz + \
++ op_dec_sz)
+--
+2.43.0
+
--- /dev/null
+From b865c307e525e1b17b31e4cb95921a55242a059c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:22 -0700
+Subject: NFSD: add support for share reservation conflict to courteous server
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 3d69427151806656abf129342028f3f4e5e1fee0 ]
+
+This patch allows an expired client with open state to be in COURTESY
+state. A share/access conflict with a COURTESY client is resolved by
+setting the COURTESY client to EXPIRABLE state, scheduling the laundromat
+to run, and returning nfserr_jukebox to the requesting client.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 109 ++++++++++++++++++++++++++++++++++++++++----
+ 1 file changed, 101 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 1e15bb2d8382b..3dfdd9e1c5028 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -705,6 +705,57 @@ static unsigned int file_hashval(struct svc_fh *fh)
+
+ static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
+
++/*
++ * Check if courtesy clients have conflicting access and resolve it if possible
++ *
++ * access: is op_share_access if share_access is true.
++ * Check if access mode, op_share_access, would conflict with
++ * the current deny mode of the file 'fp'.
++ * access: is op_share_deny if share_access is false.
++ * Check if the deny mode, op_share_deny, would conflict with
++ * current access of the file 'fp'.
++ * stp: skip checking this entry.
++ * new_stp: normal open, not open upgrade.
++ *
++ * Function returns:
++ * false - access/deny mode conflict with normal client.
++ * true - no conflict or conflict with courtesy client(s) is resolved.
++ */
++static bool
++nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp,
++ struct nfs4_ol_stateid *stp, u32 access, bool share_access)
++{
++ struct nfs4_ol_stateid *st;
++ bool resolvable = true;
++ unsigned char bmap;
++ struct nfsd_net *nn;
++ struct nfs4_client *clp;
++
++ lockdep_assert_held(&fp->fi_lock);
++ list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
++ /* ignore lock stateid */
++ if (st->st_openstp)
++ continue;
++ if (st == stp && new_stp)
++ continue;
++ /* check file access against deny mode or vice versa */
++ bmap = share_access ? st->st_deny_bmap : st->st_access_bmap;
++ if (!(access & bmap_to_share_mode(bmap)))
++ continue;
++ clp = st->st_stid.sc_client;
++ if (try_to_expire_client(clp))
++ continue;
++ resolvable = false;
++ break;
++ }
++ if (resolvable) {
++ clp = stp->st_stid.sc_client;
++ nn = net_generic(clp->net, nfsd_net_id);
++ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
++ }
++ return resolvable;
++}
++
+ static void
+ __nfs4_file_get_access(struct nfs4_file *fp, u32 access)
+ {
+@@ -4985,7 +5036,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
+
+ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
+- struct nfsd4_open *open)
++ struct nfsd4_open *open, bool new_stp)
+ {
+ struct nfsd_file *nf = NULL;
+ __be32 status;
+@@ -5001,6 +5052,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ */
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+ if (status != nfs_ok) {
++ if (status != nfserr_share_denied) {
++ spin_unlock(&fp->fi_lock);
++ goto out;
++ }
++ if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
++ stp, open->op_share_deny, false))
++ status = nfserr_jukebox;
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+@@ -5008,6 +5066,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ /* set access to the file */
+ status = nfs4_file_get_access(fp, open->op_share_access);
+ if (status != nfs_ok) {
++ if (status != nfserr_share_denied) {
++ spin_unlock(&fp->fi_lock);
++ goto out;
++ }
++ if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
++ stp, open->op_share_access, true))
++ status = nfserr_jukebox;
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+@@ -5054,21 +5119,29 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ }
+
+ static __be32
+-nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
++nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
++ struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
++ struct nfsd4_open *open)
+ {
+ __be32 status;
+ unsigned char old_deny_bmap = stp->st_deny_bmap;
+
+ if (!test_access(open->op_share_access, stp))
+- return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
++ return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false);
+
+ /* test and set deny mode */
+ spin_lock(&fp->fi_lock);
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+ if (status == nfs_ok) {
+- set_deny(open->op_share_deny, stp);
+- fp->fi_share_deny |=
++ if (status != nfserr_share_denied) {
++ set_deny(open->op_share_deny, stp);
++ fp->fi_share_deny |=
+ (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
++ } else {
++ if (nfs4_resolve_deny_conflicts_locked(fp, false,
++ stp, open->op_share_deny, false))
++ status = nfserr_jukebox;
++ }
+ }
+ spin_unlock(&fp->fi_lock);
+
+@@ -5409,7 +5482,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ goto out;
+ }
+ } else {
+- status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
++ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
+ if (status) {
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
+ release_open_stateid(stp);
+@@ -5643,6 +5716,26 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+ }
+ #endif
+
++static bool
++nfs4_has_any_locks(struct nfs4_client *clp)
++{
++ int i;
++ struct nfs4_stateowner *so;
++
++ spin_lock(&clp->cl_lock);
++ for (i = 0; i < OWNER_HASH_SIZE; i++) {
++ list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
++ so_strhash) {
++ if (so->so_is_open_owner)
++ continue;
++ spin_unlock(&clp->cl_lock);
++ return true;
++ }
++ }
++ spin_unlock(&clp->cl_lock);
++ return false;
++}
++
+ /*
+ * place holder for now, no check for lock blockers yet
+ */
+@@ -5650,8 +5743,8 @@ static bool
+ nfs4_anylock_blockers(struct nfs4_client *clp)
+ {
+ if (atomic_read(&clp->cl_delegs_in_recall) ||
+- client_has_openowners(clp) ||
+- !list_empty(&clp->async_copies))
++ !list_empty(&clp->async_copies) ||
++ nfs4_has_any_locks(clp))
+ return true;
+ return false;
+ }
+--
+2.43.0
+
--- /dev/null
+From 992a7daef109458b80ef796ff780cefc9a9cdc5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jan 2022 13:08:42 +0100
+Subject: nfsd: Add support for the birth time attribute
+
+From: Ondrej Valousek <ondrej.valousek.xm@renesas.com>
+
+[ Upstream commit e377a3e698fb56cb63f6bddbebe7da76dc37e316 ]
+
+For filesystems that support the "btime" timestamp (i.e. most modern
+filesystems), we share it via kernel nfsd. Btime support for the NFS
+client has already been added by Trond recently.
+
+Suggested-by: Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Ondrej Valousek <ondrej.valousek.xm@renesas.com>
+[ cel: addressed some whitespace/checkpatch nits ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 10 ++++++++++
+ fs/nfsd/nfsd.h | 2 +-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 19ddd80239944..771d3057577ef 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2851,6 +2851,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ if (err)
+ goto out_nfserr;
++ if (!(stat.result_mask & STATX_BTIME))
++ /* underlying FS does not offer btime so we can't share it */
++ bmval1 &= ~FATTR4_WORD1_TIME_CREATE;
+ if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
+ (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+@@ -3251,6 +3254,13 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+ *p++ = cpu_to_be32(stat.mtime.tv_nsec);
+ }
++ if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
++ p = xdr_reserve_space(xdr, 12);
++ if (!p)
++ goto out_resource;
++ p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec);
++ *p++ = cpu_to_be32(stat.btime.tv_nsec);
++ }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+ struct kstat parent_stat;
+ u64 ino = stat.ino;
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 3e5008b475ff0..4fc1fd639527a 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -364,7 +364,7 @@ void nfsd_lockd_shutdown(void);
+ | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
+ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
+ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
+- | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
++ | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \
+ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
+
+ #define NFSD4_SUPPORTED_ATTRS_WORD2 0
+--
+2.43.0
+
--- /dev/null
+From b4be75b002d9c7a6c6a4e3fb53dd7491588bcf2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:13:54 -0400
+Subject: NFSD: Add tracepoints to report NFSv4 callback completions
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 1035d65446a018ca2dd179e29a2fcd6d29057781 ]
+
+Wireshark has always been lousy about dissecting NFSv4 callbacks,
+especially NFSv4.0 backchannel requests. Add tracepoints so we
+can surgically capture these events in the trace log.
+
+Tracepoints are time-stamped and ordered so that we can now observe
+the timing relationship between a CB_RECALL Reply and the client's
+DELEGRETURN Call. Example:
+
+ nfsd-1153 [002] 211.986391: nfsd_cb_recall: addr=192.168.1.67:45767 client 62ea82e4:fee7492a stateid 00000003:00000001
+
+ nfsd-1153 [002] 212.095634: nfsd_compound: xid=0x0000002c opcnt=2
+ nfsd-1153 [002] 212.095647: nfsd_compound_status: op=1/2 OP_PUTFH status=0
+ nfsd-1153 [002] 212.095658: nfsd_file_put: hash=0xf72 inode=0xffff9291148c7410 ref=3 flags=HASHED|REFERENCED may=READ file=0xffff929103b3ea00
+ nfsd-1153 [002] 212.095661: nfsd_compound_status: op=2/2 OP_DELEGRETURN status=0
+ kworker/u25:8-148 [002] 212.096713: nfsd_cb_recall_done: client 62ea82e4:fee7492a stateid 00000003:00000001 status=0
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4layouts.c | 2 +-
+ fs/nfsd/nfs4proc.c | 4 ++++
+ fs/nfsd/nfs4state.c | 4 ++++
+ fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 48 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
+index 7018d209b784a..e4e23b2a3e655 100644
+--- a/fs/nfsd/nfs4layouts.c
++++ b/fs/nfsd/nfs4layouts.c
+@@ -657,7 +657,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
+ ktime_t now, cutoff;
+ const struct nfsd4_layout_ops *ops;
+
+-
++ trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
+ switch (task->tk_status) {
+ case 0:
+ case -NFS4ERR_DELAY:
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index e4c0dc577fe35..ce8062c959315 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1604,6 +1604,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
+ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+ {
++ struct nfsd4_cb_offload *cbo =
++ container_of(cb, struct nfsd4_cb_offload, co_cb);
++
++ trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
+ return 1;
+ }
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 66cf8217ebe57..a0c5658599ead 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -357,6 +357,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+ static int
+ nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
+ {
++ trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
++
+ /*
+ * Since this is just an optimization, we don't try very hard if it
+ * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
+@@ -4760,6 +4762,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
+ {
+ struct nfs4_delegation *dp = cb_to_delegation(cb);
+
++ trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
++
+ if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
+ dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
+ return 1;
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index c5d4a258680c3..d832429e575e4 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -1449,6 +1449,45 @@ TRACE_EVENT(nfsd_cb_offload,
+ __entry->fh_hash, __entry->count, __entry->status)
+ );
+
++DECLARE_EVENT_CLASS(nfsd_cb_done_class,
++ TP_PROTO(
++ const stateid_t *stp,
++ const struct rpc_task *task
++ ),
++ TP_ARGS(stp, task),
++ TP_STRUCT__entry(
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(u32, si_id)
++ __field(u32, si_generation)
++ __field(int, status)
++ ),
++ TP_fast_assign(
++ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
++ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
++ __entry->si_id = stp->si_opaque.so_id;
++ __entry->si_generation = stp->si_generation;
++ __entry->status = task->tk_status;
++ ),
++ TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
++ __entry->cl_boot, __entry->cl_id, __entry->si_id,
++ __entry->si_generation, __entry->status
++ )
++);
++
++#define DEFINE_NFSD_CB_DONE_EVENT(name) \
++DEFINE_EVENT(nfsd_cb_done_class, name, \
++ TP_PROTO( \
++ const stateid_t *stp, \
++ const struct rpc_task *task \
++ ), \
++ TP_ARGS(stp, task))
++
++DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
++DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
++DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
++DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
++
+ #endif /* _NFSD_TRACE_H */
+
+ #undef TRACE_INCLUDE_PATH
+--
+2.43.0
+
--- /dev/null
+From 39e3827f778a38c8f5d809bac0acd8fff8f70e9f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 07:47:56 -0400
+Subject: nfsd: allow disabling NFSv2 at compile time
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 2f3a4b2ac2f28b9be78ad21f401f31e263845214 ]
+
+rpc.nfsd stopped supporting NFSv2 a year ago. Take the next logical
+step toward deprecating it and allow NFSv2 support to be compiled out.
+
+Add a new CONFIG_NFSD_V2 option that can be turned off and rework the
+CONFIG_NFSD_V?_ACL option dependencies. Add a description that
+discourages enabling it.
+
+Also, change the description of CONFIG_NFSD to state that the always-on
+version is now 3 instead of 2.
+
+Finally, add an #ifdef around "case 2:" in __write_versions. When NFSv2
+is disabled at compile time, this should make the kernel ignore attempts
+to disable it at runtime, but still error out when trying to enable it.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Tom Talpey <tom@talpey.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/Kconfig | 19 +++++++++++++++----
+ fs/nfsd/Makefile | 5 +++--
+ fs/nfsd/nfsctl.c | 2 ++
+ fs/nfsd/nfsd.h | 3 +--
+ fs/nfsd/nfssvc.c | 6 ++++++
+ 5 files changed, 27 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
+index b83a6e3bf8080..7f071519fb2e0 100644
+--- a/fs/nfsd/Kconfig
++++ b/fs/nfsd/Kconfig
+@@ -8,6 +8,7 @@ config NFSD
+ select SUNRPC
+ select EXPORTFS
+ select NFS_ACL_SUPPORT if NFSD_V2_ACL
++ select NFS_ACL_SUPPORT if NFSD_V3_ACL
+ depends on MULTIUSER
+ help
+ Choose Y here if you want to allow other computers to access
+@@ -26,19 +27,29 @@ config NFSD
+
+ Below you can choose which versions of the NFS protocol are
+ available to clients mounting the NFS server on this system.
+- Support for NFS version 2 (RFC 1094) is always available when
++ Support for NFS version 3 (RFC 1813) is always available when
+ CONFIG_NFSD is selected.
+
+ If unsure, say N.
+
+-config NFSD_V2_ACL
+- bool
++config NFSD_V2
++ bool "NFS server support for NFS version 2 (DEPRECATED)"
+ depends on NFSD
++ default n
++ help
++ NFSv2 (RFC 1094) was the first publicly-released version of NFS.
++ Unless you are hosting ancient (1990's era) NFS clients, you don't
++ need this.
++
++ If unsure, say N.
++
++config NFSD_V2_ACL
++ bool "NFS server support for the NFSv2 ACL protocol extension"
++ depends on NFSD_V2
+
+ config NFSD_V3_ACL
+ bool "NFS server support for the NFSv3 ACL protocol extension"
+ depends on NFSD
+- select NFSD_V2_ACL
+ help
+ Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
+ never became an official part of the NFS version 3 protocol.
+diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
+index 805c06d5f1b4b..6fffc8f03f740 100644
+--- a/fs/nfsd/Makefile
++++ b/fs/nfsd/Makefile
+@@ -10,9 +10,10 @@ obj-$(CONFIG_NFSD) += nfsd.o
+ # this one should be compiled first, as the tracing macros can easily blow up
+ nfsd-y += trace.o
+
+-nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+- export.o auth.o lockd.o nfscache.o nfsxdr.o \
++nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \
++ export.o auth.o lockd.o nfscache.o \
+ stats.o filecache.o nfs3proc.o nfs3xdr.o
++nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
+ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 68ed42fd29fc8..d1e581a60480c 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -581,7 +581,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
+
+ cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
+ switch(num) {
++#ifdef CONFIG_NFSD_V2
+ case 2:
++#endif
+ case 3:
+ nfsd_vers(nn, num, cmd);
+ break;
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 09726c5b9a317..93b42ef9ed91b 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -64,8 +64,7 @@ struct readdir_cd {
+
+
+ extern struct svc_program nfsd_program;
+-extern const struct svc_version nfsd_version2, nfsd_version3,
+- nfsd_version4;
++extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
+ extern struct mutex nfsd_mutex;
+ extern spinlock_t nfsd_drc_lock;
+ extern unsigned long nfsd_drc_max_mem;
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 8b1afde192118..429f38c986280 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -91,8 +91,12 @@ unsigned long nfsd_drc_mem_used;
+ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ static struct svc_stat nfsd_acl_svcstats;
+ static const struct svc_version *nfsd_acl_version[] = {
++# if defined(CONFIG_NFSD_V2_ACL)
+ [2] = &nfsd_acl_version2,
++# endif
++# if defined(CONFIG_NFSD_V3_ACL)
+ [3] = &nfsd_acl_version3,
++# endif
+ };
+
+ #define NFSD_ACL_MINVERS 2
+@@ -116,7 +120,9 @@ static struct svc_stat nfsd_acl_svcstats = {
+ #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+
+ static const struct svc_version *nfsd_version[] = {
++#if defined(CONFIG_NFSD_V2)
+ [2] = &nfsd_version2,
++#endif
+ [3] = &nfsd_version3,
+ #if defined(CONFIG_NFSD_V4)
+ [4] = &nfsd_version4,
+--
+2.43.0
+
--- /dev/null
+From ac437762dc04468ae96b618199d2c97c22c7ebc9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:33:47 -0500
+Subject: nfsd: allow nfsd_file_get to sanely handle a NULL pointer
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 70f62231cdfd52357836733dd31db787e0412ab2 ]
+
+...and remove some now-useless NULL pointer checks in its callers.
+
+Suggested-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 ++---
+ fs/nfsd/nfs4state.c | 4 +---
+ 2 files changed, 3 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 68c7c82f8b3bb..206742bbbd682 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -451,7 +451,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf)
+ struct nfsd_file *
+ nfsd_file_get(struct nfsd_file *nf)
+ {
+- if (likely(refcount_inc_not_zero(&nf->nf_ref)))
++ if (nf && refcount_inc_not_zero(&nf->nf_ref))
+ return nf;
+ return NULL;
+ }
+@@ -1106,8 +1106,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ rcu_read_lock();
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+- if (nf)
+- nf = nfsd_file_get(nf);
++ nf = nfsd_file_get(nf);
+ rcu_read_unlock();
+
+ if (nf) {
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 5c261cc807e8e..628e564e530bf 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -602,9 +602,7 @@ put_nfs4_file(struct nfs4_file *fi)
+ static struct nfsd_file *
+ __nfs4_get_fd(struct nfs4_file *f, int oflag)
+ {
+- if (f->fi_fds[oflag])
+- return nfsd_file_get(f->fi_fds[oflag]);
+- return NULL;
++ return nfsd_file_get(f->fi_fds[oflag]);
+ }
+
+ static struct nfsd_file *
+--
+2.43.0
+
--- /dev/null
+From 81a1eaf68bbfaaf8885c02b7417d462c80421b32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 06:53:54 -0500
+Subject: nfsd: allow reaping files still under writeback
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit dcb779fcd4ed5984ad15991d574943d12a8693d1 ]
+
+On most filesystems, there is no reason to delay reaping an nfsd_file
+just because its underlying inode is still under writeback. nfsd just
+relies on client activity or the local flusher threads to do writeback.
+
+The main exception is NFS, which flushes all of its dirty data on last
+close. Add a new EXPORT_OP_FLUSH_ON_CLOSE flag to allow filesystems to
+signal that they do this, and only skip closing files under writeback on
+such filesystems.
+
+Also, remove a redundant NULL file pointer check in
+nfsd_file_check_writeback, and clean up nfs's export op flag
+definitions.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Acked-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/export.c | 9 ++++++---
+ fs/nfsd/filecache.c | 12 +++++++++++-
+ include/linux/exportfs.h | 1 +
+ 3 files changed, 18 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfs/export.c b/fs/nfs/export.c
+index 37a1a88df7717..eafa9d7b0911b 100644
+--- a/fs/nfs/export.c
++++ b/fs/nfs/export.c
+@@ -178,7 +178,10 @@ const struct export_operations nfs_export_ops = {
+ .fh_to_dentry = nfs_fh_to_dentry,
+ .get_parent = nfs_get_parent,
+ .fetch_iversion = nfs_fetch_iversion,
+- .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
+- EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
+- EXPORT_OP_NOATOMIC_ATTR,
++ .flags = EXPORT_OP_NOWCC |
++ EXPORT_OP_NOSUBTREECHK |
++ EXPORT_OP_CLOSE_BEFORE_UNLINK |
++ EXPORT_OP_REMOTE_FS |
++ EXPORT_OP_NOATOMIC_ATTR |
++ EXPORT_OP_FLUSH_ON_CLOSE,
+ };
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 1d4c0387c4192..080d796547854 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -401,13 +401,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
+ struct file *file = nf->nf_file;
+ struct address_space *mapping;
+
+- if (!file || !(file->f_mode & FMODE_WRITE))
++ /* File not open for write? */
++ if (!(file->f_mode & FMODE_WRITE))
+ return false;
++
++ /*
++ * Some filesystems (e.g. NFS) flush all dirty data on close.
++ * On others, there is no need to wait for writeback.
++ */
++ if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
++ return false;
++
+ mapping = file->f_mapping;
+ return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+ }
+
++
+ static bool nfsd_file_lru_add(struct nfsd_file *nf)
+ {
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
+index fe848901fcc3a..218fc5c54e901 100644
+--- a/include/linux/exportfs.h
++++ b/include/linux/exportfs.h
+@@ -221,6 +221,7 @@ struct export_operations {
+ #define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
+ atomic attribute updates
+ */
++#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
+ unsigned long flags;
+ };
+
+--
+2.43.0
+
--- /dev/null
+From 18ba8699a4e6d74d6b40e563f521a42764113292 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: always drop directory lock in nfsd_unlink()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit b677c0c63a135a916493c064906582e9f3ed4802 ]
+
+Some error paths in nfsd_unlink() allow it to exit without unlocking the
+directory. This is not a problem in practice as the directory will be
+unlocked with an fh_put(), but it is untidy and potentially confusing.
+
+This allows us to remove all the fh_unlock() calls that are immediately
+after nfsd_unlink() calls.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 2 --
+ fs/nfsd/nfs4proc.c | 4 +---
+ fs/nfsd/vfs.c | 7 +++++--
+ 3 files changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index c7c2c7db30f54..fbdc109fbd067 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -490,7 +490,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp)
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR,
+ argp->name, argp->len);
+- fh_unlock(&resp->fh);
+ return rpc_success;
+ }
+
+@@ -511,7 +510,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp)
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR,
+ argp->name, argp->len);
+- fh_unlock(&resp->fh);
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index fb2487ceac46e..26cd2479e30cf 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1002,10 +1002,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ return nfserr_grace;
+ status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
+ remove->rm_name, remove->rm_namelen);
+- if (!status) {
+- fh_unlock(&cstate->current_fh);
++ if (!status)
+ set_change_info(&remove->rm_cinfo, &cstate->current_fh);
+- }
+ return status;
+ }
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 90bd6968fbf68..4b1304fe718fd 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1754,12 +1754,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ rdentry = lookup_one_len(fname, dentry, flen);
+ host_err = PTR_ERR(rdentry);
+ if (IS_ERR(rdentry))
+- goto out_drop_write;
++ goto out_unlock;
+
+ if (d_really_is_negative(rdentry)) {
+ dput(rdentry);
+ host_err = -ENOENT;
+- goto out_drop_write;
++ goto out_unlock;
+ }
+ rinode = d_inode(rdentry);
+ ihold(rinode);
+@@ -1797,6 +1797,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ }
+ out:
+ return err;
++out_unlock:
++ fh_unlock(fhp);
++ goto out_drop_write;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 804011b4cf296e66c81988480e357b3f0276e94c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Mar 2022 10:16:42 -0400
+Subject: NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 14ee45b70dd0d9ae76fb066cd8c0652d657353f6 ]
+
+Clean up: The "out" label already invokes fh_drop_write().
+
+Note that fh_drop_write() is already careful not to invoke
+mnt_drop_write() if either it has already been done or there is
+nothing to drop. Therefore no change in behavior is expected.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 541f39ab450ce..a46ab32216dee 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1487,7 +1487,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ case NFS3_CREATE_GUARDED:
+ err = nfserr_exist;
+ }
+- fh_drop_write(fhp);
+ goto out;
+ }
+
+@@ -1495,10 +1494,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ iap->ia_mode &= ~current_umask();
+
+ host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
+- if (host_err < 0) {
+- fh_drop_write(fhp);
++ if (host_err < 0)
+ goto out_nfserr;
+- }
+ if (created)
+ *created = true;
+
+--
+2.43.0
+
--- /dev/null
+From 5224777bfff5533bb0d30a9bc41eba2b1051e432 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Dec 2022 12:48:59 -0800
+Subject: NFSD: Avoid clashing function prototypes
+
+From: Kees Cook <keescook@chromium.org>
+
+[ Upstream commit e78e274eb22d966258a3845acc71d3c5b8ee2ea8 ]
+
+When built with Control Flow Integrity, function prototypes between
+caller and function declaration must match. These mismatches are visible
+at compile time with the new -Wcast-function-type-strict in Clang[1].
+
+There were 97 warnings produced by NFS. For example:
+
+fs/nfsd/nfs4xdr.c:2228:17: warning: cast from '__be32 (*)(struct nfsd4_compoundargs *, struct nfsd4_access *)' (aka 'unsigned int (*)(struct nfsd4_compoundargs *, struct nfsd4_access *)') to 'nfsd4_dec' (aka 'unsigned int (*)(struct nfsd4_compoundargs *, void *)') converts to incompatible function type [-Wcast-function-type-strict]
+ [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The enc/dec callbacks were defined as passing "void *" as the second
+argument, but were being implicitly cast to a new type. Replace the
+argument with union nfsd4_op_u, and perform explicit member selection
+in the function body. There are no resulting binary differences.
+
+Changes were made mechanically using the following Coccinelle script,
+with minor by-hand fixes for members that didn't already match their
+existing argument name:
+
+@find@
+identifier func;
+type T, opsT;
+identifier ops, N;
+@@
+
+ opsT ops[] = {
+ [N] = (T) func,
+ };
+
+@already_void@
+identifier find.func;
+identifier name;
+@@
+
+ func(...,
+-void
++union nfsd4_op_u
+ *name)
+ {
+ ...
+ }
+
+@proto depends on !already_void@
+identifier find.func;
+type T;
+identifier name;
+position p;
+@@
+
+ func@p(...,
+ T name
+ ) {
+ ...
+ }
+
+@script:python get_member@
+type_name << proto.T;
+member;
+@@
+
+coccinelle.member = cocci.make_ident(type_name.split("_", 1)[1].split(' ',1)[0])
+
+@convert@
+identifier find.func;
+type proto.T;
+identifier proto.name;
+position proto.p;
+identifier get_member.member;
+@@
+
+ func@p(...,
+- T name
++ union nfsd4_op_u *u
+ ) {
++ T name = &u->member;
+ ...
+ }
+
+@cast@
+identifier find.func;
+type T, opsT;
+identifier ops, N;
+@@
+
+ opsT ops[] = {
+ [N] =
+- (T)
+ func,
+ };
+
+Cc: Chuck Lever <chuck.lever@oracle.com>
+Cc: Jeff Layton <jlayton@kernel.org>
+Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
+Cc: linux-nfs@vger.kernel.org
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 632 +++++++++++++++++++++++++++-------------------
+ 1 file changed, 377 insertions(+), 255 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index c2457a9ac00aa..30e085f1e4797 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -770,16 +770,18 @@ nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+
+ static __be32
+ nfsd4_decode_access(struct nfsd4_compoundargs *argp,
+- struct nfsd4_access *access)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_access *access = &u->access;
+ if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+ }
+
+ static __be32
+-nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
++nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_close *close = &u->close;
+ if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_stateid4(argp, &close->cl_stateid);
+@@ -787,8 +789,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+
+
+ static __be32
+-nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
++nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_commit *commit = &u->commit;
+ if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
+@@ -798,8 +801,9 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
+ }
+
+ static __be32
+-nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
++nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_create *create = &u->create;
+ __be32 *p, status;
+
+ memset(create, 0, sizeof(*create));
+@@ -844,22 +848,25 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
+ }
+
+ static inline __be32
+-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
++nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_delegreturn *dr = &u->delegreturn;
+ return nfsd4_decode_stateid4(argp, &dr->dr_stateid);
+ }
+
+ static inline __be32
+-nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
++nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_getattr *getattr = &u->getattr;
+ memset(getattr, 0, sizeof(*getattr));
+ return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
+ ARRAY_SIZE(getattr->ga_bmval));
+ }
+
+ static __be32
+-nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
++nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_link *link = &u->link;
+ memset(link, 0, sizeof(*link));
+ return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
+ }
+@@ -907,8 +914,9 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ }
+
+ static __be32
+-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
++nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_lock *lock = &u->lock;
+ memset(lock, 0, sizeof(*lock));
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
+ return nfserr_bad_xdr;
+@@ -924,8 +932,9 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ }
+
+ static __be32
+-nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
++nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_lockt *lockt = &u->lockt;
+ memset(lockt, 0, sizeof(*lockt));
+ if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
+ return nfserr_bad_xdr;
+@@ -940,8 +949,9 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+ }
+
+ static __be32
+-nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
++nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_locku *locku = &u->locku;
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0)
+@@ -962,8 +972,9 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+ }
+
+ static __be32
+-nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
++nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_lookup *lookup = &u->lookup;
+ return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len);
+ }
+
+@@ -1143,8 +1154,9 @@ nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp,
+ }
+
+ static __be32
+-nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
++nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_open *open = &u->open;
+ __be32 status;
+ u32 dummy;
+
+@@ -1171,8 +1183,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+ }
+
+ static __be32
+-nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
++nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_confirm *open_conf = &u->open_confirm;
+ __be32 status;
+
+ if (argp->minorversion >= 1)
+@@ -1190,8 +1204,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
+ }
+
+ static __be32
+-nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
++nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_downgrade *open_down = &u->open_downgrade;
+ __be32 status;
+
+ memset(open_down, 0, sizeof(*open_down));
+@@ -1209,8 +1225,9 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
+ }
+
+ static __be32
+-nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
++nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_putfh *putfh = &u->putfh;
+ __be32 *p;
+
+ if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0)
+@@ -1229,7 +1246,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+ }
+
+ static __be32
+-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
++nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+ {
+ if (argp->minorversion == 0)
+ return nfs_ok;
+@@ -1237,8 +1254,9 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
+ }
+
+ static __be32
+-nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
++nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_read *read = &u->read;
+ __be32 status;
+
+ memset(read, 0, sizeof(*read));
+@@ -1254,8 +1272,9 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+ }
+
+ static __be32
+-nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
++nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_readdir *readdir = &u->readdir;
+ __be32 status;
+
+ memset(readdir, 0, sizeof(*readdir));
+@@ -1276,15 +1295,17 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
+ }
+
+ static __be32
+-nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
++nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_remove *remove = &u->remove;
+ memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
+ return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
+ }
+
+ static __be32
+-nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
++nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_rename *rename = &u->rename;
+ __be32 status;
+
+ memset(rename, 0, sizeof(*rename));
+@@ -1295,22 +1316,25 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
+ }
+
+ static __be32
+-nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
++nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ clientid_t *clientid = &u->renew;
+ return nfsd4_decode_clientid4(argp, clientid);
+ }
+
+ static __be32
+ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
+- struct nfsd4_secinfo *secinfo)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo *secinfo = &u->secinfo;
+ secinfo->si_exp = NULL;
+ return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
+ }
+
+ static __be32
+-nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
++nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_setattr *setattr = &u->setattr;
+ __be32 status;
+
+ memset(setattr, 0, sizeof(*setattr));
+@@ -1324,8 +1348,9 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
+ }
+
+ static __be32
+-nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
++nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_setclientid *setclientid = &u->setclientid;
+ __be32 *p, status;
+
+ memset(setclientid, 0, sizeof(*setclientid));
+@@ -1367,8 +1392,10 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
+ }
+
+ static __be32
+-nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
++nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm;
+ __be32 status;
+
+ if (argp->minorversion >= 1)
+@@ -1382,8 +1409,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s
+
+ /* Also used for NVERIFY */
+ static __be32
+-nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
++nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_verify *verify = &u->verify;
+ __be32 *p, status;
+
+ memset(verify, 0, sizeof(*verify));
+@@ -1409,8 +1437,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
+ }
+
+ static __be32
+-nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
++nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_write *write = &u->write;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &write->wr_stateid);
+@@ -1434,8 +1463,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+ }
+
+ static __be32
+-nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
++nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
+ __be32 status;
+
+ if (argp->minorversion >= 1)
+@@ -1452,16 +1483,20 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
+ return nfs_ok;
+ }
+
+-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
++static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
+ memset(bc, 0, sizeof(*bc));
+ if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+ }
+
+-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
++static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
+ u32 use_conn_in_rdma_mode;
+ __be32 status;
+
+@@ -1603,8 +1638,9 @@ nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
+- struct nfsd4_exchange_id *exid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ __be32 status;
+
+ memset(exid, 0, sizeof(*exid));
+@@ -1656,8 +1692,9 @@ nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+- struct nfsd4_create_session *sess)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_create_session *sess = &u->create_session;
+ __be32 status;
+
+ memset(sess, 0, sizeof(*sess));
+@@ -1681,23 +1718,26 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
+- struct nfsd4_destroy_session *destroy_session)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_destroy_session *destroy_session = &u->destroy_session;
+ return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid);
+ }
+
+ static __be32
+ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
+- struct nfsd4_free_stateid *free_stateid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
+ return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
+ }
+
+ #ifdef CONFIG_NFSD_PNFS
+ static __be32
+ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+- struct nfsd4_getdeviceinfo *gdev)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
+ __be32 status;
+
+ memset(gdev, 0, sizeof(*gdev));
+@@ -1717,8 +1757,9 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+- struct nfsd4_layoutcommit *lcp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
+ __be32 *p, status;
+
+ memset(lcp, 0, sizeof(*lcp));
+@@ -1753,8 +1794,9 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+- struct nfsd4_layoutget *lgp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutget *lgp = &u->layoutget;
+ __be32 status;
+
+ memset(lgp, 0, sizeof(*lgp));
+@@ -1781,8 +1823,9 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+- struct nfsd4_layoutreturn *lrp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
+ memset(lrp, 0, sizeof(*lrp));
+ if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
+ return nfserr_bad_xdr;
+@@ -1795,8 +1838,9 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+ #endif /* CONFIG_NFSD_PNFS */
+
+ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+- struct nfsd4_secinfo_no_name *sin)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name;
+ if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
+ return nfserr_bad_xdr;
+
+@@ -1806,8 +1850,9 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+- struct nfsd4_sequence *seq)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_sequence *seq = &u->sequence;
+ __be32 *p, status;
+
+ status = nfsd4_decode_sessionid4(argp, &seq->sessionid);
+@@ -1826,8 +1871,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+ }
+
+ static __be32
+-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
++nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
+ struct nfsd4_test_stateid_id *stateid;
+ __be32 status;
+ u32 i;
+@@ -1852,14 +1899,16 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
+ }
+
+ static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp,
+- struct nfsd4_destroy_clientid *dc)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
+ return nfsd4_decode_clientid4(argp, &dc->clientid);
+ }
+
+ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
+- struct nfsd4_reclaim_complete *rc)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
+ if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+@@ -1867,8 +1916,9 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
+- struct nfsd4_fallocate *fallocate)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_fallocate *fallocate = &u->allocate;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid);
+@@ -1924,8 +1974,9 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+ }
+
+ static __be32
+-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
++nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy *copy = &u->copy;
+ u32 consecutive, i, count, sync;
+ struct nl4_server *ns_dummy;
+ __be32 status;
+@@ -1982,8 +2033,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+
+ static __be32
+ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+- struct nfsd4_copy_notify *cn)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ __be32 status;
+
+ memset(cn, 0, sizeof(*cn));
+@@ -2002,16 +2054,18 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
+- struct nfsd4_offload_status *os)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_offload_status *os = &u->offload_status;
+ os->count = 0;
+ os->status = 0;
+ return nfsd4_decode_stateid4(argp, &os->stateid);
+ }
+
+ static __be32
+-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
++nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_seek *seek = &u->seek;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &seek->seek_stateid);
+@@ -2028,8 +2082,9 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+ }
+
+ static __be32
+-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
++nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+ {
++ struct nfsd4_clone *clone = &u->clone;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid);
+@@ -2154,8 +2209,9 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
+ */
+ static __be32
+ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+- struct nfsd4_getxattr *getxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getxattr *getxattr = &u->getxattr;
+ __be32 status;
+ u32 maxcount;
+
+@@ -2173,8 +2229,9 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+- struct nfsd4_setxattr *setxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ u32 flags, maxcount, size;
+ __be32 status;
+
+@@ -2214,8 +2271,9 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
+- struct nfsd4_listxattrs *listxattrs)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+ u32 maxcount;
+
+ memset(listxattrs, 0, sizeof(*listxattrs));
+@@ -2245,113 +2303,114 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
+
+ static __be32
+ nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
+- struct nfsd4_removexattr *removexattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ memset(removexattr, 0, sizeof(*removexattr));
+ return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
+ }
+
+ static __be32
+-nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
++nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+ {
+ return nfs_ok;
+ }
+
+ static __be32
+-nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
++nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
+ {
+ return nfserr_notsupp;
+ }
+
+-typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
++typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u);
+
+ static const nfsd4_dec nfsd4_dec_ops[] = {
+- [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
+- [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
+- [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
+- [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
+- [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
+- [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
+- [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
+- [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
+- [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
+- [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
+- [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
+- [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+- [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
+- [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
+- [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
+- [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
+- [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh,
+- [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
+- [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
+- [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
+- [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
+- [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
+- [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
+- [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
+- [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
+- [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
+- [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
+- [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
+- [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
+- [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
++ [OP_ACCESS] = nfsd4_decode_access,
++ [OP_CLOSE] = nfsd4_decode_close,
++ [OP_COMMIT] = nfsd4_decode_commit,
++ [OP_CREATE] = nfsd4_decode_create,
++ [OP_DELEGPURGE] = nfsd4_decode_notsupp,
++ [OP_DELEGRETURN] = nfsd4_decode_delegreturn,
++ [OP_GETATTR] = nfsd4_decode_getattr,
++ [OP_GETFH] = nfsd4_decode_noop,
++ [OP_LINK] = nfsd4_decode_link,
++ [OP_LOCK] = nfsd4_decode_lock,
++ [OP_LOCKT] = nfsd4_decode_lockt,
++ [OP_LOCKU] = nfsd4_decode_locku,
++ [OP_LOOKUP] = nfsd4_decode_lookup,
++ [OP_LOOKUPP] = nfsd4_decode_noop,
++ [OP_NVERIFY] = nfsd4_decode_verify,
++ [OP_OPEN] = nfsd4_decode_open,
++ [OP_OPENATTR] = nfsd4_decode_notsupp,
++ [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm,
++ [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade,
++ [OP_PUTFH] = nfsd4_decode_putfh,
++ [OP_PUTPUBFH] = nfsd4_decode_putpubfh,
++ [OP_PUTROOTFH] = nfsd4_decode_noop,
++ [OP_READ] = nfsd4_decode_read,
++ [OP_READDIR] = nfsd4_decode_readdir,
++ [OP_READLINK] = nfsd4_decode_noop,
++ [OP_REMOVE] = nfsd4_decode_remove,
++ [OP_RENAME] = nfsd4_decode_rename,
++ [OP_RENEW] = nfsd4_decode_renew,
++ [OP_RESTOREFH] = nfsd4_decode_noop,
++ [OP_SAVEFH] = nfsd4_decode_noop,
++ [OP_SECINFO] = nfsd4_decode_secinfo,
++ [OP_SETATTR] = nfsd4_decode_setattr,
++ [OP_SETCLIENTID] = nfsd4_decode_setclientid,
++ [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm,
++ [OP_VERIFY] = nfsd4_decode_verify,
++ [OP_WRITE] = nfsd4_decode_write,
++ [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner,
+
+ /* new operations for NFSv4.1 */
+- [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl,
+- [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
+- [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
+- [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
+- [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
+- [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
+- [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
++ [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl,
++ [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session,
++ [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id,
++ [OP_CREATE_SESSION] = nfsd4_decode_create_session,
++ [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session,
++ [OP_FREE_STATEID] = nfsd4_decode_free_stateid,
++ [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp,
+ #ifdef CONFIG_NFSD_PNFS
+- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
+- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
+- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
+- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
++ [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo,
++ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
++ [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit,
++ [OP_LAYOUTGET] = nfsd4_decode_layoutget,
++ [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn,
+ #else
+- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
++ [OP_GETDEVICEINFO] = nfsd4_decode_notsupp,
++ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
++ [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp,
++ [OP_LAYOUTGET] = nfsd4_decode_notsupp,
++ [OP_LAYOUTRETURN] = nfsd4_decode_notsupp,
+ #endif
+- [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
+- [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
+- [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid,
+- [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
+- [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
++ [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name,
++ [OP_SEQUENCE] = nfsd4_decode_sequence,
++ [OP_SET_SSV] = nfsd4_decode_notsupp,
++ [OP_TEST_STATEID] = nfsd4_decode_test_stateid,
++ [OP_WANT_DELEGATION] = nfsd4_decode_notsupp,
++ [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid,
++ [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete,
+
+ /* new operations for NFSv4.2 */
+- [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
+- [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
+- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
+- [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
+- [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
+- [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
+- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
+- [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
+- [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
+- [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
++ [OP_ALLOCATE] = nfsd4_decode_fallocate,
++ [OP_COPY] = nfsd4_decode_copy,
++ [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify,
++ [OP_DEALLOCATE] = nfsd4_decode_fallocate,
++ [OP_IO_ADVISE] = nfsd4_decode_notsupp,
++ [OP_LAYOUTERROR] = nfsd4_decode_notsupp,
++ [OP_LAYOUTSTATS] = nfsd4_decode_notsupp,
++ [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status,
++ [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status,
++ [OP_READ_PLUS] = nfsd4_decode_read,
++ [OP_SEEK] = nfsd4_decode_seek,
++ [OP_WRITE_SAME] = nfsd4_decode_notsupp,
++ [OP_CLONE] = nfsd4_decode_clone,
+ /* RFC 8276 extended atributes operations */
+- [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr,
+- [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr,
+- [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs,
+- [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr,
++ [OP_GETXATTR] = nfsd4_decode_getxattr,
++ [OP_SETXATTR] = nfsd4_decode_setxattr,
++ [OP_LISTXATTRS] = nfsd4_decode_listxattrs,
++ [OP_REMOVEXATTR] = nfsd4_decode_removexattr,
+ };
+
+ static inline bool
+@@ -3644,8 +3703,10 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
+ }
+
+ static __be32
+-nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
++nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_access *access = &u->access;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3657,8 +3718,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ return 0;
+ }
+
+-static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
++static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3674,8 +3737,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
+ }
+
+ static __be32
+-nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
++nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_close *close = &u->close;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &close->cl_stateid);
+@@ -3683,8 +3748,10 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
+
+
+ static __be32
+-nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
++nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_commit *commit = &u->commit;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3697,8 +3764,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+
+ static __be32
+-nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
++nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_create *create = &u->create;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3711,8 +3780,10 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+
+ static __be32
+-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
++nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getattr *getattr = &u->getattr;
+ struct svc_fh *fhp = getattr->ga_fhp;
+ struct xdr_stream *xdr = resp->xdr;
+
+@@ -3721,8 +3792,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
+ }
+
+ static __be32
+-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
++nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct svc_fh **fhpp = &u->getfh;
+ struct xdr_stream *xdr = resp->xdr;
+ struct svc_fh *fhp = *fhpp;
+ unsigned int len;
+@@ -3776,8 +3849,10 @@ nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)
+ }
+
+ static __be32
+-nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
++nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_lock *lock = &u->lock;
+ struct xdr_stream *xdr = resp->xdr;
+
+ if (!nfserr)
+@@ -3789,8 +3864,10 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
+ }
+
+ static __be32
+-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
++nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_lockt *lockt = &u->lockt;
+ struct xdr_stream *xdr = resp->xdr;
+
+ if (nfserr == nfserr_denied)
+@@ -3799,8 +3876,10 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
+ }
+
+ static __be32
+-nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
++nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_locku *locku = &u->locku;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &locku->lu_stateid);
+@@ -3808,8 +3887,10 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
+
+
+ static __be32
+-nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
++nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_link *link = &u->link;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3822,8 +3903,10 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
+
+
+ static __be32
+-nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
++nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open *open = &u->open;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -3916,16 +3999,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
+ }
+
+ static __be32
+-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
++nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_confirm *oc = &u->open_confirm;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
+ }
+
+ static __be32
+-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
++nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_open_downgrade *od = &u->open_downgrade;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_stateid(xdr, &od->od_stateid);
+@@ -4024,8 +4111,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+
+ static __be32
+ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_read *read)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_read *read = &u->read;
+ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
+ unsigned long maxcount;
+ struct xdr_stream *xdr = resp->xdr;
+@@ -4066,8 +4154,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ }
+
+ static __be32
+-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
++nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_readlink *readlink = &u->readlink;
+ __be32 *p, *maxcount_p, zero = xdr_zero;
+ struct xdr_stream *xdr = resp->xdr;
+ int length_offset = xdr->buf->len;
+@@ -4111,8 +4201,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
+ }
+
+ static __be32
+-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
++nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_readdir *readdir = &u->readdir;
+ int maxcount;
+ int bytes_left;
+ loff_t offset;
+@@ -4202,8 +4294,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
+ }
+
+ static __be32
+-nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
++nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_remove *remove = &u->remove;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4215,8 +4309,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
+ }
+
+ static __be32
+-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
++nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_rename *rename = &u->rename;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4298,8 +4394,9 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
+
+ static __be32
+ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_secinfo *secinfo)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo *secinfo = &u->secinfo;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
+@@ -4307,8 +4404,9 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_secinfo_no_name *secinfo)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
+@@ -4319,8 +4417,10 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
+ * regardless of the error status.
+ */
+ static __be32
+-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
++nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setattr *setattr = &u->setattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4343,8 +4443,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
+ }
+
+ static __be32
+-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
++nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setclientid *scd = &u->setclientid;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4367,8 +4469,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
+ }
+
+ static __be32
+-nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
++nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_write *write = &u->write;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4384,8 +4488,9 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
+
+ static __be32
+ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_exchange_id *exid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+ char *major_id;
+@@ -4462,8 +4567,9 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_create_session *sess)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_create_session *sess = &u->create_session;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4515,8 +4621,9 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_sequence *seq)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_sequence *seq = &u->sequence;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4538,8 +4645,9 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_test_stateid *test_stateid)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
+ struct xdr_stream *xdr = resp->xdr;
+ struct nfsd4_test_stateid_id *stateid, *next;
+ __be32 *p;
+@@ -4559,8 +4667,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
+ #ifdef CONFIG_NFSD_PNFS
+ static __be32
+ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_getdeviceinfo *gdev)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
+ struct xdr_stream *xdr = resp->xdr;
+ const struct nfsd4_layout_ops *ops;
+ u32 starting_len = xdr->buf->len, needed_len;
+@@ -4612,8 +4721,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_layoutget *lgp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutget *lgp = &u->layoutget;
+ struct xdr_stream *xdr = resp->xdr;
+ const struct nfsd4_layout_ops *ops;
+ __be32 *p;
+@@ -4639,8 +4749,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_layoutcommit *lcp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4660,8 +4771,9 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_layoutreturn *lrp)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4746,8 +4858,9 @@ nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+
+ static __be32
+ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_copy *copy)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy *copy = &u->copy;
+ __be32 *p;
+
+ nfserr = nfsd42_encode_write_res(resp, ©->cp_res,
+@@ -4763,8 +4876,9 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_offload_status *os)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_offload_status *os = &u->offload_status;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4814,8 +4928,9 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
+
+ static __be32
+ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_read *read)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_read *read = &u->read;
+ struct file *file = read->rd_nf->nf_file;
+ struct xdr_stream *xdr = resp->xdr;
+ int starting_len = xdr->buf->len;
+@@ -4851,8 +4966,9 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_copy_notify *cn)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -4886,8 +5002,9 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_seek *seek)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_seek *seek = &u->seek;
+ __be32 *p;
+
+ p = xdr_reserve_space(resp->xdr, 4 + 8);
+@@ -4898,7 +5015,8 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
+ }
+
+ static __be32
+-nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
++nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr,
++ union nfsd4_op_u *p)
+ {
+ return nfserr;
+ }
+@@ -4949,8 +5067,9 @@ nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen)
+
+ static __be32
+ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_getxattr *getxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_getxattr *getxattr = &u->getxattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p, err;
+
+@@ -4973,8 +5092,9 @@ nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_setxattr *setxattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -5014,8 +5134,9 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
+
+ static __be32
+ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_listxattrs *listxattrs)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+ struct xdr_stream *xdr = resp->xdr;
+ u32 cookie_offset, count_offset, eof;
+ u32 left, xdrleft, slen, count;
+@@ -5125,8 +5246,9 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+- struct nfsd4_removexattr *removexattr)
++ union nfsd4_op_u *u)
+ {
++ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p;
+
+@@ -5138,7 +5260,7 @@ nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ return 0;
+ }
+
+-typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
++typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
+
+ /*
+ * Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
+@@ -5146,93 +5268,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+ * done in the decoding phase.
+ */
+ static const nfsd4_enc nfsd4_enc_ops[] = {
+- [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
+- [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
+- [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
+- [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
+- [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
+- [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
+- [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
+- [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
+- [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
+- [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
+- [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
+- [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
+- [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
+- [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
+- [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
+- [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
+- [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
+- [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
+- [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
+- [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
+- [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
+- [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
+- [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_ACCESS] = nfsd4_encode_access,
++ [OP_CLOSE] = nfsd4_encode_close,
++ [OP_COMMIT] = nfsd4_encode_commit,
++ [OP_CREATE] = nfsd4_encode_create,
++ [OP_DELEGPURGE] = nfsd4_encode_noop,
++ [OP_DELEGRETURN] = nfsd4_encode_noop,
++ [OP_GETATTR] = nfsd4_encode_getattr,
++ [OP_GETFH] = nfsd4_encode_getfh,
++ [OP_LINK] = nfsd4_encode_link,
++ [OP_LOCK] = nfsd4_encode_lock,
++ [OP_LOCKT] = nfsd4_encode_lockt,
++ [OP_LOCKU] = nfsd4_encode_locku,
++ [OP_LOOKUP] = nfsd4_encode_noop,
++ [OP_LOOKUPP] = nfsd4_encode_noop,
++ [OP_NVERIFY] = nfsd4_encode_noop,
++ [OP_OPEN] = nfsd4_encode_open,
++ [OP_OPENATTR] = nfsd4_encode_noop,
++ [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm,
++ [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade,
++ [OP_PUTFH] = nfsd4_encode_noop,
++ [OP_PUTPUBFH] = nfsd4_encode_noop,
++ [OP_PUTROOTFH] = nfsd4_encode_noop,
++ [OP_READ] = nfsd4_encode_read,
++ [OP_READDIR] = nfsd4_encode_readdir,
++ [OP_READLINK] = nfsd4_encode_readlink,
++ [OP_REMOVE] = nfsd4_encode_remove,
++ [OP_RENAME] = nfsd4_encode_rename,
++ [OP_RENEW] = nfsd4_encode_noop,
++ [OP_RESTOREFH] = nfsd4_encode_noop,
++ [OP_SAVEFH] = nfsd4_encode_noop,
++ [OP_SECINFO] = nfsd4_encode_secinfo,
++ [OP_SETATTR] = nfsd4_encode_setattr,
++ [OP_SETCLIENTID] = nfsd4_encode_setclientid,
++ [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop,
++ [OP_VERIFY] = nfsd4_encode_noop,
++ [OP_WRITE] = nfsd4_encode_write,
++ [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop,
+
+ /* NFSv4.1 operations */
+- [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
+- [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
+- [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
+- [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop,
++ [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session,
++ [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id,
++ [OP_CREATE_SESSION] = nfsd4_encode_create_session,
++ [OP_DESTROY_SESSION] = nfsd4_encode_noop,
++ [OP_FREE_STATEID] = nfsd4_encode_noop,
++ [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop,
+ #ifdef CONFIG_NFSD_PNFS
+- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
+- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
+- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
+- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
++ [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo,
++ [OP_GETDEVICELIST] = nfsd4_encode_noop,
++ [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit,
++ [OP_LAYOUTGET] = nfsd4_encode_layoutget,
++ [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn,
+ #else
+- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_GETDEVICEINFO] = nfsd4_encode_noop,
++ [OP_GETDEVICELIST] = nfsd4_encode_noop,
++ [OP_LAYOUTCOMMIT] = nfsd4_encode_noop,
++ [OP_LAYOUTGET] = nfsd4_encode_noop,
++ [OP_LAYOUTRETURN] = nfsd4_encode_noop,
+ #endif
+- [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
+- [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
+- [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid,
+- [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name,
++ [OP_SEQUENCE] = nfsd4_encode_sequence,
++ [OP_SET_SSV] = nfsd4_encode_noop,
++ [OP_TEST_STATEID] = nfsd4_encode_test_stateid,
++ [OP_WANT_DELEGATION] = nfsd4_encode_noop,
++ [OP_DESTROY_CLIENTID] = nfsd4_encode_noop,
++ [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop,
+
+ /* NFSv4.2 operations */
+- [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
+- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
+- [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
+- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
+- [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
+- [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
+- [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
++ [OP_ALLOCATE] = nfsd4_encode_noop,
++ [OP_COPY] = nfsd4_encode_copy,
++ [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify,
++ [OP_DEALLOCATE] = nfsd4_encode_noop,
++ [OP_IO_ADVISE] = nfsd4_encode_noop,
++ [OP_LAYOUTERROR] = nfsd4_encode_noop,
++ [OP_LAYOUTSTATS] = nfsd4_encode_noop,
++ [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop,
++ [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status,
++ [OP_READ_PLUS] = nfsd4_encode_read_plus,
++ [OP_SEEK] = nfsd4_encode_seek,
++ [OP_WRITE_SAME] = nfsd4_encode_noop,
++ [OP_CLONE] = nfsd4_encode_noop,
+
+ /* RFC 8276 extended atributes operations */
+- [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr,
+- [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr,
+- [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs,
+- [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr,
++ [OP_GETXATTR] = nfsd4_encode_getxattr,
++ [OP_SETXATTR] = nfsd4_encode_setxattr,
++ [OP_LISTXATTRS] = nfsd4_encode_listxattrs,
++ [OP_REMOVEXATTR] = nfsd4_encode_removexattr,
+ };
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 9cb65b9b20a77135169bec23be21dd66d52eeae5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 07:27:11 +0200
+Subject: nfsd: Avoid some useless tests
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit d44899b8bb0b919f923186c616a84f0e70e04772 ]
+
+memdup_user() can't return NULL, so there is no point in checking for it.
+
+Simplify some tests accordingly.
+
+Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4recover.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
+index 8f24485e0f04f..4edfc95806412 100644
+--- a/fs/nfsd/nfs4recover.c
++++ b/fs/nfsd/nfs4recover.c
+@@ -807,7 +807,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ if (get_user(namelen, &ci->cc_name.cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+- if (IS_ERR_OR_NULL(name.data))
++ if (IS_ERR(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ get_user(princhashlen, &ci->cc_princhash.cp_len);
+@@ -815,7 +815,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ princhash.data = memdup_user(
+ &ci->cc_princhash.cp_data,
+ princhashlen);
+- if (IS_ERR_OR_NULL(princhash.data)) {
++ if (IS_ERR(princhash.data)) {
+ kfree(name.data);
+ return -EFAULT;
+ }
+@@ -829,7 +829,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ if (get_user(namelen, &cnm->cn_len))
+ return -EFAULT;
+ name.data = memdup_user(&cnm->cn_id, namelen);
+- if (IS_ERR_OR_NULL(name.data))
++ if (IS_ERR(name.data))
+ return -EFAULT;
+ name.len = namelen;
+ }
+--
+2.43.0
+
--- /dev/null
+From 54411ec1964fe1631075869374c683d4d029aa74 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Dec 2023 11:56:31 +1100
+Subject: nfsd: call nfsd_last_thread() before final nfsd_put()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 2a501f55cd641eb4d3c16a2eab0d678693fac663 ]
+
+If write_ports_addfd or write_ports_addxprt fail, they call nfsd_put()
+without calling nfsd_last_thread(). This leaves nn->nfsd_serv pointing
+to a structure that has been freed.
+
+So remove 'static' from nfsd_last_thread() and call it when the
+nfsd_serv is about to be destroyed.
+
+Fixes: ec52361df99b ("SUNRPC: stop using ->sv_nrthreads as a refcount")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 9 +++++++--
+ fs/nfsd/nfsd.h | 1 +
+ fs/nfsd/nfssvc.c | 2 +-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 76a60e7a75097..eec442edb6556 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -720,8 +720,10 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+
+ err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+
+- if (err >= 0 &&
+- !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
++ if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active)
++ nfsd_last_thread(net);
++ else if (err >= 0 &&
++ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+ svc_get(nn->nfsd_serv);
+
+ nfsd_put(net);
+@@ -771,6 +773,9 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ svc_xprt_put(xprt);
+ }
+ out_err:
++ if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active)
++ nfsd_last_thread(net);
++
+ nfsd_put(net);
+ return err;
+ }
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 867dcfd64d426..3796015dc7656 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -138,6 +138,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
+ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
+ void nfsd_reset_versions(struct nfsd_net *nn);
+ int nfsd_create_serv(struct net *net);
++void nfsd_last_thread(struct net *net);
+
+ extern int nfsd_max_blksize;
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 2a1dd580dfb94..3d4fd40c987bd 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -529,7 +529,7 @@ static struct notifier_block nfsd_inet6addr_notifier = {
+ /* Only used under nfsd_mutex, so this atomic may be overkill: */
+ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
+
+-static void nfsd_last_thread(struct net *net)
++void nfsd_last_thread(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv = nn->nfsd_serv;
+--
+2.43.0
+
--- /dev/null
+From f4bb8fed6729ff8c11bf06bad9bdc55d4ac62ef5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Mar 2023 06:21:37 -0400
+Subject: nfsd: call op_release, even when op_func returns an error
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 15a8b55dbb1ba154d82627547c5761cac884d810 ]
+
+For ops with "trivial" replies, nfsd4_encode_operation will shortcut
+most of the encoding work and skip to just marshalling up the status.
+One of the things it skips is calling op_release. This could cause a
+memory leak in the layoutget codepath if there is an error at an
+inopportune time.
+
+Have the compound processing engine always call op_release, even when
+op_func sets an error in op->status. With this change, we also need
+nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
+on error to avoid a double free.
+
+Reported-by: Zhi Li <yieli@redhat.com>
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
+Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops")
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 9c9ff3bdc62a9..30c64c3f5fa05 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5398,10 +5398,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 8);
+- if (!p) {
+- WARN_ON_ONCE(1);
+- return;
+- }
++ if (!p)
++ goto release;
+ *p++ = cpu_to_be32(op->opnum);
+ post_err_offset = xdr->buf->len;
+
+@@ -5416,8 +5414,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+ op->status = encoder(resp, op->status, &op->u);
+ if (op->status)
+ trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
+- if (opdesc && opdesc->op_release)
+- opdesc->op_release(&op->u);
+ xdr_commit_encode(xdr);
+
+ /* nfsd4_check_resp_size guarantees enough room for error status */
+@@ -5458,6 +5454,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+ }
+ status:
+ *p = op->status;
++release:
++ if (opdesc && opdesc->op_release)
++ opdesc->op_release(&op->u);
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 3b6199b99e93c0816c536758baf6905f31d63f3d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 15:29:55 -0400
+Subject: NFSD: Cap rsize_bop result based on send buffer size
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 76ce4dcec0dc08a032db916841ddc4e3998be317 ]
+
+Since before the git era, NFSD has conserved the number of pages
+held by each nfsd thread by combining the RPC receive and send
+buffers into a single array of pages. This works because there are
+no cases where an operation needs a large RPC Call message and a
+large RPC Reply at the same time.
+
+Once an RPC Call has been received, svc_process() updates
+svc_rqst::rq_res to describe the part of rq_pages that can be
+used for constructing the Reply. This means that the send buffer
+(rq_res) shrinks when the received RPC record containing the RPC
+Call is large.
+
+Add an NFSv4 helper that computes the size of the send buffer. It
+replaces svc_max_payload() in spots where svc_max_payload() returns
+a value that might be larger than the remaining send buffer space.
+Callers who need to know the transport's actual maximum payload size
+will continue to use svc_max_payload().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 48 +++++++++++++++++++++++-----------------------
+ 1 file changed, 24 insertions(+), 24 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 59f675f194ebb..2e8f8b9fa3aeb 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2765,6 +2765,22 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+
+ #define op_encode_channel_attrs_maxsz (6 + 1 + 1)
+
++/*
++ * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which
++ * is called before sunrpc sets rq_res.buflen. Thus we have to compute
++ * the maximum payload size here, based on transport limits and the size
++ * of the remaining space in the rq_pages array.
++ */
++static u32 nfsd4_max_payload(const struct svc_rqst *rqstp)
++{
++ u32 buflen;
++
++ buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE;
++ buflen -= rqstp->rq_auth_slack;
++ buflen -= rqstp->rq_res.head[0].iov_len;
++ return min_t(u32, buflen, svc_max_payload(rqstp));
++}
++
+ static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+@@ -2810,9 +2826,9 @@ static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp,
+ u32 ret = 0;
+
+ if (bmap0 & FATTR4_WORD0_ACL)
+- return svc_max_payload(rqstp);
++ return nfsd4_max_payload(rqstp);
+ if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
+- return svc_max_payload(rqstp);
++ return nfsd4_max_payload(rqstp);
+
+ if (bmap1 & FATTR4_WORD1_OWNER) {
+ ret += IDMAP_NAMESZ + 4;
+@@ -2872,10 +2888,7 @@ static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp,
+ static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+- u32 maxcount = 0, rlen = 0;
+-
+- maxcount = svc_max_payload(rqstp);
+- rlen = min(op->u.read.rd_length, maxcount);
++ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+@@ -2883,8 +2896,7 @@ static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp,
+ static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+- u32 maxcount = svc_max_payload(rqstp);
+- u32 rlen = min(op->u.read.rd_length, maxcount);
++ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
+ /*
+ * If we detect that the file changed during hole encoding, then we
+ * recover by encoding the remaining reply as data. This means we need
+@@ -2898,10 +2910,7 @@ static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
+ static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+- u32 maxcount = 0, rlen = 0;
+-
+- maxcount = svc_max_payload(rqstp);
+- rlen = min(op->u.readdir.rd_maxcount, maxcount);
++ u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size + op_encode_verifier_maxsz +
+ XDR_QUADLEN(rlen)) * sizeof(__be32);
+@@ -3040,10 +3049,7 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
+ static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+- u32 maxcount = 0, rlen = 0;
+-
+- maxcount = svc_max_payload(rqstp);
+- rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
++ u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size +
+ 1 /* gd_layout_type*/ +
+@@ -3093,10 +3099,7 @@ static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
+ static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+- u32 maxcount, rlen;
+-
+- maxcount = svc_max_payload(rqstp);
+- rlen = min_t(u32, XATTR_SIZE_MAX, maxcount);
++ u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+@@ -3110,10 +3113,7 @@ static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp,
+ static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+ {
+- u32 maxcount, rlen;
+-
+- maxcount = svc_max_payload(rqstp);
+- rlen = min(op->u.listxattrs.lsxa_maxcount, maxcount);
++ u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+--
+2.43.0
+
--- /dev/null
+From 48411bc3d75ff03344c9a96d0aa0134b904a3356 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: change nfsd_create()/nfsd_symlink() to unlock directory before
+ returning.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 927bfc5600cd6333c9ef9f090f19e66b7d4c8ee1 ]
+
+nfsd_create() usually returns with the directory still locked.
+nfsd_symlink() usually returns with it unlocked. This is clumsy.
+
+Until recently nfsd_create() needed to keep the directory locked until
+ACLs and security label had been set. These are now set inside
+nfsd_create() (in nfsd_setattr()) so this need is gone.
+
+So change nfsd_create() and nfsd_symlink() to always unlock, and remove
+any fh_unlock() calls that follow calls to these functions.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 2 --
+ fs/nfsd/nfs4proc.c | 2 --
+ fs/nfsd/vfs.c | 38 +++++++++++++++++++++-----------------
+ 3 files changed, 21 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index cb91088bce2e8..c7c2c7db30f54 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -388,7 +388,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
+ fh_init(&resp->fh, NFS3_FHSIZE);
+ resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+ &attrs, S_IFDIR, 0, &resp->fh);
+- fh_unlock(&resp->dirfh);
+ return rpc_success;
+ }
+
+@@ -469,7 +468,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
+ type = nfs3_ftypes[argp->ftype];
+ resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+ &attrs, type, rdev, &resp->fh);
+- fh_unlock(&resp->dirfh);
+ out:
+ return rpc_success;
+ }
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index ffa2806fd5d3b..fb2487ceac46e 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -823,8 +823,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (attrs.na_aclerr)
+ create->cr_bmval[0] &= ~FATTR4_WORD0_ACL;
+-
+- fh_unlock(&cstate->current_fh);
+ set_change_info(&create->cr_cinfo, &cstate->current_fh);
+ fh_dup2(&cstate->current_fh, &resfh);
+ out:
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index e91ac3bc68764..90bd6968fbf68 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1380,8 +1380,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
+ dchild = lookup_one_len(fname, dentry, flen);
+ host_err = PTR_ERR(dchild);
+- if (IS_ERR(dchild))
+- return nfserrno(host_err);
++ if (IS_ERR(dchild)) {
++ err = nfserrno(host_err);
++ goto out_unlock;
++ }
+ err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+ /*
+ * We unconditionally drop our ref to dchild as fh_compose will have
+@@ -1389,9 +1391,12 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ dput(dchild);
+ if (err)
+- return err;
+- return nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type,
+- rdev, resfhp);
++ goto out_unlock;
++ err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type,
++ rdev, resfhp);
++out_unlock:
++ fh_unlock(fhp);
++ return err;
+ }
+
+ /*
+@@ -1468,16 +1473,19 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto out;
+
+ host_err = fh_want_write(fhp);
+- if (host_err)
+- goto out_nfserr;
++ if (host_err) {
++ err = nfserrno(host_err);
++ goto out;
++ }
+
+ fh_lock(fhp);
+ dentry = fhp->fh_dentry;
+ dnew = lookup_one_len(fname, dentry, flen);
+- host_err = PTR_ERR(dnew);
+- if (IS_ERR(dnew))
+- goto out_nfserr;
+-
++ if (IS_ERR(dnew)) {
++ err = nfserrno(PTR_ERR(dnew));
++ fh_unlock(fhp);
++ goto out_drop_write;
++ }
+ host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
+ err = nfserrno(host_err);
+ cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+@@ -1486,16 +1494,12 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ fh_unlock(fhp);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
+- fh_drop_write(fhp);
+-
+ dput(dnew);
+ if (err==0) err = cerr;
++out_drop_write:
++ fh_drop_write(fhp);
+ out:
+ return err;
+-
+-out_nfserr:
+- err = nfserrno(host_err);
+- goto out;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 6d85536e4c6957abd740c626d6368f66f619137f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Feb 2022 11:26:06 -0500
+Subject: NFSD: Clean up _lm_ operation names
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 35aff0678f99b0623bb72d50112de9e163a19559 ]
+
+The common practice is to name function instances the same as the
+method names, but with a uniquifying prefix. Commit aef9583b234a
+("NFSD: Get reference of lockowner when coping file_lock") missed
+this -- the new function names should both have been of the form
+"nfsd4_lm_*".
+
+Before more lock manager operations are added in NFSD, rename these
+two functions for consistency.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index fc0d7fbe5d4a6..5f3adb59c1ffd 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -6552,7 +6552,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
+ }
+
+ static fl_owner_t
+-nfsd4_fl_get_owner(fl_owner_t owner)
++nfsd4_lm_get_owner(fl_owner_t owner)
+ {
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
+
+@@ -6561,7 +6561,7 @@ nfsd4_fl_get_owner(fl_owner_t owner)
+ }
+
+ static void
+-nfsd4_fl_put_owner(fl_owner_t owner)
++nfsd4_lm_put_owner(fl_owner_t owner)
+ {
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
+
+@@ -6596,8 +6596,8 @@ nfsd4_lm_notify(struct file_lock *fl)
+
+ static const struct lock_manager_operations nfsd_posix_mng_ops = {
+ .lm_notify = nfsd4_lm_notify,
+- .lm_get_owner = nfsd4_fl_get_owner,
+- .lm_put_owner = nfsd4_fl_put_owner,
++ .lm_get_owner = nfsd4_lm_get_owner,
++ .lm_put_owner = nfsd4_lm_put_owner,
+ };
+
+ static inline void
+--
+2.43.0
+
--- /dev/null
+From 02871b1a7fd36d758517e6687a0c3389991584d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:41 -0400
+Subject: NFSD: Clean up find_or_add_file()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 9270fc514ba7d415636b23bcb937573a1ce54f6a ]
+
+Remove the call to find_file_locked() in insert_nfs4_file(). Tracing
+shows that over 99% of these calls return NULL. Thus it is not worth
+the expense of the extra bucket list traversal. insert_file() already
+deals correctly with the case where the item is already in the hash
+bucket.
+
+Since nfsd4_file_hash_insert() is now just a wrapper around
+insert_file(), move the meat of insert_file() into
+nfsd4_file_hash_insert() and get rid of it.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 64 ++++++++++++++++++++-------------------------
+ 1 file changed, 28 insertions(+), 36 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index f723d7d5e1557..192e721525665 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4698,24 +4698,42 @@ find_file_locked(const struct svc_fh *fh, unsigned int hashval)
+ return NULL;
+ }
+
+-static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
+- unsigned int hashval)
++static struct nfs4_file * find_file(struct svc_fh *fh)
+ {
+ struct nfs4_file *fp;
++ unsigned int hashval = file_hashval(fh);
++
++ rcu_read_lock();
++ fp = find_file_locked(fh, hashval);
++ rcu_read_unlock();
++ return fp;
++}
++
++/*
++ * On hash insertion, identify entries with the same inode but
++ * distinct filehandles. They will all be in the same hash bucket
++ * because nfs4_file's are hashed by the address in the fi_inode
++ * field.
++ */
++static noinline_for_stack struct nfs4_file *
++nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
++{
++ unsigned int hashval = file_hashval(fhp);
+ struct nfs4_file *ret = NULL;
+ bool alias_found = false;
++ struct nfs4_file *fi;
+
+ spin_lock(&state_lock);
+- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
++ hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash,
+ lockdep_is_held(&state_lock)) {
+- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
+- if (refcount_inc_not_zero(&fp->fi_ref))
+- ret = fp;
+- } else if (d_inode(fh->fh_dentry) == fp->fi_inode)
+- fp->fi_aliased = alias_found = true;
++ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
++ if (refcount_inc_not_zero(&fi->fi_ref))
++ ret = fi;
++ } else if (d_inode(fhp->fh_dentry) == fi->fi_inode)
++ fi->fi_aliased = alias_found = true;
+ }
+ if (likely(ret == NULL)) {
+- nfsd4_file_init(fh, new);
++ nfsd4_file_init(fhp, new);
+ hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]);
+ new->fi_aliased = alias_found;
+ ret = new;
+@@ -4724,32 +4742,6 @@ static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
+ return ret;
+ }
+
+-static struct nfs4_file * find_file(struct svc_fh *fh)
+-{
+- struct nfs4_file *fp;
+- unsigned int hashval = file_hashval(fh);
+-
+- rcu_read_lock();
+- fp = find_file_locked(fh, hashval);
+- rcu_read_unlock();
+- return fp;
+-}
+-
+-static struct nfs4_file *
+-find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
+-{
+- struct nfs4_file *fp;
+- unsigned int hashval = file_hashval(fh);
+-
+- rcu_read_lock();
+- fp = find_file_locked(fh, hashval);
+- rcu_read_unlock();
+- if (fp)
+- return fp;
+-
+- return insert_file(new, fh, hashval);
+-}
+-
+ static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
+ {
+ hlist_del_rcu(&fi->fi_hash);
+@@ -5641,7 +5633,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ * and check for delegations in the process of being recalled.
+ * If not found, create the nfs4_file struct
+ */
+- fp = find_or_add_file(open->op_file, current_fh);
++ fp = nfsd4_file_hash_insert(open->op_file, current_fh);
+ if (fp != open->op_file) {
+ status = nfs4_check_deleg(cl, open, &dp);
+ if (status)
+--
+2.43.0
+
--- /dev/null
+From 9a157e89c10af2a4accf9ed67077d178e4ab0652 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 12:31:07 -0400
+Subject: nfsd: clean up mounted_on_fileid handling
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 6106d9119b6599fa23dc556b429d887b4c2d9f62 ]
+
+We only need the inode number for this, not a full rack of attributes.
+Rename this function, make it take a pointer to a u64 instead of
+struct kstat, and change it to just request STATX_INO.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+[ cel: renamed get_mounted_on_ino() ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 0f30d93577e7b..3ad9b41c51730 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2772,9 +2772,10 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32
+ }
+
+
+-static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
++static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino)
+ {
+ struct path path = exp->ex_path;
++ struct kstat stat;
+ int err;
+
+ path_get(&path);
+@@ -2782,8 +2783,10 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
+ if (path.dentry != path.mnt->mnt_root)
+ break;
+ }
+- err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
++ err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
+ path_put(&path);
++ if (!err)
++ *pino = stat.ino;
+ return err;
+ }
+
+@@ -3280,22 +3283,21 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ *p++ = cpu_to_be32(stat.btime.tv_nsec);
+ }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+- struct kstat parent_stat;
+ u64 ino = stat.ino;
+
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
+ goto out_resource;
+ /*
+- * Get parent's attributes if not ignoring crossmount
+- * and this is the root of a cross-mounted filesystem.
++ * Get ino of mountpoint in parent filesystem, if not ignoring
++ * crossmount and this is the root of a cross-mounted
++ * filesystem.
+ */
+ if (ignore_crossmnt == 0 &&
+ dentry == exp->ex_path.mnt->mnt_root) {
+- err = get_parent_attributes(exp, &parent_stat);
++ err = nfsd4_get_mounted_on_ino(exp, &ino);
+ if (err)
+ goto out_nfserr;
+- ino = parent_stat.ino;
+ }
+ p = xdr_encode_hyper(p, ino);
+ }
+--
+2.43.0
+
--- /dev/null
+From 0cf7f1a2afb001c8d325bedeac4877a843abf662 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:46:57 -0400
+Subject: NFSD: Clean up nfs4_preprocess_stateid_op() call sites
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit eeff73f7c1c583f79a401284f46c619294859310 ]
+
+Remove the lame-duck dprintk()s around nfs4_preprocess_stateid_op()
+call sites.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 31 +++++++------------------------
+ 1 file changed, 7 insertions(+), 24 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 6ed0baa119433..92d4eb1032ff9 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -943,12 +943,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &read->rd_stateid, RD_STATE,
+ &read->rd_nf, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
+- goto out;
+- }
+- status = nfs_ok;
+-out:
++
+ read->rd_rqstp = rqstp;
+ read->rd_fhp = &cstate->current_fh;
+ return status;
+@@ -1117,10 +1112,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ &cstate->current_fh, &setattr->sa_stateid,
+ WR_STATE, NULL, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
++ if (status)
+ return status;
+- }
+ }
+ err = fh_want_write(&cstate->current_fh);
+ if (err)
+@@ -1168,10 +1161,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ write->wr_offset, cnt);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ stateid, WR_STATE, &nf, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
++ if (status)
+ return status;
+- }
+
+ write->wr_how_written = write->wr_stable_how;
+
+@@ -1202,17 +1193,13 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
+ src_stateid, RD_STATE, src, NULL);
+- if (status) {
+- dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
++ if (status)
+ goto out;
+- }
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ dst_stateid, WR_STATE, dst, NULL);
+- if (status) {
+- dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
++ if (status)
+ goto out_put_src;
+- }
+
+ /* fix up for NFS-specific error code */
+ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+@@ -1951,10 +1938,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &fallocate->falloc_stateid,
+ WR_STATE, &nf, NULL);
+- if (status != nfs_ok) {
+- dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
++ if (status != nfs_ok)
+ return status;
+- }
+
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
+ fallocate->falloc_offset,
+@@ -2010,10 +1995,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &seek->seek_stateid,
+ RD_STATE, &nf, NULL);
+- if (status) {
+- dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
++ if (status)
+ return status;
+- }
+
+ switch (seek->seek_whence) {
+ case NFS4_CONTENT_DATA:
+--
+2.43.0
+
--- /dev/null
+From bec3bcce0f515275cf2b0c0b0df1984fbb79ba8d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:23:19 -0400
+Subject: NFSD: Clean up nfs4svc_encode_compoundres()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 9993a66317fc9951322483a9edbfae95a640b210 ]
+
+In today's Linux NFS server implementation, the NFS dispatcher
+initializes each XDR result stream, and the NFSv4 .pc_func and
+.pc_encode methods all use xdr_stream-based encoding. This keeps
+rq_res.len automatically updated. There is no longer a need for
+the WARN_ON_ONCE() check in nfs4svc_encode_compoundres().
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 6c43cf52a885f..76028a5c81d1d 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5470,12 +5470,8 @@ bool
+ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+- struct xdr_buf *buf = xdr->buf;
+ __be32 *p;
+
+- WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
+- buf->tail[0].iov_len);
+-
+ /*
+ * Send buffer space for the following items is reserved
+ * at the top of nfsd4_proc_compound().
+--
+2.43.0
+
--- /dev/null
+From f92e3cd695f4421a0936ccfaf4b6fb651c8def42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Mar 2022 14:47:54 -0400
+Subject: NFSD: Clean up nfsd3_proc_create()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit e61568599c9ad638fdaba150fee07d7065e31851 ]
+
+As near as I can tell, mode bit masking and setting S_IFREG is
+already done by do_nfsd_create() and vfs_create(). The NFSv4 path
+(do_open_lookup), for example, does not bother with this special
+processing.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 16 ++--------------
+ 1 file changed, 2 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index eaf785aec0708..86163ecbb015d 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -229,8 +229,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_createargs *argp = rqstp->rq_argp;
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
+- svc_fh *dirfhp, *newfhp = NULL;
+- struct iattr *attr;
++ svc_fh *dirfhp, *newfhp;
+
+ dprintk("nfsd: CREATE(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+@@ -239,20 +238,9 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
+
+ dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+ newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+- attr = &argp->attrs;
+-
+- /* Unfudge the mode bits */
+- attr->ia_mode &= ~S_IFMT;
+- if (!(attr->ia_valid & ATTR_MODE)) {
+- attr->ia_valid |= ATTR_MODE;
+- attr->ia_mode = S_IFREG;
+- } else {
+- attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
+- }
+
+- /* Now create the file and set attributes */
+ resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+- attr, newfhp, argp->createmode,
++ &argp->attrs, newfhp, argp->createmode,
+ (u32 *)argp->verf, NULL, NULL);
+ return rpc_success;
+ }
+--
+2.43.0
+
--- /dev/null
+From 8a08ec06a8a8069a5ee1699493096eb02bfb22da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:09:23 -0400
+Subject: NFSD: Clean up nfsd4_encode_readlink()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 99b002a1fa00d90e66357315757e7277447ce973 ]
+
+Similar changes to nfsd4_encode_readv(), all bundled into a single
+patch.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 24 +++++++++---------------
+ 1 file changed, 9 insertions(+), 15 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 88e8192f9a75d..a5ab6ea475423 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -4016,16 +4016,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ static __be32
+ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
+ {
+- int maxcount;
+- __be32 wire_count;
+- int zero = 0;
++ __be32 *p, *maxcount_p, zero = xdr_zero;
+ struct xdr_stream *xdr = resp->xdr;
+ int length_offset = xdr->buf->len;
+- int status;
+- __be32 *p;
++ int maxcount, status;
+
+- p = xdr_reserve_space(xdr, 4);
+- if (!p)
++ maxcount_p = xdr_reserve_space(xdr, XDR_UNIT);
++ if (!maxcount_p)
+ return nfserr_resource;
+ maxcount = PAGE_SIZE;
+
+@@ -4050,14 +4047,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
+ nfserr = nfserrno(status);
+ goto out_err;
+ }
+-
+- wire_count = htonl(maxcount);
+- write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
+- xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
+- if (maxcount & 3)
+- write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
+- &zero, 4 - (maxcount&3));
+- return 0;
++ *maxcount_p = cpu_to_be32(maxcount);
++ xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount));
++ write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero,
++ xdr_pad_size(maxcount));
++ return nfs_ok;
+
+ out_err:
+ xdr_truncate_encode(xdr, length_offset);
+--
+2.43.0
+
--- /dev/null
+From 941a169dae9bfdea4cabc9b82fca2c59103724e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:28 -0400
+Subject: NFSD: Clean up nfsd4_init_file()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 81a21fa3e7fdecb3c5b97014f0fc5a17d5806cae ]
+
+Name this function more consistently. I'm going to use nfsd4_file_
+and nfsd4_file_hash_ for these helpers.
+
+Change the @fh parameter to be const pointer for better type safety.
+
+Finally, move the hash insertion operation to the caller. This is
+typical for most other "init_object" type helpers, and it is where
+most of the other nfs4_file hash table operations are located.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index aa7374933de77..1cb3ea90eb4ca 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4277,11 +4277,9 @@ static struct nfs4_file *nfsd4_alloc_file(void)
+ }
+
+ /* OPEN Share state helper functions */
+-static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
+- struct nfs4_file *fp)
+-{
+- lockdep_assert_held(&state_lock);
+
++static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
++{
+ refcount_set(&fp->fi_ref, 1);
+ spin_lock_init(&fp->fi_lock);
+ INIT_LIST_HEAD(&fp->fi_stateids);
+@@ -4299,7 +4297,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
+ INIT_LIST_HEAD(&fp->fi_lo_states);
+ atomic_set(&fp->fi_lo_recalls, 0);
+ #endif
+- hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
+ }
+
+ void
+@@ -4717,7 +4714,8 @@ static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
+ fp->fi_aliased = alias_found = true;
+ }
+ if (likely(ret == NULL)) {
+- nfsd4_init_file(fh, hashval, new);
++ nfsd4_file_init(fh, new);
++ hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]);
+ new->fi_aliased = alias_found;
+ ret = new;
+ }
+--
+2.43.0
+
--- /dev/null
+From cbe3d42fb4d7e2683b1fbb63c228ae322ffbfa04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Mar 2022 09:54:02 -0400
+Subject: nfsd: Clean up nfsd_file_put()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 999397926ab3f78c7d1235cc4ca6e3c89d2769bf ]
+
+Make it a little less racy, by removing the refcount_read() test. Then
+remove the redundant 'is_hashed' variable.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 13 +++----------
+ 1 file changed, 3 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6cde6ef68996e..429ae485ebbbe 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -295,21 +295,14 @@ nfsd_file_put_noref(struct nfsd_file *nf)
+ void
+ nfsd_file_put(struct nfsd_file *nf)
+ {
+- bool is_hashed;
+-
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+- if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+- nfsd_file_put_noref(nf);
+- return;
+- }
+-
+- is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+- if (!is_hashed) {
++ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
+ nfsd_file_flush(nf);
+ nfsd_file_put_noref(nf);
+ } else {
+ nfsd_file_put_noref(nf);
+- nfsd_file_schedule_laundrette();
++ if (nf->nf_file)
++ nfsd_file_schedule_laundrette();
+ }
+ if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+ nfsd_file_gc();
+--
+2.43.0
+
--- /dev/null
+From d46f1c6d664bc46ed2272025d834c7ca1ff68fc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Mar 2022 16:46:47 -0400
+Subject: NFSD: Clean up nfsd_open_verified()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit f4d84c52643ae1d63a8e73e2585464470e7944d1 ]
+
+Its only caller always passes S_IFREG as the @type parameter. As an
+additional clean-up, add a kerneldoc comment.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 4 ++--
+ fs/nfsd/vfs.c | 15 ++++++++++++---
+ fs/nfsd/vfs.h | 2 +-
+ 3 files changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index b1afe6db589f2..0f6553b316f58 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -976,8 +976,8 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark)
+- status = nfsd_open_verified(rqstp, fhp, S_IFREG,
+- may_flags, &nf->nf_file);
++ status = nfsd_open_verified(rqstp, fhp, may_flags,
++ &nf->nf_file);
+ else
+ status = nfserr_jukebox;
+ /*
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 9dd14c0eaebd1..6689ad5bb790d 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -834,14 +834,23 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ return err;
+ }
+
++/**
++ * nfsd_open_verified - Open a regular file for the filecache
++ * @rqstp: RPC request
++ * @fhp: NFS filehandle of the file to open
++ * @may_flags: internal permission flags
++ * @filp: OUT: open "struct file *"
++ *
++ * Returns an nfsstat value in network byte order.
++ */
+ __be32
+-nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+- int may_flags, struct file **filp)
++nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
++ struct file **filp)
+ {
+ __be32 err;
+
+ validate_process_creds();
+- err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
++ err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
+ validate_process_creds();
+ return err;
+ }
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index f99794b033a55..26347d76f44a0 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -86,7 +86,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ int nfsd_open_break_lease(struct inode *, int);
+ __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
+ int, struct file **);
+-__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
++__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *,
+ int, struct file **);
+ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+--
+2.43.0
+
--- /dev/null
+From e3d29b9ee6fc9e897f4c1348405bf53fa902e7b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Apr 2022 16:48:24 -0400
+Subject: NFSD: Clean up nfsd_splice_actor()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 91e23b1c39820bfed642119ff6b6ef9f43cf09ce ]
+
+nfsd_splice_actor() checks that the page being spliced does not
+match the previous element in the svc_rqst::rq_pages array. We
+believe this is to prevent a double put_page() in cases where the
+READ payload is partially contained in the xdr_buf's head buffer.
+
+However, the NFSD READ proc functions no longer place any part of
+the READ payload in the head buffer, in order to properly support
+NFS/RDMA READ with Write chunks. Therefore, simplify the logic in
+nfsd_splice_actor() to remove this unnecessary check.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 10 ++--------
+ 1 file changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 00e956bdefaae..541f39ab450ce 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -856,17 +856,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+ {
+ struct svc_rqst *rqstp = sd->u.data;
+- struct page **pp = rqstp->rq_next_page;
+- struct page *page = buf->page;
+
+- if (rqstp->rq_res.page_len == 0) {
+- svc_rqst_replace_page(rqstp, page);
++ svc_rqst_replace_page(rqstp, buf->page);
++ if (rqstp->rq_res.page_len == 0)
+ rqstp->rq_res.page_base = buf->offset;
+- } else if (page != pp[-1]) {
+- svc_rqst_replace_page(rqstp, page);
+- }
+ rqstp->rq_res.page_len += sd->len;
+-
+ return sd->len;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 535a4225341e55de835a23fc521803c97c40e5cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 14:19:41 -0500
+Subject: NFSD: Clean up nfsd_vfs_write()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 33388b3aefefd4d83764dab8038cb54068161a44 ]
+
+The RWF_SYNC and !RWF_SYNC arms are now exactly alike except that
+the RWF_SYNC arm resets the boot verifier twice in a row. Fix that
+redundancy and de-duplicate the code.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 21 +++++----------------
+ 1 file changed, 5 insertions(+), 16 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index bc025fe5a595b..98d370dcca867 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1009,22 +1009,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ since = READ_ONCE(file->f_wb_err);
+- if (flags & RWF_SYNC) {
+- if (verf)
+- nfsd_copy_boot_verifier(verf,
+- net_generic(SVC_NET(rqstp),
+- nfsd_net_id));
+- host_err = vfs_iter_write(file, &iter, &pos, flags);
+- if (host_err < 0)
+- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+- nfsd_net_id));
+- } else {
+- if (verf)
+- nfsd_copy_boot_verifier(verf,
+- net_generic(SVC_NET(rqstp),
+- nfsd_net_id));
+- host_err = vfs_iter_write(file, &iter, &pos, flags);
+- }
++ if (verf)
++ nfsd_copy_boot_verifier(verf,
++ net_generic(SVC_NET(rqstp),
++ nfsd_net_id));
++ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0) {
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+--
+2.43.0
+
--- /dev/null
+From 73493b28b964a1fa16d3eb4f47d4b0d174eddc7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Jan 2023 14:38:31 -0500
+Subject: nfsd: clean up potential nfsd_file refcount leaks in COPY codepath
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 6ba434cb1a8d403ea9aad1b667c3ea3ad8b3191f ]
+
+There are two different flavors of the nfsd4_copy struct. One is
+embedded in the compound and is used directly in synchronous copies. The
+other is dynamically allocated, refcounted and tracked in the client
+structure. For the embedded one, the cleanup just involves releasing any
+nfsd_files held on its behalf. For the async one, the cleanup is a bit
+more involved, and we need to dequeue it from lists, unhash it, etc.
+
+There is at least one potential refcount leak in this code now. If the
+kthread_create call fails, then both the src and dst nfsd_files in the
+original nfsd4_copy object are leaked.
+
+The cleanup in this codepath is also sort of weird. In the async copy
+case, we'll have up to four nfsd_file references (src and dst for both
+flavors of copy structure). They are both put at the end of
+nfsd4_do_async_copy, even though the ones held on behalf of the embedded
+one outlive that structure.
+
+Change it so that we always clean up the nfsd_file refs held by the
+embedded copy structure before nfsd4_copy returns. Rework
+cleanup_async_copy to handle both inter and intra copies. Eliminate
+nfsd4_cleanup_intra_ssc since it now becomes a no-op.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 6fb5f10602233..ada46ef5a093d 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1512,7 +1512,6 @@ nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+ long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+
+ nfs42_ssc_close(filp);
+- nfsd_file_put(dst);
+ fput(filp);
+
+ spin_lock(&nn->nfsd_ssc_lock);
+@@ -1562,13 +1561,6 @@ nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
+ &copy->nf_dst);
+ }
+
+-static void
+-nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
+-{
+- nfsd_file_put(src);
+- nfsd_file_put(dst);
+-}
+-
+ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
+ {
+ struct nfsd4_cb_offload *cbo =
+@@ -1683,12 +1675,18 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+ dst->ss_nsui = src->ss_nsui;
+ }
+
++static void release_copy_files(struct nfsd4_copy *copy)
++{
++ if (copy->nf_src)
++ nfsd_file_put(copy->nf_src);
++ if (copy->nf_dst)
++ nfsd_file_put(copy->nf_dst);
++}
++
+ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ {
+ nfs4_free_copy_state(copy);
+- nfsd_file_put(copy->nf_dst);
+- if (!nfsd4_ssc_is_inter(copy))
+- nfsd_file_put(copy->nf_src);
++ release_copy_files(copy);
+ spin_lock(&copy->cp_clp->async_lock);
+ list_del(&copy->copies);
+ spin_unlock(&copy->cp_clp->async_lock);
+@@ -1748,7 +1746,6 @@ static int nfsd4_do_async_copy(void *data)
+ } else {
+ nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, false);
+- nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+
+ do_callback:
+@@ -1811,9 +1808,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ } else {
+ status = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, true);
+- nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+ out:
++ release_copy_files(copy);
+ return status;
+ out_err:
+ if (async_copy)
+--
+2.43.0
+
--- /dev/null
+From 892f810b6f2a5258f34c2ec08363711b32ce8889 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:08:51 -0400
+Subject: NFSD: Clean up SPLICE_OK in nfsd4_encode_read()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c738b218a2e5a753a336b4b7fee6720b902c7ace ]
+
+Do the test_bit() once -- this reduces the number of locked-bus
+operations and makes the function a little easier to read.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index d5a4aa0da32be..afc8a51cf60f1 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3988,6 +3988,7 @@ static __be32
+ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
+ {
++ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
+ unsigned long maxcount;
+ struct xdr_stream *xdr = resp->xdr;
+ struct file *file;
+@@ -4000,11 +4001,10 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
+ if (!p) {
+- WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
++ WARN_ON_ONCE(splice_ok);
+ return nfserr_resource;
+ }
+- if (resp->xdr->buf->page_len &&
+- test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
++ if (resp->xdr->buf->page_len && splice_ok) {
+ WARN_ON_ONCE(1);
+ return nfserr_serverfault;
+ }
+@@ -4013,8 +4013,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
+
+- if (file->f_op->splice_read &&
+- test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
++ if (file->f_op->splice_read && splice_ok)
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ else
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+--
+2.43.0
+
--- /dev/null
+From 2490ca224f9232bc7cf1292affe006e2dc1aec9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Dec 2021 14:43:16 -0500
+Subject: NFSD: Clean up the nfsd_net::nfssvc_boot field
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 91d2e9b56cf5c80f9efc530d494968369a8a0e0d ]
+
+There are two boot-time fields in struct nfsd_net: one called
+boot_time and one called nfssvc_boot. The latter is used only to
+form write verifiers, but its documenting comment declares:
+
+ /* Time of server startup */
+
+Since commit 27c438f53e79 ("nfsd: Support the server resetting the
+boot verifier"), this field can be reset at any time; it's no
+longer tied to server restart. So that comment is stale.
+
+Also, according to pahole, struct timespec64 is 16 bytes long on
+x86_64. The nfssvc_boot field is used only to form a write verifier,
+which is 8 bytes long.
+
+Let's clarify this situation by manufacturing an 8-byte verifier
+in nfsd_reset_boot_verifier() and storing only that in struct
+nfsd_net.
+
+We're grabbing 128 bits of time, so compress all of those into a
+64-bit verifier instead of throwing out the high-order bits.
+In the future, the siphash_key can be re-used for other hashed
+objects per-nfsd_net.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 8 +++++---
+ fs/nfsd/nfsctl.c | 3 ++-
+ fs/nfsd/nfssvc.c | 51 ++++++++++++++++++++++++++++++++++++------------
+ 3 files changed, 45 insertions(+), 17 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 9e8b77d2a3a47..a6ed300259849 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -11,6 +11,7 @@
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+ #include <linux/percpu_counter.h>
++#include <linux/siphash.h>
+
+ /* Hash tables for nfs4_clientid state */
+ #define CLIENT_HASH_BITS 4
+@@ -108,9 +109,8 @@ struct nfsd_net {
+ bool nfsd_net_up;
+ bool lockd_up;
+
+- /* Time of server startup */
+- struct timespec64 nfssvc_boot;
+- seqlock_t boot_lock;
++ seqlock_t writeverf_lock;
++ unsigned char writeverf[8];
+
+ /*
+ * Max number of connections this nfsd container will allow. Defaults
+@@ -187,6 +187,8 @@ struct nfsd_net {
+ char nfsd_name[UNX_MAXNODENAME+1];
+
+ struct nfsd_fcache_disposal *fcache_disposal;
++
++ siphash_key_t siphash_key;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 504b169d27881..68b020f2002b7 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1484,7 +1484,8 @@ static __net_init int nfsd_init_net(struct net *net)
+ nn->clientid_counter = nn->clientid_base + 1;
+ nn->s2s_cp_cl_id = nn->clientid_counter++;
+
+- seqlock_init(&nn->boot_lock);
++ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
++ seqlock_init(&nn->writeverf_lock);
+
+ return 0;
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 4d1d8aa6d7f9d..5a60664695352 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -12,6 +12,7 @@
+ #include <linux/module.h>
+ #include <linux/fs_struct.h>
+ #include <linux/swap.h>
++#include <linux/siphash.h>
+
+ #include <linux/sunrpc/stats.h>
+ #include <linux/sunrpc/svcsock.h>
+@@ -344,33 +345,57 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
+ return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
+ }
+
++/**
++ * nfsd_copy_boot_verifier - Atomically copy a write verifier
++ * @verf: buffer in which to receive the verifier cookie
++ * @nn: NFS net namespace
++ *
++ * This function provides a wait-free mechanism for copying the
++ * namespace's boot verifier without tearing it.
++ */
+ void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+ {
+ int seq = 0;
+
+ do {
+- read_seqbegin_or_lock(&nn->boot_lock, &seq);
+- /*
+- * This is opaque to client, so no need to byte-swap. Use
+- * __force to keep sparse happy. y2038 time_t overflow is
+- * irrelevant in this usage
+- */
+- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
+- } while (need_seqretry(&nn->boot_lock, seq));
+- done_seqretry(&nn->boot_lock, seq);
++ read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
++ memcpy(verf, nn->writeverf, sizeof(*verf));
++ } while (need_seqretry(&nn->writeverf_lock, seq));
++ done_seqretry(&nn->writeverf_lock, seq);
+ }
+
+ static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+ {
+- ktime_get_raw_ts64(&nn->nfssvc_boot);
++ struct timespec64 now;
++ u64 verf;
++
++ /*
++ * Because the time value is hashed, y2038 time_t overflow
++ * is irrelevant in this usage.
++ */
++ ktime_get_raw_ts64(&now);
++ verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
++ memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
+ }
+
++/**
++ * nfsd_reset_boot_verifier - Generate a new boot verifier
++ * @nn: NFS net namespace
++ *
++ * This function updates the ->writeverf field of @nn. This field
++ * contains an opaque cookie that, according to Section 18.32.3 of
++ * RFC 8881, "the client can use to determine whether a server has
++ * changed instance state (e.g., server restart) between a call to
++ * WRITE and a subsequent call to either WRITE or COMMIT. This
++ * cookie MUST be unchanged during a single instance of the NFSv4.1
++ * server and MUST be unique between instances of the NFSv4.1
++ * server."
++ */
+ void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+ {
+- write_seqlock(&nn->boot_lock);
++ write_seqlock(&nn->writeverf_lock);
+ nfsd_reset_boot_verifier_locked(nn);
+- write_sequnlock(&nn->boot_lock);
++ write_sequnlock(&nn->writeverf_lock);
+ }
+
+ static int nfsd_startup_net(struct net *net, const struct cred *cred)
+--
+2.43.0
+
--- /dev/null
+From ffedfc6af1b3b17cd64ee241f9665146814f221f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:36 -0400
+Subject: NFSD: Clean up unused code after rhashtable conversion
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0ec8e9d1539a7b8109a554028bbce441052f847e ]
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 33 +--------------------------------
+ fs/nfsd/filecache.h | 1 -
+ 2 files changed, 1 insertion(+), 33 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 45dd4f3fa0905..c6dc55c0f758b 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -21,11 +21,6 @@
+ #include "filecache.h"
+ #include "trace.h"
+
+-#define NFSDDBG_FACILITY NFSDDBG_FH
+-
+-/* FIXME: dynamically size this for the machine somehow? */
+-#define NFSD_FILE_HASH_BITS 12
+-#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
+ #define NFSD_LAUNDRETTE_DELAY (2 * HZ)
+
+ #define NFSD_FILE_CACHE_UP (0)
+@@ -33,13 +28,6 @@
+ /* We only care about NFSD_MAY_READ/WRITE for this cache */
+ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+-struct nfsd_fcache_bucket {
+- struct hlist_head nfb_head;
+- spinlock_t nfb_lock;
+- unsigned int nfb_count;
+- unsigned int nfb_maxcount;
+-};
+-
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+@@ -57,7 +45,6 @@ static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
+
+ static struct kmem_cache *nfsd_file_slab;
+ static struct kmem_cache *nfsd_file_mark_slab;
+-static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
+ static struct list_lru nfsd_file_lru;
+ static unsigned long nfsd_file_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+@@ -302,7 +289,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+ if (nf) {
+- INIT_HLIST_NODE(&nf->nf_node);
+ INIT_LIST_HEAD(&nf->nf_lru);
+ nf->nf_birthtime = ktime_get();
+ nf->nf_file = NULL;
+@@ -810,8 +796,7 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+ int
+ nfsd_file_cache_init(void)
+ {
+- int ret;
+- unsigned int i;
++ int ret;
+
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+@@ -826,13 +811,6 @@ nfsd_file_cache_init(void)
+ if (!nfsd_filecache_wq)
+ goto out;
+
+- nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
+- sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+- if (!nfsd_file_hashtbl) {
+- pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
+- goto out_err;
+- }
+-
+ nfsd_file_slab = kmem_cache_create("nfsd_file",
+ sizeof(struct nfsd_file), 0, 0, NULL);
+ if (!nfsd_file_slab) {
+@@ -876,11 +854,6 @@ nfsd_file_cache_init(void)
+ goto out_notifier;
+ }
+
+- for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+- INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+- spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+- }
+-
+ INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
+ out:
+ return ret;
+@@ -895,8 +868,6 @@ nfsd_file_cache_init(void)
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+- kvfree(nfsd_file_hashtbl);
+- nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+ rhashtable_destroy(&nfsd_file_rhash_tbl);
+@@ -1026,8 +997,6 @@ nfsd_file_cache_shutdown(void)
+ fsnotify_wait_marks_destroyed();
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+- kvfree(nfsd_file_hashtbl);
+- nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+ rhashtable_destroy(&nfsd_file_rhash_tbl);
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 5cbfc61a7d7d9..ee9ed99d8b8fa 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -30,7 +30,6 @@ struct nfsd_file_mark {
+ */
+ struct nfsd_file {
+ struct rhash_head nf_rhash;
+- struct hlist_node nf_node;
+ struct list_head nf_lru;
+ struct rcu_head nf_rcu;
+ struct file *nf_file;
+--
+2.43.0
+
--- /dev/null
+From a89b20a5386487ec7019c41008cded16d6623262 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:23:07 -0400
+Subject: NFSD: Clean up WRITE arg decoders
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit d4da5baa533215b14625458e645056baf646bb2e ]
+
+xdr_stream_subsegment() already returns a boolean value.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3xdr.c | 4 +---
+ fs/nfsd/nfsxdr.c | 4 +---
+ 2 files changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 71e32cf288854..3308dd671ef0b 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -571,10 +571,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ args->count = max_blocksize;
+ args->len = max_blocksize;
+ }
+- if (!xdr_stream_subsegment(xdr, &args->payload, args->count))
+- return false;
+
+- return true;
++ return xdr_stream_subsegment(xdr, &args->payload, args->count);
+ }
+
+ bool
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index aba8520b4b8b6..caf6355b18fa9 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -338,10 +338,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return false;
+ if (args->len > NFSSVC_MAXBLKSIZE_V2)
+ return false;
+- if (!xdr_stream_subsegment(xdr, &args->payload, args->len))
+- return false;
+
+- return true;
++ return xdr_stream_subsegment(xdr, &args->payload, args->len);
+ }
+
+ bool
+--
+2.43.0
+
--- /dev/null
+From 5c01cac13e8feadbb6b4ebe877e8467153de6087 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Oct 2021 12:11:45 -0400
+Subject: NFSD: Combine XDR error tracepoints
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 70e94d757b3e1f46486d573729d84c8955c81dce ]
+
+Clean up: The garbage_args and cant_encode tracepoints report the
+same information as each other, so combine them into a single
+tracepoint class to reduce code duplication and slightly reduce the
+size of trace.o.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/trace.h | 28 +++++++---------------------
+ 1 file changed, 7 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 61943a629cdee..1c98a0f857498 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -47,7 +47,7 @@
+ rqstp->rq_xprt->xpt_remotelen); \
+ } while (0);
+
+-TRACE_EVENT(nfsd_garbage_args_err,
++DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
+ TP_PROTO(
+ const struct svc_rqst *rqstp
+ ),
+@@ -69,27 +69,13 @@ TRACE_EVENT(nfsd_garbage_args_err,
+ )
+ );
+
+-TRACE_EVENT(nfsd_cant_encode_err,
+- TP_PROTO(
+- const struct svc_rqst *rqstp
+- ),
+- TP_ARGS(rqstp),
+- TP_STRUCT__entry(
+- NFSD_TRACE_PROC_ARG_FIELDS
++#define DEFINE_NFSD_XDR_ERR_EVENT(name) \
++DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \
++ TP_PROTO(const struct svc_rqst *rqstp), \
++ TP_ARGS(rqstp))
+
+- __field(u32, vers)
+- __field(u32, proc)
+- ),
+- TP_fast_assign(
+- NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+-
+- __entry->vers = rqstp->rq_vers;
+- __entry->proc = rqstp->rq_proc;
+- ),
+- TP_printk("xid=0x%08x vers=%u proc=%u",
+- __entry->xid, __entry->vers, __entry->proc
+- )
+-);
++DEFINE_NFSD_XDR_ERR_EVENT(garbage_args);
++DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
+
+ #define show_nfsd_may_flags(x) \
+ __print_flags(x, "|", \
+--
+2.43.0
+
--- /dev/null
+From 087ecc11228f3d0e5fb51c9bd6bb4e6e7e854546 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 15:09:04 -0500
+Subject: NFSD: Convert filecache to rhltable
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c4c649ab413ba6a785b25f0edbb12f617c87db2a ]
+
+While we were converting the nfs4_file hashtable to use the kernel's
+resizable hashtable data structure, Neil Brown observed that the
+list variant (rhltable) would be better for managing nfsd_file items
+as well. The nfsd_file hash table will contain multiple entries for
+the same inode -- these should be kept together on a list. And, it
+could be possible for exotic or malicious client behavior to cause
+the hash table to resize itself on every insertion.
+
+A nice simplification is that rhltable_lookup() can return a list
+that contains only nfsd_file items that match a given inode, which
+enables us to eliminate specialized hash table helper functions and
+use the default functions provided by the rhashtable implementation.
+
+Since we are now storing nfsd_file items for the same inode on a
+single list, that effectively reduces the number of hash entries
+that have to be tracked in the hash table. The minimum bucket count
+is therefore lowered.
+
+Light testing with fstests generic/531 shows no regressions.
+
+Suggested-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 311 ++++++++++++++++++--------------------------
+ fs/nfsd/filecache.h | 9 +-
+ 2 files changed, 133 insertions(+), 187 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 080d796547854..52e67ec267965 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -73,70 +73,9 @@ static struct list_lru nfsd_file_lru;
+ static unsigned long nfsd_file_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+ static struct delayed_work nfsd_filecache_laundrette;
+-static struct rhashtable nfsd_file_rhash_tbl
++static struct rhltable nfsd_file_rhltable
+ ____cacheline_aligned_in_smp;
+
+-enum nfsd_file_lookup_type {
+- NFSD_FILE_KEY_INODE,
+- NFSD_FILE_KEY_FULL,
+-};
+-
+-struct nfsd_file_lookup_key {
+- struct inode *inode;
+- struct net *net;
+- const struct cred *cred;
+- unsigned char need;
+- bool gc;
+- enum nfsd_file_lookup_type type;
+-};
+-
+-/*
+- * The returned hash value is based solely on the address of an in-code
+- * inode, a pointer to a slab-allocated object. The entropy in such a
+- * pointer is concentrated in its middle bits.
+- */
+-static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
+-{
+- unsigned long ptr = (unsigned long)inode;
+- u32 k;
+-
+- k = ptr >> L1_CACHE_SHIFT;
+- k &= 0x00ffffff;
+- return jhash2(&k, 1, seed);
+-}
+-
+-/**
+- * nfsd_file_key_hashfn - Compute the hash value of a lookup key
+- * @data: key on which to compute the hash value
+- * @len: rhash table's key_len parameter (unused)
+- * @seed: rhash table's random seed of the day
+- *
+- * Return value:
+- * Computed 32-bit hash value
+- */
+-static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct nfsd_file_lookup_key *key = data;
+-
+- return nfsd_file_inode_hash(key->inode, seed);
+-}
+-
+-/**
+- * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
+- * @data: object on which to compute the hash value
+- * @len: rhash table's key_len parameter (unused)
+- * @seed: rhash table's random seed of the day
+- *
+- * Return value:
+- * Computed 32-bit hash value
+- */
+-static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
+-{
+- const struct nfsd_file *nf = data;
+-
+- return nfsd_file_inode_hash(nf->nf_inode, seed);
+-}
+-
+ static bool
+ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+ {
+@@ -157,55 +96,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+ return true;
+ }
+
+-/**
+- * nfsd_file_obj_cmpfn - Match a cache item against search criteria
+- * @arg: search criteria
+- * @ptr: cache item to check
+- *
+- * Return values:
+- * %0 - Item matches search criteria
+- * %1 - Item does not match search criteria
+- */
+-static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+- const void *ptr)
+-{
+- const struct nfsd_file_lookup_key *key = arg->key;
+- const struct nfsd_file *nf = ptr;
+-
+- switch (key->type) {
+- case NFSD_FILE_KEY_INODE:
+- if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+- return 1;
+- if (nf->nf_inode != key->inode)
+- return 1;
+- break;
+- case NFSD_FILE_KEY_FULL:
+- if (nf->nf_inode != key->inode)
+- return 1;
+- if (nf->nf_may != key->need)
+- return 1;
+- if (nf->nf_net != key->net)
+- return 1;
+- if (!nfsd_match_cred(nf->nf_cred, key->cred))
+- return 1;
+- if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+- return 1;
+- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+- return 1;
+- break;
+- }
+- return 0;
+-}
+-
+ static const struct rhashtable_params nfsd_file_rhash_params = {
+ .key_len = sizeof_field(struct nfsd_file, nf_inode),
+ .key_offset = offsetof(struct nfsd_file, nf_inode),
+- .head_offset = offsetof(struct nfsd_file, nf_rhash),
+- .hashfn = nfsd_file_key_hashfn,
+- .obj_hashfn = nfsd_file_obj_hashfn,
+- .obj_cmpfn = nfsd_file_obj_cmpfn,
+- /* Reduce resizing churn on light workloads */
+- .min_size = 512, /* buckets */
++ .head_offset = offsetof(struct nfsd_file, nf_rlist),
++
++ /*
++ * Start with a single page hash table to reduce resizing churn
++ * on light workloads.
++ */
++ .min_size = 256,
+ .automatic_shrinking = true,
+ };
+
+@@ -308,27 +208,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+ }
+
+ static struct nfsd_file *
+-nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
++nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
++ bool want_gc)
+ {
+ struct nfsd_file *nf;
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+- if (nf) {
+- INIT_LIST_HEAD(&nf->nf_lru);
+- nf->nf_birthtime = ktime_get();
+- nf->nf_file = NULL;
+- nf->nf_cred = get_current_cred();
+- nf->nf_net = key->net;
+- nf->nf_flags = 0;
+- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+- __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+- if (key->gc)
+- __set_bit(NFSD_FILE_GC, &nf->nf_flags);
+- nf->nf_inode = key->inode;
+- refcount_set(&nf->nf_ref, 1);
+- nf->nf_may = key->need;
+- nf->nf_mark = NULL;
+- }
++ if (unlikely(!nf))
++ return NULL;
++
++ INIT_LIST_HEAD(&nf->nf_lru);
++ nf->nf_birthtime = ktime_get();
++ nf->nf_file = NULL;
++ nf->nf_cred = get_current_cred();
++ nf->nf_net = net;
++ nf->nf_flags = want_gc ?
++ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
++ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
++ nf->nf_inode = inode;
++ refcount_set(&nf->nf_ref, 1);
++ nf->nf_may = need;
++ nf->nf_mark = NULL;
+ return nf;
+ }
+
+@@ -353,8 +253,8 @@ static void
+ nfsd_file_hash_remove(struct nfsd_file *nf)
+ {
+ trace_nfsd_file_unhash(nf);
+- rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+- nfsd_file_rhash_params);
++ rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
++ nfsd_file_rhash_params);
+ }
+
+ static bool
+@@ -687,8 +587,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ * @inode: inode on which to close out nfsd_files
+ * @dispose: list on which to gather nfsd_files to close out
+ *
+- * An nfsd_file represents a struct file being held open on behalf of nfsd. An
+- * open file however can block other activity (such as leases), or cause
++ * An nfsd_file represents a struct file being held open on behalf of nfsd.
++ * An open file however can block other activity (such as leases), or cause
+ * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
+ *
+ * This function is intended to find open nfsd_files when this sort of
+@@ -701,21 +601,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ static void
+ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+ {
+- struct nfsd_file_lookup_key key = {
+- .type = NFSD_FILE_KEY_INODE,
+- .inode = inode,
+- .gc = true,
+- };
++ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
+
+ rcu_read_lock();
+- do {
+- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params);
+- if (!nf)
+- break;
++ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++ nfsd_file_rhash_params);
++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
++ if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
++ continue;
+ nfsd_file_cond_queue(nf, dispose);
+- } while (1);
++ }
+ rcu_read_unlock();
+ }
+
+@@ -839,7 +735,7 @@ nfsd_file_cache_init(void)
+ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+ return 0;
+
+- ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
++ ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
+ if (ret)
+ return ret;
+
+@@ -907,7 +803,7 @@ nfsd_file_cache_init(void)
+ nfsd_file_mark_slab = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+- rhashtable_destroy(&nfsd_file_rhash_tbl);
++ rhltable_destroy(&nfsd_file_rhltable);
+ goto out;
+ }
+
+@@ -926,7 +822,7 @@ __nfsd_file_cache_purge(struct net *net)
+ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+
+- rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
++ rhltable_walk_enter(&nfsd_file_rhltable, &iter);
+ do {
+ rhashtable_walk_start(&iter);
+
+@@ -1032,7 +928,7 @@ nfsd_file_cache_shutdown(void)
+ nfsd_file_mark_slab = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
+- rhashtable_destroy(&nfsd_file_rhash_tbl);
++ rhltable_destroy(&nfsd_file_rhltable);
+
+ for_each_possible_cpu(i) {
+ per_cpu(nfsd_file_cache_hits, i) = 0;
+@@ -1043,6 +939,35 @@ nfsd_file_cache_shutdown(void)
+ }
+ }
+
++static struct nfsd_file *
++nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
++ struct inode *inode, unsigned char need,
++ bool want_gc)
++{
++ struct rhlist_head *tmp, *list;
++ struct nfsd_file *nf;
++
++ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++ nfsd_file_rhash_params);
++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
++ if (nf->nf_may != need)
++ continue;
++ if (nf->nf_net != net)
++ continue;
++ if (!nfsd_match_cred(nf->nf_cred, cred))
++ continue;
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
++ continue;
++ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
++ continue;
++
++ if (!nfsd_file_get(nf))
++ continue;
++ return nf;
++ }
++ return NULL;
++}
++
+ /**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode to check
+@@ -1057,16 +982,20 @@ nfsd_file_cache_shutdown(void)
+ bool
+ nfsd_file_is_cached(struct inode *inode)
+ {
+- struct nfsd_file_lookup_key key = {
+- .type = NFSD_FILE_KEY_INODE,
+- .inode = inode,
+- .gc = true,
+- };
++ struct rhlist_head *tmp, *list;
++ struct nfsd_file *nf;
+ bool ret = false;
+
+- if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params) != NULL)
+- ret = true;
++ rcu_read_lock();
++ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
++ nfsd_file_rhash_params);
++ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
++ ret = true;
++ break;
++ }
++ rcu_read_unlock();
++
+ trace_nfsd_file_is_cached(inode, (int)ret);
+ return ret;
+ }
+@@ -1076,14 +1005,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf, bool want_gc)
+ {
+- struct nfsd_file_lookup_key key = {
+- .type = NFSD_FILE_KEY_FULL,
+- .need = may_flags & NFSD_FILE_MAY_MASK,
+- .net = SVC_NET(rqstp),
+- .gc = want_gc,
+- };
++ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
++ struct net *net = SVC_NET(rqstp);
++ struct nfsd_file *new, *nf;
++ const struct cred *cred;
+ bool open_retry = true;
+- struct nfsd_file *nf;
++ struct inode *inode;
+ __be32 status;
+ int ret;
+
+@@ -1091,14 +1018,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (status != nfs_ok)
+ return status;
+- key.inode = d_inode(fhp->fh_dentry);
+- key.cred = get_current_cred();
++ inode = d_inode(fhp->fh_dentry);
++ cred = get_current_cred();
+
+ retry:
+ rcu_read_lock();
+- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params);
+- nf = nfsd_file_get(nf);
++ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
+ rcu_read_unlock();
+
+ if (nf) {
+@@ -1112,21 +1037,32 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto wait_for_construction;
+ }
+
+- nf = nfsd_file_alloc(&key, may_flags);
+- if (!nf) {
++ new = nfsd_file_alloc(net, inode, need, want_gc);
++ if (!new) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
+- ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+- &key, &nf->nf_rhash,
+- nfsd_file_rhash_params);
++ rcu_read_lock();
++ spin_lock(&inode->i_lock);
++ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
++ if (unlikely(nf)) {
++ spin_unlock(&inode->i_lock);
++ rcu_read_unlock();
++ nfsd_file_slab_free(&new->nf_rcu);
++ goto wait_for_construction;
++ }
++ nf = new;
++ ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
++ nfsd_file_rhash_params);
++ spin_unlock(&inode->i_lock);
++ rcu_read_unlock();
+ if (likely(ret == 0))
+ goto open_file;
+
+ if (ret == -EEXIST)
+ goto retry;
+- trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
++ trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
+ status = nfserr_jukebox;
+ goto construction_err;
+
+@@ -1135,7 +1071,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
++ trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
+ if (!open_retry) {
+ status = nfserr_jukebox;
+ goto construction_err;
+@@ -1157,13 +1093,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nfsd_file_check_write_error(nf);
+ *pnf = nf;
+ }
+- put_cred(key.cred);
+- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
++ put_cred(cred);
++ trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
+ return status;
+
+ open_file:
+ trace_nfsd_file_alloc(nf);
+- nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
++ nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
+ if (nf->nf_mark) {
+ if (file) {
+ get_file(file);
+@@ -1181,7 +1117,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+- if (status == nfs_ok && key.inode->i_nlink == 0)
++ if (status != nfs_ok || inode->i_nlink == 0)
+ status = nfserr_jukebox;
+ if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+@@ -1208,8 +1144,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * seconds after the final nfsd_file_put() in case the caller
+ * wants to re-use it.
+ *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ * %nfs_ok - @pnf points to an nfsd_file with its reference
++ * count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+ */
+ __be32
+ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1229,8 +1168,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put().
+ *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ * %nfs_ok - @pnf points to an nfsd_file with its reference
++ * count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+ */
+ __be32
+ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1251,8 +1193,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
+ *
+- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+- * network byte order is returned.
++ * Return values:
++ * %nfs_ok - @pnf points to an nfsd_file with its reference
++ * count boosted.
++ *
++ * On error, an nfsstat value in network byte order is returned.
+ */
+ __be32
+ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -1283,7 +1228,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ lru = list_lru_count(&nfsd_file_lru);
+
+ rcu_read_lock();
+- ht = &nfsd_file_rhash_tbl;
++ ht = &nfsd_file_rhltable.ht;
+ count = atomic_read(&ht->nelems);
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ buckets = tbl->size;
+@@ -1299,7 +1244,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ evictions += per_cpu(nfsd_file_evictions, i);
+ }
+
+- seq_printf(m, "total entries: %u\n", count);
++ seq_printf(m, "total inodes: %u\n", count);
+ seq_printf(m, "hash buckets: %u\n", buckets);
+ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 41516a4263ea5..e54165a3224f0 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -29,9 +29,8 @@ struct nfsd_file_mark {
+ * never be dereferenced, only used for comparison.
+ */
+ struct nfsd_file {
+- struct rhash_head nf_rhash;
+- struct list_head nf_lru;
+- struct rcu_head nf_rcu;
++ struct rhlist_head nf_rlist;
++ void *nf_inode;
+ struct file *nf_file;
+ const struct cred *nf_cred;
+ struct net *nf_net;
+@@ -40,10 +39,12 @@ struct nfsd_file {
+ #define NFSD_FILE_REFERENCED (2)
+ #define NFSD_FILE_GC (3)
+ unsigned long nf_flags;
+- struct inode *nf_inode; /* don't deref */
+ refcount_t nf_ref;
+ unsigned char nf_may;
++
+ struct nfsd_file_mark *nf_mark;
++ struct list_head nf_lru;
++ struct rcu_head nf_rcu;
+ ktime_t nf_birthtime;
+ };
+
+--
+2.43.0
+
--- /dev/null
+From fc683b8c7bcfaf948316ee4fbc9aea7610032173 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:30 -0400
+Subject: NFSD: Convert the filecache to use rhashtable
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit ce502f81ba884c1fe45dc0ebddbcaaa4ec0fc5fb ]
+
+Enable the filecache hash table to start small, then grow with the
+workload. Smaller server deployments benefit because there should
+be lower memory utilization. Larger server deployments should see
+improved scaling with the number of open files.
+
+Suggested-by: Jeff Layton <jlayton@kernel.org>
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 265 +++++++++++++++++++-------------------------
+ fs/nfsd/trace.h | 63 ++++++++++-
+ 2 files changed, 179 insertions(+), 149 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 95e7e15b567e2..45dd4f3fa0905 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -61,7 +61,6 @@ static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
+ static struct list_lru nfsd_file_lru;
+ static unsigned long nfsd_file_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+-static atomic_long_t nfsd_filecache_count;
+ static struct delayed_work nfsd_filecache_laundrette;
+ static struct rhashtable nfsd_file_rhash_tbl
+ ____cacheline_aligned_in_smp;
+@@ -197,7 +196,7 @@ static const struct rhashtable_params nfsd_file_rhash_params = {
+ static void
+ nfsd_file_schedule_laundrette(void)
+ {
+- if ((atomic_long_read(&nfsd_filecache_count) == 0) ||
++ if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
+ test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
+ return;
+
+@@ -297,7 +296,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+ }
+
+ static struct nfsd_file *
+-nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net)
++nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+ {
+ struct nfsd_file *nf;
+
+@@ -308,11 +307,14 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net)
+ nf->nf_birthtime = ktime_get();
+ nf->nf_file = NULL;
+ nf->nf_cred = get_current_cred();
+- nf->nf_net = net;
++ nf->nf_net = key->net;
+ nf->nf_flags = 0;
+- nf->nf_inode = inode;
+- refcount_set(&nf->nf_ref, 1);
+- nf->nf_may = may & NFSD_FILE_MAY_MASK;
++ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
++ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
++ nf->nf_inode = key->inode;
++ /* nf_ref is pre-incremented for hash table */
++ refcount_set(&nf->nf_ref, 2);
++ nf->nf_may = key->need;
+ nf->nf_mark = NULL;
+ trace_nfsd_file_alloc(nf);
+ }
+@@ -398,40 +400,21 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf)
+ }
+
+ static void
+-nfsd_file_do_unhash(struct nfsd_file *nf)
++nfsd_file_hash_remove(struct nfsd_file *nf)
+ {
+- struct inode *inode = nf->nf_inode;
+- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+- NFSD_FILE_HASH_BITS);
+-
+- lockdep_assert_held(&nfsd_file_hashtbl[hashval].nfb_lock);
+-
+ trace_nfsd_file_unhash(nf);
+
+ if (nfsd_file_check_write_error(nf))
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+- --nfsd_file_hashtbl[hashval].nfb_count;
+- hlist_del_rcu(&nf->nf_node);
+- atomic_long_dec(&nfsd_filecache_count);
+-}
+-
+-static void
+-nfsd_file_hash_remove(struct nfsd_file *nf)
+-{
+- struct inode *inode = nf->nf_inode;
+- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+- NFSD_FILE_HASH_BITS);
+-
+- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- nfsd_file_do_unhash(nf);
+- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
++ rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
++ nfsd_file_rhash_params);
+ }
+
+ static bool
+ nfsd_file_unhash(struct nfsd_file *nf)
+ {
+ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- nfsd_file_do_unhash(nf);
++ nfsd_file_hash_remove(nf);
+ return true;
+ }
+ return false;
+@@ -441,9 +424,9 @@ nfsd_file_unhash(struct nfsd_file *nf)
+ * Return true if the file was unhashed.
+ */
+ static bool
+-nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
++nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
+ {
+- trace_nfsd_file_unhash_and_release_locked(nf);
++ trace_nfsd_file_unhash_and_dispose(nf);
+ if (!nfsd_file_unhash(nf))
+ return false;
+ /* keep final reference for nfsd_file_lru_dispose */
+@@ -702,20 +685,23 @@ static struct shrinker nfsd_file_shrinker = {
+ static unsigned int
+ __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
+ {
+- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+- NFSD_FILE_HASH_BITS);
+- unsigned int count = 0;
+- struct nfsd_file *nf;
+- struct hlist_node *tmp;
+-
+- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+- if (inode == nf->nf_inode) {
+- nfsd_file_unhash_and_release_locked(nf, dispose);
+- count++;
+- }
+- }
+- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
++ struct nfsd_file_lookup_key key = {
++ .type = NFSD_FILE_KEY_INODE,
++ .inode = inode,
++ };
++ unsigned int count = 0;
++ struct nfsd_file *nf;
++
++ rcu_read_lock();
++ do {
++ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
++ nfsd_file_rhash_params);
++ if (!nf)
++ break;
++ nfsd_file_unhash_and_dispose(nf, dispose);
++ count++;
++ } while (1);
++ rcu_read_unlock();
+ return count;
+ }
+
+@@ -923,30 +909,35 @@ nfsd_file_cache_init(void)
+ static void
+ __nfsd_file_cache_purge(struct net *net)
+ {
+- unsigned int i;
+- struct nfsd_file *nf;
+- struct hlist_node *next;
++ struct rhashtable_iter iter;
++ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+ bool del;
+
+- for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+- struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
++ rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
++ do {
++ rhashtable_walk_start(&iter);
+
+- spin_lock(&nfb->nfb_lock);
+- hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
++ nf = rhashtable_walk_next(&iter);
++ while (!IS_ERR_OR_NULL(nf)) {
+ if (net && nf->nf_net != net)
+ continue;
+- del = nfsd_file_unhash_and_release_locked(nf, &dispose);
++ del = nfsd_file_unhash_and_dispose(nf, &dispose);
+
+ /*
+ * Deadlock detected! Something marked this entry as
+ * unhased, but hasn't removed it from the hash list.
+ */
+ WARN_ON_ONCE(!del);
++
++ nf = rhashtable_walk_next(&iter);
+ }
+- spin_unlock(&nfb->nfb_lock);
+- nfsd_file_dispose_list(&dispose);
+- }
++
++ rhashtable_walk_stop(&iter);
++ } while (nf == ERR_PTR(-EAGAIN));
++ rhashtable_walk_exit(&iter);
++
++ nfsd_file_dispose_list(&dispose);
+ }
+
+ static struct nfsd_fcache_disposal *
+@@ -1051,56 +1042,29 @@ nfsd_file_cache_shutdown(void)
+ }
+ }
+
+-static struct nfsd_file *
+-nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+- unsigned int hashval, struct net *net)
+-{
+- struct nfsd_file *nf;
+- unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+-
+- hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+- nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
+- if (nf->nf_may != need)
+- continue;
+- if (nf->nf_inode != inode)
+- continue;
+- if (nf->nf_net != net)
+- continue;
+- if (!nfsd_match_cred(nf->nf_cred, current_cred()))
+- continue;
+- if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+- continue;
+- if (nfsd_file_get(nf) != NULL)
+- return nf;
+- }
+- return NULL;
+-}
+-
+ /**
+- * nfsd_file_is_cached - are there any cached open files for this fh?
+- * @inode: inode of the file to check
++ * nfsd_file_is_cached - are there any cached open files for this inode?
++ * @inode: inode to check
++ *
++ * The lookup matches inodes in all net namespaces and is atomic wrt
++ * nfsd_file_acquire().
+ *
+- * Scan the hashtable for open files that match this fh. Returns true if there
+- * are any, and false if not.
++ * Return values:
++ * %true: filecache contains at least one file matching this inode
++ * %false: filecache contains no files matching this inode
+ */
+ bool
+ nfsd_file_is_cached(struct inode *inode)
+ {
+- bool ret = false;
+- struct nfsd_file *nf;
+- unsigned int hashval;
+-
+- hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+-
+- rcu_read_lock();
+- hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+- nf_node) {
+- if (inode == nf->nf_inode) {
+- ret = true;
+- break;
+- }
+- }
+- rcu_read_unlock();
++ struct nfsd_file_lookup_key key = {
++ .type = NFSD_FILE_KEY_INODE,
++ .inode = inode,
++ };
++ bool ret = false;
++
++ if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
++ nfsd_file_rhash_params) != NULL)
++ ret = true;
+ trace_nfsd_file_is_cached(inode, (int)ret);
+ return ret;
+ }
+@@ -1109,39 +1073,51 @@ static __be32
+ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf, bool open)
+ {
+- __be32 status;
+- struct net *net = SVC_NET(rqstp);
++ struct nfsd_file_lookup_key key = {
++ .type = NFSD_FILE_KEY_FULL,
++ .need = may_flags & NFSD_FILE_MAY_MASK,
++ .net = SVC_NET(rqstp),
++ };
+ struct nfsd_file *nf, *new;
+- struct inode *inode;
+- unsigned int hashval;
+ bool retry = true;
++ __be32 status;
+
+- /* FIXME: skip this if fh_dentry is already set? */
+ status = fh_verify(rqstp, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (status != nfs_ok)
+ return status;
++ key.inode = d_inode(fhp->fh_dentry);
++ key.cred = get_current_cred();
+
+- inode = d_inode(fhp->fh_dentry);
+- hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
+ retry:
+- rcu_read_lock();
+- nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+- rcu_read_unlock();
++ /* Avoid allocation if the item is already in cache */
++ nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
++ nfsd_file_rhash_params);
++ if (nf)
++ nf = nfsd_file_get(nf);
+ if (nf)
+ goto wait_for_construction;
+
+- new = nfsd_file_alloc(inode, may_flags, net);
++ new = nfsd_file_alloc(&key, may_flags);
+ if (!new) {
+ status = nfserr_jukebox;
+ goto out_status;
+ }
+
+- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
+- if (nf == NULL)
++ nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl,
++ &key, &new->nf_rhash,
++ nfsd_file_rhash_params);
++ if (!nf) {
++ nf = new;
++ goto open_file;
++ }
++ if (IS_ERR(nf))
++ goto insert_err;
++ nf = nfsd_file_get(nf);
++ if (nf == NULL) {
++ nf = new;
+ goto open_file;
+- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
++ }
+ nfsd_file_slab_free(&new->nf_rcu);
+
+ wait_for_construction:
+@@ -1149,6 +1125,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
++ trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
+ if (!retry) {
+ status = nfserr_jukebox;
+ goto out;
+@@ -1173,22 +1150,11 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ out_status:
+- trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
++ put_cred(key.cred);
++ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ return status;
+
+ open_file:
+- nf = new;
+- /* Take reference for the hashtable */
+- refcount_inc(&nf->nf_ref);
+- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+- __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+- hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+- ++nfsd_file_hashtbl[hashval].nfb_count;
+- nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+- nfsd_file_hashtbl[hashval].nfb_count);
+- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- atomic_long_inc(&nfsd_filecache_count);
+-
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark) {
+ if (open) {
+@@ -1203,19 +1169,20 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+- if (status != nfs_ok || inode->i_nlink == 0) {
+- bool do_free;
+- nfsd_file_lru_remove(nf);
+- spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- do_free = nfsd_file_unhash(nf);
+- spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- if (do_free)
++ if (status != nfs_ok || key.inode->i_nlink == 0)
++ if (nfsd_file_unhash(nf))
+ nfsd_file_put_noref(nf);
+- }
+ clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ goto out;
++
++insert_err:
++ nfsd_file_slab_free(&new->nf_rcu);
++ trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf));
++ nf = NULL;
++ status = nfserr_jukebox;
++ goto out_status;
+ }
+
+ /**
+@@ -1261,21 +1228,23 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+ unsigned long releases = 0, pages_flushed = 0, evictions = 0;
+ unsigned long hits = 0, acquisitions = 0;
+- unsigned int i, count = 0, longest = 0;
++ unsigned int i, count = 0, buckets = 0;
+ unsigned long lru = 0, total_age = 0;
+
+- /*
+- * No need for spinlocks here since we're not terribly interested in
+- * accuracy. We do take the nfsd_mutex simply to ensure that we
+- * don't end up racing with server shutdown
+- */
++ /* Serialize with server shutdown */
+ mutex_lock(&nfsd_mutex);
+ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
+- for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+- count += nfsd_file_hashtbl[i].nfb_count;
+- longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+- }
++ struct bucket_table *tbl;
++ struct rhashtable *ht;
++
+ lru = list_lru_count(&nfsd_file_lru);
++
++ rcu_read_lock();
++ ht = &nfsd_file_rhash_tbl;
++ count = atomic_read(&ht->nelems);
++ tbl = rht_dereference_rcu(ht->tbl, ht);
++ buckets = tbl->size;
++ rcu_read_unlock();
+ }
+ mutex_unlock(&nfsd_mutex);
+
+@@ -1289,7 +1258,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ }
+
+ seq_printf(m, "total entries: %u\n", count);
+- seq_printf(m, "longest chain: %u\n", longest);
++ seq_printf(m, "hash buckets: %u\n", buckets);
+ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
+ seq_printf(m, "acquisitions: %lu\n", acquisitions);
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index e82ea1abfbd46..f94db18777ad7 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -777,7 +777,7 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+-DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
++DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose);
+
+ TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(
+@@ -820,6 +820,67 @@ TRACE_EVENT(nfsd_file_acquire,
+ __entry->nf_file, __entry->status)
+ );
+
++TRACE_EVENT(nfsd_file_insert_err,
++ TP_PROTO(
++ const struct svc_rqst *rqstp,
++ const struct inode *inode,
++ unsigned int may_flags,
++ long error
++ ),
++ TP_ARGS(rqstp, inode, may_flags, error),
++ TP_STRUCT__entry(
++ __field(u32, xid)
++ __field(const void *, inode)
++ __field(unsigned long, may_flags)
++ __field(long, error)
++ ),
++ TP_fast_assign(
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->inode = inode;
++ __entry->may_flags = may_flags;
++ __entry->error = error;
++ ),
++ TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld",
++ __entry->xid, __entry->inode,
++ show_nfsd_may_flags(__entry->may_flags),
++ __entry->error
++ )
++);
++
++TRACE_EVENT(nfsd_file_cons_err,
++ TP_PROTO(
++ const struct svc_rqst *rqstp,
++ const struct inode *inode,
++ unsigned int may_flags,
++ const struct nfsd_file *nf
++ ),
++ TP_ARGS(rqstp, inode, may_flags, nf),
++ TP_STRUCT__entry(
++ __field(u32, xid)
++ __field(const void *, inode)
++ __field(unsigned long, may_flags)
++ __field(unsigned int, nf_ref)
++ __field(unsigned long, nf_flags)
++ __field(unsigned long, nf_may)
++ __field(const void *, nf_file)
++ ),
++ TP_fast_assign(
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->inode = inode;
++ __entry->may_flags = may_flags;
++ __entry->nf_ref = refcount_read(&nf->nf_ref);
++ __entry->nf_flags = nf->nf_flags;
++ __entry->nf_may = nf->nf_may;
++ __entry->nf_file = nf->nf_file;
++ ),
++ TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
++ __entry->xid, __entry->inode,
++ show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref,
++ show_nf_flags(__entry->nf_flags),
++ show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
++ )
++);
++
+ TRACE_EVENT(nfsd_file_open,
+ TP_PROTO(struct nfsd_file *nf, __be32 status),
+ TP_ARGS(nf, status),
+--
+2.43.0
+
--- /dev/null
+From aa4f75e9d5dc4c68243b17cc27fdb98df07f277e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 10:07:59 -0500
+Subject: NFSD: copy the whole verifier in nfsd_copy_write_verifier
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 90d2175572470ba7f55da8447c72ddd4942923c4 ]
+
+Currently, we're only memcpy'ing the first __be32. Ensure we copy into
+both words.
+
+Fixes: 91d2e9b56cf5 ("NFSD: Clean up the nfsd_net::nfssvc_boot field")
+Reported-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 325d3d3f12110..a0ecec54d3d7d 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -363,7 +363,7 @@ void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
+
+ do {
+ read_seqbegin_or_lock(&nn->writeverf_lock, &seq);
+- memcpy(verf, nn->writeverf, sizeof(*verf));
++ memcpy(verf, nn->writeverf, sizeof(nn->writeverf));
+ } while (need_seqretry(&nn->writeverf_lock, seq));
+ done_seqretry(&nn->writeverf_lock, seq);
+ }
+--
+2.43.0
+
--- /dev/null
+From 28f46ae2744fa654deec1c8f9c7e6ea0acc933dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 12:41:32 -0500
+Subject: NFSD: De-duplicate net_generic(SVC_NET(rqstp), nfsd_net_id)
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fb7622c2dbd1aa41133a8c73e1137b833c074519 ]
+
+Since this pointer is used repeatedly, move it to a stack variable.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 98d370dcca867..17985d868887a 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -966,6 +966,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ unsigned long *cnt, int stable,
+ __be32 *verf)
+ {
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct file *file = nf->nf_file;
+ struct super_block *sb = file_inode(file)->i_sb;
+ struct svc_export *exp;
+@@ -1010,13 +1011,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+- nfsd_copy_boot_verifier(verf,
+- net_generic(SVC_NET(rqstp),
+- nfsd_net_id));
++ nfsd_copy_boot_verifier(verf, nn);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0) {
+- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+- nfsd_net_id));
++ nfsd_reset_boot_verifier(nn);
+ goto out_nfserr;
+ }
+ *cnt = host_err;
+@@ -1029,8 +1027,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ if (stable && use_wgather) {
+ host_err = wait_for_concurrent_writes(file);
+ if (host_err < 0)
+- nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+- nfsd_net_id));
++ nfsd_reset_boot_verifier(nn);
+ }
+
+ out_nfserr:
+--
+2.43.0
+
--- /dev/null
+From 1e08cf62dca3a4e9ced1e2a347cfec60d61e35fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Dec 2021 10:20:45 -0500
+Subject: NFSD: De-duplicate nfsd4_decode_bitmap4()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit cd2e999c7c394ae916d8be741418b3c6c1dddea8 ]
+
+Clean up. Trond points out that xdr_stream_decode_uint32_array()
+does the same thing as nfsd4_decode_bitmap4().
+
+Suggested-by: Trond Myklebust <trondmy@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 17 +++--------------
+ 1 file changed, 3 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 506ecfca2338b..4459722259fb2 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -277,21 +277,10 @@ nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
+ static __be32
+ nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
+ {
+- u32 i, count;
+- __be32 *p;
+-
+- if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+- return nfserr_bad_xdr;
+- /* request sanity */
+- if (count > 1000)
+- return nfserr_bad_xdr;
+- p = xdr_inline_decode(argp->xdr, count << 2);
+- if (!p)
+- return nfserr_bad_xdr;
+- for (i = 0; i < bmlen; i++)
+- bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
++ ssize_t status;
+
+- return nfs_ok;
++ status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen);
++ return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok;
+ }
+
+ static __be32
+--
+2.43.0
+
--- /dev/null
+From bcc87299d253c95b03ffa6c6d438849941da8250 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Jul 2022 14:46:04 -0400
+Subject: NFSD: Decode NFSv4 birth time attribute
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5b2f3e0777da2a5dd62824bbe2fdab1d12caaf8f ]
+
+NFSD has advertised support for the NFSv4 time_create attribute
+since commit e377a3e698fb ("nfsd: Add support for the birth time
+attribute").
+
+Igor Mammedov reports that Mac OS clients attempt to set the NFSv4
+birth time attribute via OPEN(CREATE) and SETATTR if the server
+indicates that it supports it, but since the above commit was
+merged, those attempts now fail.
+
+Table 5 in RFC 8881 lists the time_create attribute as one that can
+be both set and retrieved, but the above commit did not add server
+support for clients to provide a time_create attribute. IMO that's
+a bug in our implementation of the NFSv4 protocol, which this commit
+addresses.
+
+Whether NFSD silently ignores the new birth time or actually sets it
+is another matter. I haven't found another filesystem service in the
+Linux kernel that enables users or clients to modify a file's birth
+time attribute.
+
+This commit reflects my (perhaps incorrect) understanding of whether
+Linux users can set a file's birth time. NFSD will now recognize a
+time_create attribute but it ignores its value. It clears the
+time_create bit in the returned attribute bitmask to indicate that
+the value was not used.
+
+Reported-by: Igor Mammedov <imammedo@redhat.com>
+Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute")
+Tested-by: Igor Mammedov <imammedo@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 9 +++++++++
+ fs/nfsd/nfsd.h | 3 ++-
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 96d41b1cc2d17..07f891d7fa0ae 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -470,6 +470,15 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
+ return nfserr_bad_xdr;
+ }
+ }
++ if (bmval[1] & FATTR4_WORD1_TIME_CREATE) {
++ struct timespec64 ts;
++
++ /* No Linux filesystem supports setting this attribute. */
++ bmval[1] &= ~FATTR4_WORD1_TIME_CREATE;
++ status = nfsd4_decode_nfstime4(argp, &ts);
++ if (status)
++ return status;
++ }
+ if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+ u32 set_it;
+
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 847b482155ae9..9a8b09afc1733 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -465,7 +465,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
+ (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
+ #define NFSD_WRITEABLE_ATTRS_WORD1 \
+ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
+- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
++ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \
++ | FATTR4_WORD1_TIME_MODIFY_SET)
+ #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+ #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
+ FATTR4_WORD2_SECURITY_LABEL
+--
+2.43.0
+
--- /dev/null
+From 6b1af4bddedbd7e133775c9f6baceb42eb58b4d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:23:45 -0400
+Subject: NFSD: Demote a WARN to a pr_warn()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit ca3f9acb6d3faf78da2b63324f7c737dbddf7f69 ]
+
+The call trace doesn't add much value, but it sure is noisy.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index d70c4e78f0b3f..15991eb9b8d8c 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -630,9 +630,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ }
+
+ status = nfsd4_process_open2(rqstp, resfh, open);
+- WARN(status && open->op_created,
+- "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
+- be32_to_cpu(status));
++ if (status && open->op_created)
++ pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n",
++ be32_to_cpu(status));
+ if (reclaim && !status)
+ nn->somebody_reclaimed = true;
+ out:
+--
+2.43.0
+
--- /dev/null
+From 6932d8b910fb0b1158ef129b93efc202b6b175f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jan 2022 15:57:45 -0500
+Subject: NFSD: Deprecate NFS_OFFSET_MAX
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c306d737691ef84305d4ed0d302c63db2932f0bb ]
+
+NFS_OFFSET_MAX was introduced way back in Linux v2.3.y before there
+was a kernel-wide OFFSET_MAX value. As a clean up, replace the last
+few uses of it with its generic equivalent, and get rid of it.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3xdr.c | 2 +-
+ fs/nfsd/nfs4xdr.c | 2 +-
+ include/linux/nfs.h | 8 --------
+ 3 files changed, 2 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 2e47a07029f1d..0293b8d65f10f 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -1060,7 +1060,7 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name,
+ return false;
+ /* cookie */
+ resp->cookie_offset = dirlist->len;
+- if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0)
++ if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0)
+ return false;
+
+ return true;
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 4459722259fb2..19ddd80239944 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3492,7 +3492,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
+ p = xdr_reserve_space(xdr, 3*4 + namlen);
+ if (!p)
+ goto fail;
+- p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
++ p = xdr_encode_hyper(p, OFFSET_MAX); /* offset of next entry */
+ p = xdr_encode_array(p, name, namlen); /* name length & name */
+
+ nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
+diff --git a/include/linux/nfs.h b/include/linux/nfs.h
+index 0dc7ad38a0da4..b06375e88e589 100644
+--- a/include/linux/nfs.h
++++ b/include/linux/nfs.h
+@@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
+ memcpy(target->data, source->data, source->size);
+ }
+
+-
+-/*
+- * This is really a general kernel constant, but since nothing like
+- * this is defined in the kernel headers, I have to do it here.
+- */
+-#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1))
+-
+-
+ enum nfs3_stable_how {
+ NFS_UNSTABLE = 0,
+ NFS_DATA_SYNC = 1,
+--
+2.43.0
+
--- /dev/null
+From b2eb4d3e8a1766d5d225d4e6785b92717733da7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: discard fh_locked flag and fh_lock/fh_unlock
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit dd8dd403d7b223cc77ee89d8d09caf045e90e648 ]
+
+As all inode locking is now fully balanced, fh_put() does not need to
+call fh_unlock().
+fh_lock() and fh_unlock() are no longer used, so discard them.
+These are the only real users of ->fh_locked, so discard that too.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.c | 3 +--
+ fs/nfsd/nfsfh.h | 56 ++++---------------------------------------------
+ fs/nfsd/vfs.c | 17 +--------------
+ 3 files changed, 6 insertions(+), 70 deletions(-)
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index cd2946a88d727..a5b71526cee0f 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -549,7 +549,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+ if (ref_fh == fhp)
+ fh_put(ref_fh);
+
+- if (fhp->fh_locked || fhp->fh_dentry) {
++ if (fhp->fh_dentry) {
+ printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
+ dentry);
+ }
+@@ -700,7 +700,6 @@ fh_put(struct svc_fh *fhp)
+ struct dentry * dentry = fhp->fh_dentry;
+ struct svc_export * exp = fhp->fh_export;
+ if (dentry) {
+- fh_unlock(fhp);
+ fhp->fh_dentry = NULL;
+ dput(dentry);
+ fh_clear_pre_post_attrs(fhp);
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index 28a4f9a94e2c8..c3ae6414fc5cf 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -81,7 +81,6 @@ typedef struct svc_fh {
+ struct dentry * fh_dentry; /* validated dentry */
+ struct svc_export * fh_export; /* export pointer */
+
+- bool fh_locked; /* inode locked by us */
+ bool fh_want_write; /* remount protection taken */
+ bool fh_no_wcc; /* no wcc data needed */
+ bool fh_no_atomic_attr;
+@@ -93,7 +92,7 @@ typedef struct svc_fh {
+ bool fh_post_saved; /* post-op attrs saved */
+ bool fh_pre_saved; /* pre-op attrs saved */
+
+- /* Pre-op attributes saved during fh_lock */
++ /* Pre-op attributes saved when inode is locked */
+ __u64 fh_pre_size; /* size before operation */
+ struct timespec64 fh_pre_mtime; /* mtime before oper */
+ struct timespec64 fh_pre_ctime; /* ctime before oper */
+@@ -103,7 +102,7 @@ typedef struct svc_fh {
+ */
+ u64 fh_pre_change;
+
+- /* Post-op attributes saved in fh_unlock */
++ /* Post-op attributes saved in fh_fill_post_attrs() */
+ struct kstat fh_post_attr; /* full attrs after operation */
+ u64 fh_post_change; /* nfsv4 change; see above */
+ } svc_fh;
+@@ -223,8 +222,8 @@ void fh_put(struct svc_fh *);
+ static __inline__ struct svc_fh *
+ fh_copy(struct svc_fh *dst, struct svc_fh *src)
+ {
+- WARN_ON(src->fh_dentry || src->fh_locked);
+-
++ WARN_ON(src->fh_dentry);
++
+ *dst = *src;
+ return dst;
+ }
+@@ -323,51 +322,4 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
+ extern void fh_fill_pre_attrs(struct svc_fh *fhp);
+ extern void fh_fill_post_attrs(struct svc_fh *fhp);
+ extern void fh_fill_both_attrs(struct svc_fh *fhp);
+-
+-/*
+- * Lock a file handle/inode
+- * NOTE: both fh_lock and fh_unlock are done "by hand" in
+- * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
+- * so, any changes here should be reflected there.
+- */
+-
+-static inline void
+-fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+-{
+- struct dentry *dentry = fhp->fh_dentry;
+- struct inode *inode;
+-
+- BUG_ON(!dentry);
+-
+- if (fhp->fh_locked) {
+- printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
+- dentry);
+- return;
+- }
+-
+- inode = d_inode(dentry);
+- inode_lock_nested(inode, subclass);
+- fh_fill_pre_attrs(fhp);
+- fhp->fh_locked = true;
+-}
+-
+-static inline void
+-fh_lock(struct svc_fh *fhp)
+-{
+- fh_lock_nested(fhp, I_MUTEX_NORMAL);
+-}
+-
+-/*
+- * Unlock a file handle/inode
+- */
+-static inline void
+-fh_unlock(struct svc_fh *fhp)
+-{
+- if (fhp->fh_locked) {
+- fh_fill_post_attrs(fhp);
+- inode_unlock(d_inode(fhp->fh_dentry));
+- fhp->fh_locked = false;
+- }
+-}
+-
+ #endif /* _LINUX_NFSD_NFSFH_H */
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 73a153be6a5ad..66d4a126f20ab 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1266,13 +1266,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ dirp = d_inode(dentry);
+
+ dchild = dget(resfhp->fh_dentry);
+- if (!fhp->fh_locked) {
+- WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
+- dentry);
+- err = nfserr_io;
+- goto out;
+- }
+-
+ err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
+ if (err)
+ goto out;
+@@ -1641,10 +1634,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+ goto out;
+ }
+
+- /* cannot use fh_lock as we need deadlock protective ordering
+- * so do it by hand */
+ trap = lock_rename(tdentry, fdentry);
+- ffhp->fh_locked = tfhp->fh_locked = true;
+ fh_fill_pre_attrs(ffhp);
+ fh_fill_pre_attrs(tfhp);
+
+@@ -1694,17 +1684,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+ dput(odentry);
+ out_nfserr:
+ err = nfserrno(host_err);
+- /*
+- * We cannot rely on fh_unlock on the two filehandles,
+- * as that would do the wrong thing if the two directories
+- * were the same, so again we do it by hand.
+- */
++
+ if (!close_cached) {
+ fh_fill_post_attrs(ffhp);
+ fh_fill_post_attrs(tfhp);
+ }
+ unlock_rename(tdentry, fdentry);
+- ffhp->fh_locked = tfhp->fh_locked = false;
+ fh_drop_write(ffhp);
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 761d7a30a7e838b3108865a78e7134a18b77ee54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 16:38:08 +1000
+Subject: nfsd: don't allow nfsd threads to be signalled.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 3903902401451b1cd9d797a8c79769eb26ac7fe5 ]
+
+The original implementation of nfsd used signals to stop threads during
+shutdown.
+In Linux 2.3.46pre5 nfsd gained the ability to shut down threads
+internally if it was asked to run "0" threads. After this user-space
+transitioned to using "rpc.nfsd 0" to stop nfsd and sending signals to
+threads was no longer an important part of the API.
+
+In commit 3ebdbe5203a8 ("SUNRPC: discard svo_setup and rename
+svc_set_num_threads_sync()") (v5.17-rc1~75^2~41) we finally removed the
+use of signals for stopping threads, using kthread_stop() instead.
+
+This patch makes the "obvious" next step and removes the ability to
+signal nfsd threads - or any svc threads. nfsd stops allowing signals
+and we don't check for their delivery any more.
+
+This will allow for some simplification in later patches.
+
+A change worth noting is in nfsd4_ssc_setup_dul(). There was previously
+a signal_pending() check which would only succeed when the thread was
+being shut down. It should really have tested kthread_should_stop() as
+well. Now it just does the latter, not the former.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/callback.c | 9 +--------
+ fs/nfsd/nfs4proc.c | 5 ++---
+ fs/nfsd/nfssvc.c | 12 ------------
+ net/sunrpc/svc_xprt.c | 16 ++++++----------
+ 4 files changed, 9 insertions(+), 33 deletions(-)
+
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 456af7d230cf1..46a0a2d6962e1 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp)
+ set_freezable();
+
+ while (!kthread_freezable_should_stop(NULL)) {
+-
+- if (signal_pending(current))
+- flush_signals(current);
+ /*
+ * Listen for a request on the socket
+ */
+@@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp)
+ set_freezable();
+
+ while (!kthread_freezable_should_stop(NULL)) {
+-
+- if (signal_pending(current))
+- flush_signals(current);
+-
+- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
++ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
+ spin_lock_bh(&serv->sv_cb_lock);
+ if (!list_empty(&serv->sv_cb_list)) {
+ req = list_first_entry(&serv->sv_cb_list,
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index ba53cd89ec62c..b6d768bd5ccca 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1313,12 +1313,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ /* found a match */
+ if (ni->nsui_busy) {
+ /* wait - and try again */
+- prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
+- TASK_INTERRUPTIBLE);
++ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
+ spin_unlock(&nn->nfsd_ssc_lock);
+
+ /* allow 20secs for mount/unmount for now - revisit */
+- if (signal_pending(current) ||
++ if (kthread_should_stop() ||
+ (schedule_timeout(20*HZ) == 0)) {
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
+ kfree(work);
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index a0ecec54d3d7d..8063fab2c0279 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -952,15 +952,6 @@ nfsd(void *vrqstp)
+
+ current->fs->umask = 0;
+
+- /*
+- * thread is spawned with all signals set to SIG_IGN, re-enable
+- * the ones that will bring down the thread
+- */
+- allow_signal(SIGKILL);
+- allow_signal(SIGHUP);
+- allow_signal(SIGINT);
+- allow_signal(SIGQUIT);
+-
+ atomic_inc(&nfsdstats.th_cnt);
+
+ set_freezable();
+@@ -985,9 +976,6 @@ nfsd(void *vrqstp)
+ validate_process_creds();
+ }
+
+- /* Clear signals before calling svc_exit_thread() */
+- flush_signals(current);
+-
+ atomic_dec(&nfsdstats.th_cnt);
+
+ out:
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 67ccf1a6459ae..b19592673eef2 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -700,8 +700,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
+ /* Made progress, don't sleep yet */
+ continue;
+
+- set_current_state(TASK_INTERRUPTIBLE);
+- if (signalled() || kthread_should_stop()) {
++ set_current_state(TASK_IDLE);
++ if (kthread_should_stop()) {
+ set_current_state(TASK_RUNNING);
+ return -EINTR;
+ }
+@@ -736,7 +736,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
+ return false;
+
+ /* are we shutting down? */
+- if (signalled() || kthread_should_stop())
++ if (kthread_should_stop())
+ return false;
+
+ /* are we freezing? */
+@@ -758,11 +758,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+ if (rqstp->rq_xprt)
+ goto out_found;
+
+- /*
+- * We have to be able to interrupt this wait
+- * to bring down the daemons ...
+- */
+- set_current_state(TASK_INTERRUPTIBLE);
++ set_current_state(TASK_IDLE);
+ smp_mb__before_atomic();
+ clear_bit(SP_CONGESTED, &pool->sp_flags);
+ clear_bit(RQ_BUSY, &rqstp->rq_flags);
+@@ -784,7 +780,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+ if (!time_left)
+ atomic_long_inc(&pool->sp_stats.threads_timedout);
+
+- if (signalled() || kthread_should_stop())
++ if (kthread_should_stop())
+ return ERR_PTR(-EINTR);
+ return ERR_PTR(-EAGAIN);
+ out_found:
+@@ -882,7 +878,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
+ try_to_freeze();
+ cond_resched();
+ err = -EINTR;
+- if (signalled() || kthread_should_stop())
++ if (kthread_should_stop())
+ goto out;
+
+ xprt = svc_get_next_xprt(rqstp, timeout);
+--
+2.43.0
+
--- /dev/null
+From 4ec3d4e7a4e356bebf4469396a23a231c0652d9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Jan 2024 11:17:40 +1100
+Subject: nfsd: don't call locks_release_private() twice concurrently
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 05eda6e75773592760285e10ac86c56d683be17f ]
+
+It is possible for free_blocked_lock() to be called twice concurrently,
+once from nfsd4_lock() and once from nfsd4_release_lockowner() calling
+remove_blocked_locks(). This is why a kref was added.
+
+It is perfectly safe for locks_delete_block() and kref_put() to be
+called in parallel as they use locking or atomicity respectively as
+protection. However locks_release_private() has no locking. It is
+safe for it to be called twice sequentially, but not concurrently.
+
+This patch moves that call from free_blocked_lock() where it could race
+with itself, to free_nbl() where it cannot. This will slightly delay
+the freeing of private info or release of the owner - but not by much.
+It is arguably more natural for this freeing to happen in free_nbl()
+where the structure itself is freed.
+
+This bug was found by code inspection - it has not been seen in practice.
+
+Fixes: 47446d74f170 ("nfsd4: add refcount for nfsd4_blocked_lock")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 0f1ece95bd642..ccc235a8bc1b4 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -318,6 +318,7 @@ free_nbl(struct kref *kref)
+ struct nfsd4_blocked_lock *nbl;
+
+ nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
++ locks_release_private(&nbl->nbl_lock);
+ kfree(nbl);
+ }
+
+@@ -325,7 +326,6 @@ static void
+ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+ {
+ locks_delete_block(&nbl->nbl_lock);
+- locks_release_private(&nbl->nbl_lock);
+ kref_put(&nbl->nbl_kref, free_nbl);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 583fbaceb7714a601dee3ccae051a2a589c0bb60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 11 Feb 2023 07:50:08 -0500
+Subject: nfsd: don't destroy global nfs4_file table in per-net shutdown
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 4102db175b5d884d133270fdbd0e59111ce688fc ]
+
+The nfs4_file table is global, so shutting it down when a containerized
+nfsd is shut down is wrong and can lead to double-frees. Tear down the
+nfs4_file_rhltable in nfs4_state_shutdown instead of
+nfs4_state_shutdown_net.
+
+Fixes: d47b295e8d76 ("NFSD: Use rhashtable for managing nfs4_file objects")
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2169017
+Reported-by: JianHong Yin <jiyin@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 22799f5ce686e..5c261cc807e8e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -8183,7 +8183,6 @@ nfs4_state_shutdown_net(struct net *net)
+
+ nfsd4_client_tracking_exit(net);
+ nfs4_state_destroy_net(net);
+- rhltable_destroy(&nfs4_file_rhltable);
+ #ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_shutdown_umount(nn);
+ #endif
+@@ -8193,6 +8192,7 @@ void
+ nfs4_state_shutdown(void)
+ {
+ nfsd4_destroy_callback_queue();
++ rhltable_destroy(&nfs4_file_rhltable);
+ }
+
+ static void
+--
+2.43.0
+
--- /dev/null
+From 4f796f4d3400f69f10ae04bc6c0cca9950cda936 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Jan 2023 14:52:14 -0500
+Subject: nfsd: don't free files unconditionally in __nfsd_file_cache_purge
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 4bdbba54e9b1c769da8ded9abd209d765715e1d6 ]
+
+nfsd_file_cache_purge is called when the server is shutting down, in
+which case, tearing things down is generally fine, but it also gets
+called when the exports cache is flushed.
+
+Instead of walking the cache and freeing everything unconditionally,
+handle it the same as when we have a notification of conflicting access.
+
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Reported-by: Ruben Vestergaard <rubenv@drcmr.dk>
+Reported-by: Torkil Svensgaard <torkil@drcmr.dk>
+Reported-by: Shachar Kagan <skagan@nvidia.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Tested-by: Shachar Kagan <skagan@nvidia.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 61 ++++++++++++++++++++++++++-------------------
+ 1 file changed, 36 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6a62d95d5ce64..68c7c82f8b3bb 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -660,6 +660,39 @@ static struct shrinker nfsd_file_shrinker = {
+ .seeks = 1,
+ };
+
++/**
++ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
++ * @nf: nfsd_file to attempt to queue
++ * @dispose: private list to queue successfully-put objects
++ *
++ * Unhash an nfsd_file, try to get a reference to it, and then put that
++ * reference. If it's the last reference, queue it to the dispose list.
++ */
++static void
++nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
++ __must_hold(RCU)
++{
++ int decrement = 1;
++
++ /* If we raced with someone else unhashing, ignore it */
++ if (!nfsd_file_unhash(nf))
++ return;
++
++ /* If we can't get a reference, ignore it */
++ if (!nfsd_file_get(nf))
++ return;
++
++ /* Extra decrement if we remove from the LRU */
++ if (nfsd_file_lru_remove(nf))
++ ++decrement;
++
++ /* If refcount goes to 0, then put on the dispose list */
++ if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
++ list_add(&nf->nf_lru, dispose);
++ trace_nfsd_file_closing(nf);
++ }
++}
++
+ /**
+ * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
+ * @inode: inode on which to close out nfsd_files
+@@ -687,30 +720,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+
+ rcu_read_lock();
+ do {
+- int decrement = 1;
+-
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+ if (!nf)
+ break;
+-
+- /* If we raced with someone else unhashing, ignore it */
+- if (!nfsd_file_unhash(nf))
+- continue;
+-
+- /* If we can't get a reference, ignore it */
+- if (!nfsd_file_get(nf))
+- continue;
+-
+- /* Extra decrement if we remove from the LRU */
+- if (nfsd_file_lru_remove(nf))
+- ++decrement;
+-
+- /* If refcount goes to 0, then put on the dispose list */
+- if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+- list_add(&nf->nf_lru, dispose);
+- trace_nfsd_file_closing(nf);
+- }
++ nfsd_file_cond_queue(nf, dispose);
+ } while (1);
+ rcu_read_unlock();
+ }
+@@ -927,11 +941,8 @@ __nfsd_file_cache_purge(struct net *net)
+
+ nf = rhashtable_walk_next(&iter);
+ while (!IS_ERR_OR_NULL(nf)) {
+- if (!net || nf->nf_net == net) {
+- nfsd_file_unhash(nf);
+- nfsd_file_lru_remove(nf);
+- list_add(&nf->nf_lru, &dispose);
+- }
++ if (!net || nf->nf_net == net)
++ nfsd_file_cond_queue(nf, &dispose);
+ nf = rhashtable_walk_next(&iter);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 3cd2a68f8649b8ea5a09c9a9223b250100d9405f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Feb 2023 12:02:46 -0500
+Subject: nfsd: don't fsync nfsd_files on last close
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 4c475eee02375ade6e864f1db16976ba0d96a0a2 ]
+
+Most of the time, NFSv4 clients issue a COMMIT before the final CLOSE of
+an open stateid, so with NFSv4, the fsync in the nfsd_file_free path is
+usually a no-op and doesn't block.
+
+We have a customer running knfsd over very slow storage (XFS over Ceph
+RBD). They were using the "async" export option because performance was
+more important than data integrity for this application. That export
+option turns NFSv4 COMMIT calls into no-ops. Due to the fsync in this
+codepath however, their final CLOSE calls would still stall (since a
+CLOSE effectively became a COMMIT).
+
+I think this fsync is not strictly necessary. We only use that result to
+reset the write verifier. Instead of fsync'ing all of the data when we
+free an nfsd_file, we can just check for writeback errors when one is
+acquired and when it is freed.
+
+If the client never comes back, then it'll never see the error anyway
+and there is no point in resetting it. If an error occurs after the
+nfsd_file is removed from the cache but before the inode is evicted,
+then it will reset the write verifier on the next nfsd_file_acquire,
+(since there will be an unseen error).
+
+The only exception here is if something else opens and fsyncs the file
+during that window. Given that local applications work with this
+limitation today, I don't see that as an issue.
+
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2166658
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Reported-and-tested-by: Pierguido Lambri <plambri@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 44 ++++++++++++--------------------------------
+ fs/nfsd/trace.h | 31 -------------------------------
+ 2 files changed, 12 insertions(+), 63 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 206742bbbd682..4a3796c6bd957 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -330,37 +330,27 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+ return nf;
+ }
+
++/**
++ * nfsd_file_check_write_error - check for writeback errors on a file
++ * @nf: nfsd_file to check for writeback errors
++ *
++ * Check whether a nfsd_file has an unseen error. Reset the write
++ * verifier if so.
++ */
+ static void
+-nfsd_file_fsync(struct nfsd_file *nf)
+-{
+- struct file *file = nf->nf_file;
+- int ret;
+-
+- if (!file || !(file->f_mode & FMODE_WRITE))
+- return;
+- ret = vfs_fsync(file, 1);
+- trace_nfsd_file_fsync(nf, ret);
+- if (ret)
+- nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+-}
+-
+-static int
+ nfsd_file_check_write_error(struct nfsd_file *nf)
+ {
+ struct file *file = nf->nf_file;
+
+- if (!file || !(file->f_mode & FMODE_WRITE))
+- return 0;
+- return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
++ if ((file->f_mode & FMODE_WRITE) &&
++ filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
++ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ }
+
+ static void
+ nfsd_file_hash_remove(struct nfsd_file *nf)
+ {
+ trace_nfsd_file_unhash(nf);
+-
+- if (nfsd_file_check_write_error(nf))
+- nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+ nfsd_file_rhash_params);
+ }
+@@ -386,23 +376,12 @@ nfsd_file_free(struct nfsd_file *nf)
+ this_cpu_add(nfsd_file_total_age, age);
+
+ nfsd_file_unhash(nf);
+-
+- /*
+- * We call fsync here in order to catch writeback errors. It's not
+- * strictly required by the protocol, but an nfsd_file could get
+- * evicted from the cache before a COMMIT comes in. If another
+- * task were to open that file in the interim and scrape the error,
+- * then the client may never see it. By calling fsync here, we ensure
+- * that writeback happens before the entry is freed, and that any
+- * errors reported result in the write verifier changing.
+- */
+- nfsd_file_fsync(nf);
+-
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, NULL);
++ nfsd_file_check_write_error(nf);
+ fput(nf->nf_file);
+ }
+
+@@ -1157,6 +1136,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ out:
+ if (status == nfs_ok) {
+ this_cpu_inc(nfsd_file_acquisitions);
++ nfsd_file_check_write_error(nf);
+ *pnf = nf;
+ } else {
+ if (refcount_dec_and_test(&nf->nf_ref))
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 5fb7e153ca865..276420ea3b8d9 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -1201,37 +1201,6 @@ TRACE_EVENT(nfsd_file_close,
+ )
+ );
+
+-TRACE_EVENT(nfsd_file_fsync,
+- TP_PROTO(
+- const struct nfsd_file *nf,
+- int ret
+- ),
+- TP_ARGS(nf, ret),
+- TP_STRUCT__entry(
+- __field(void *, nf_inode)
+- __field(int, nf_ref)
+- __field(int, ret)
+- __field(unsigned long, nf_flags)
+- __field(unsigned char, nf_may)
+- __field(struct file *, nf_file)
+- ),
+- TP_fast_assign(
+- __entry->nf_inode = nf->nf_inode;
+- __entry->nf_ref = refcount_read(&nf->nf_ref);
+- __entry->ret = ret;
+- __entry->nf_flags = nf->nf_flags;
+- __entry->nf_may = nf->nf_may;
+- __entry->nf_file = nf->nf_file;
+- ),
+- TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d",
+- __entry->nf_inode,
+- __entry->nf_ref,
+- show_nf_flags(__entry->nf_flags),
+- show_nfsd_may_flags(__entry->nf_may),
+- __entry->nf_file, __entry->ret
+- )
+-);
+-
+ #include "cache.h"
+
+ TRACE_DEFINE_ENUM(RC_DROPIT);
+--
+2.43.0
+
--- /dev/null
+From d42d3ea64c2e6b4a70323e329174726837d70d5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Jan 2023 07:09:33 -0500
+Subject: nfsd: don't hand out delegation on setuid files being opened for
+ write
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 826b67e6376c2a788e3a62c4860dcd79500a27d5 ]
+
+We had a bug report that xfstest generic/355 was failing on NFSv4.0.
+This test sets various combinations of setuid/setgid modes and tests
+whether DIO writes will cause them to be stripped.
+
+What I found was that the server did properly strip those bits, but
+the client didn't notice because it held a delegation that was not
+recalled. The recall didn't occur because the client itself was the
+one generating the activity and we avoid recalls in that case.
+
+Clearing setuid bits is an "implicit" activity. The client didn't
+specifically request that we do that, so we need the server to issue a
+CB_RECALL, or avoid the situation entirely by not issuing a delegation.
+
+The easiest fix here is to simply not give out a delegation if the file
+is being opened for write, and the mode has the setuid and/or setgid bit
+set. Note that there is a potential race between the mode and lease
+being set, so we test for this condition both before and after setting
+the lease.
+
+This patch fixes generic/355, generic/683 and generic/684 for me. (Note
+that 355 fails only on v4.0, and 683 and 684 require NFSv4.2 to run and
+fail).
+
+Reported-by: Boyang Xue <bxue@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 628e564e530bf..773971a75b62d 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5439,6 +5439,23 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
+ return 0;
+ }
+
++/*
++ * We avoid breaking delegations held by a client due to its own activity, but
++ * clearing setuid/setgid bits on a write is an implicit activity and the client
++ * may not notice and continue using the old mode. Avoid giving out a delegation
++ * on setuid/setgid files when the client is requesting an open for write.
++ */
++static int
++nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
++{
++ struct inode *inode = file_inode(nf->nf_file);
++
++ if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) &&
++ (inode->i_mode & (S_ISUID|S_ISGID)))
++ return -EAGAIN;
++ return 0;
++}
++
+ static struct nfs4_delegation *
+ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
+ struct svc_fh *parent)
+@@ -5472,6 +5489,8 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
+ spin_lock(&fp->fi_lock);
+ if (nfs4_delegation_exists(clp, fp))
+ status = -EAGAIN;
++ else if (nfsd4_verify_setuid_write(open, nf))
++ status = -EAGAIN;
+ else if (!fp->fi_deleg_file) {
+ fp->fi_deleg_file = nf;
+ /* increment early to prevent fi_deleg_file from being
+@@ -5512,6 +5531,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
+ if (status)
+ goto out_unlock;
+
++ /*
++ * Now that the deleg is set, check again to ensure that nothing
++ * raced in and changed the mode while we weren't lookng.
++ */
++ status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
++ if (status)
++ goto out_unlock;
++
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ if (fp->fi_had_conflict)
+--
+2.43.0
+
--- /dev/null
+From 54fad5c7e1874959a68fd0b7a6db6ae454391c78 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:11 -0500
+Subject: nfsd: don't kill nfsd_files because of lease break error
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit c6593366c0bf222be9c7561354dfb921c611745e ]
+
+An error from break_lease is non-fatal, so we needn't destroy the
+nfsd_file in that case. Just put the reference like we normally would
+and return the error.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index d61c8223082a4..43bb2fd47cf58 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1101,7 +1101,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nf = nfsd_file_alloc(&key, may_flags);
+ if (!nf) {
+ status = nfserr_jukebox;
+- goto out_status;
++ goto out;
+ }
+
+ ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
+@@ -1110,13 +1110,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (likely(ret == 0))
+ goto open_file;
+
+- nfsd_file_slab_free(&nf->nf_rcu);
+- nf = NULL;
+ if (ret == -EEXIST)
+ goto retry;
+ trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
+ status = nfserr_jukebox;
+- goto out_status;
++ goto construction_err;
+
+ wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+@@ -1126,29 +1124,25 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
+ if (!open_retry) {
+ status = nfserr_jukebox;
+- goto out;
++ goto construction_err;
+ }
+ open_retry = false;
+- if (refcount_dec_and_test(&nf->nf_ref))
+- nfsd_file_free(nf);
+ goto retry;
+ }
+-
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
++ if (status != nfs_ok) {
++ nfsd_file_put(nf);
++ nf = NULL;
++ }
++
+ out:
+ if (status == nfs_ok) {
+ this_cpu_inc(nfsd_file_acquisitions);
+ nfsd_file_check_write_error(nf);
+ *pnf = nf;
+- } else {
+- if (refcount_dec_and_test(&nf->nf_ref))
+- nfsd_file_free(nf);
+- nf = NULL;
+ }
+-
+-out_status:
+ put_cred(key.cred);
+ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ return status;
+@@ -1178,6 +1172,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
++ if (status == nfs_ok)
++ goto out;
++
++construction_err:
++ if (refcount_dec_and_test(&nf->nf_ref))
++ nfsd_file_free(nf);
++ nf = NULL;
+ goto out;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 3ad53cb1b2c3cc045670c00b6fc371cd2aaa4a89 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 07:15:09 -0500
+Subject: nfsd: don't open-code clear_and_wake_up_bit
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b8bea9f6cdd7236c7c2238d022145e9b2f8aac22 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 4a3796c6bd957..677a8d935ccc2 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1173,9 +1173,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ status = nfserr_jukebox;
+ if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+- clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+- smp_mb__after_atomic();
+- wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
++ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ goto out;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From af3cb578b33221859bc6edfe6abe7de9dad1a94a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Mar 2023 13:13:08 -0400
+Subject: nfsd: don't replace page in rq_pages if it's a continuation of last
+ page
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 27c934dd8832dd40fd34776f916dc201e18b319b ]
+
+The splice read calls nfsd_splice_actor to put the pages containing file
+data into the svc_rqst->rq_pages array. It's possible however to get a
+splice result that only has a partial page at the end, if (e.g.) the
+filesystem hands back a short read that doesn't cover the whole page.
+
+nfsd_splice_actor will plop the partial page into its rq_pages array and
+return. Then later, when nfsd_splice_actor is called again, the
+remainder of the page may end up being filled out. At this point,
+nfsd_splice_actor will put the page into the array _again_ corrupting
+the reply. If this is done enough times, rq_next_page will overrun the
+array and corrupt the trailing fields -- the rq_respages and
+rq_next_page pointers themselves.
+
+If we've already added the page to the array in the last pass, don't add
+it to the array a second time when dealing with a splice continuation.
+This was originally handled properly in nfsd_splice_actor, but commit
+91e23b1c3982 ("NFSD: Clean up nfsd_splice_actor()") removed the check
+for it.
+
+Fixes: 91e23b1c3982 ("NFSD: Clean up nfsd_splice_actor()")
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Reported-by: Dario Lesca <d.lesca@solinos.it>
+Tested-by: David Critch <dcritch@redhat.com>
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2150630
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 690191b3d997c..71788a5e4a55c 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -938,8 +938,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct page *last_page;
+
+ last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
+- for (page += offset / PAGE_SIZE; page <= last_page; page++)
++ for (page += offset / PAGE_SIZE; page <= last_page; page++) {
++ /*
++ * Skip page replacement when extending the contents
++ * of the current page.
++ */
++ if (page == *(rqstp->rq_next_page - 1))
++ continue;
+ svc_rqst_replace_page(rqstp, page);
++ }
+ if (rqstp->rq_res.page_len == 0) // first call
+ rqstp->rq_res.page_base = offset % PAGE_SIZE;
+ rqstp->rq_res.page_len += sd->len;
+--
+2.43.0
+
--- /dev/null
+From 27e01fb88cc1711d0931019a300edb528ebd88ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Feb 2024 13:22:39 +1100
+Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ]
+
+A recent change to check_for_locks() changed it to take ->flc_lock while
+holding ->fi_lock. This creates a lock inversion (reported by lockdep)
+because there is a case where ->fi_lock is taken while holding
+->flc_lock.
+
+->flc_lock is held across ->fl_lmops callbacks, and
+nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However
+it doesn't need to.
+
+Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each
+delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list
+and so needed the lock. Since then it doesn't walk the list and doesn't
+need the lock.
+
+Two actions are performed under the lock. One is to call
+nfsd_break_one_deleg which calls nfsd4_run_cb(). These don't act on
+the nfs4_file at all, so don't need the lock.
+
+The other is to set ->fi_had_conflict which is in the nfs4_file.
+This field is only ever set here (except when initialised to false)
+so there is no possible problem with multiple threads racing when
+setting it.
+
+The field is tested twice in nfs4_set_delegation(). The first test does
+not hold a lock and is documented as an opportunistic optimisation, so
+it doesn't impose any need to hold ->fi_lock while setting
+->fi_had_conflict.
+
+The second test in nfs4_set_delegation() *is* made under ->fi_lock, so
+removing the locking when ->fi_had_conflict is set could make a change.
+The change could only be interesting if ->fi_had_conflict tested as
+false even though nfsd_break_one_deleg() ran before ->fi_lock was
+unlocked. i.e. while hash_delegation_locked() was running.
+As hash_delegation_locked() doesn't interact in any way with nfsd4_run_cb()
+there can be no importance to this interaction.
+
+So this patch removes the locking from nfsd_break_deleg_cb() and moves
+the final test on ->fi_had_conflict out of the locked region to make it
+clear that locking isn't important to the test. It is still tested
+*after* vfs_setlease() has succeeded. This might be significant, and as
+vfs_setlease() takes ->flc_lock and nfsd_break_one_deleg() is called
+under ->flc_lock, this "after" is a true ordering provided by a spinlock.
+
+Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 64a8567ea4c40..0f1ece95bd642 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4950,10 +4950,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
+ */
+ fl->fl_break_time = 0;
+
+- spin_lock(&fp->fi_lock);
+ fp->fi_had_conflict = true;
+ nfsd_break_one_deleg(dp);
+- spin_unlock(&fp->fi_lock);
+ return false;
+ }
+
+@@ -5541,12 +5539,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
+ if (status)
+ goto out_unlock;
+
++ status = -EAGAIN;
++ if (fp->fi_had_conflict)
++ goto out_unlock;
++
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+- if (fp->fi_had_conflict)
+- status = -EAGAIN;
+- else
+- status = hash_delegation_locked(dp, fp);
++ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+--
+2.43.0
+
--- /dev/null
+From f9b0514f4565643d402332d55c94dbc4e9829d0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Jan 2023 12:31:37 -0500
+Subject: nfsd: don't take/put an extra reference when putting a file
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b2ff1bd71db2a1b193a6dde0845adcd69cbcf75e ]
+
+The last thing that filp_close does is an fput, so don't bother taking
+and putting the extra reference.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index faa0c7d0253eb..786e06cf107ff 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -381,10 +381,8 @@ nfsd_file_free(struct nfsd_file *nf)
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+- get_file(nf->nf_file);
+- filp_close(nf->nf_file, NULL);
+ nfsd_file_check_write_error(nf);
+- fput(nf->nf_file);
++ filp_close(nf->nf_file, NULL);
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 7319149bc872d9f2a78ddbd86ff44f4baf026928 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: drop fh argument from alloc_init_deleg
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit bbf936edd543e7220f60f9cbd6933b916550396d ]
+
+Currently, we pass the fh of the opened file down through several
+functions so that alloc_init_deleg can pass it to delegation_blocked.
+The filehandle of the open file is available in the nfs4_file however,
+so there's no need to pass it in a separate argument.
+
+Drop the argument from alloc_init_deleg, nfs4_open_delegation and
+nfs4_set_delegation.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 340d533dcafd3..2b333f9259a03 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1143,7 +1143,6 @@ static void block_delegations(struct knfsd_fh *fh)
+
+ static struct nfs4_delegation *
+ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+- struct svc_fh *current_fh,
+ struct nfs4_clnt_odstate *odstate)
+ {
+ struct nfs4_delegation *dp;
+@@ -1153,7 +1152,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+ n = atomic_long_inc_return(&num_delegations);
+ if (n < 0 || n > max_delegations)
+ goto out_dec;
+- if (delegation_blocked(¤t_fh->fh_handle))
++ if (delegation_blocked(&fp->fi_fhandle))
+ goto out_dec;
+ dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
+ if (dp == NULL)
+@@ -5307,7 +5306,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
+ }
+
+ static struct nfs4_delegation *
+-nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
++nfs4_set_delegation(struct nfs4_client *clp,
+ struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
+ {
+ int status = 0;
+@@ -5352,7 +5351,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
+ return ERR_PTR(status);
+
+ status = -ENOMEM;
+- dp = alloc_init_deleg(clp, fp, fh, odstate);
++ dp = alloc_init_deleg(clp, fp, odstate);
+ if (!dp)
+ goto out_delegees;
+
+@@ -5420,8 +5419,7 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
+ * proper support for them.
+ */
+ static void
+-nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
+- struct nfs4_ol_stateid *stp)
++nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
+ {
+ struct nfs4_delegation *dp;
+ struct nfs4_openowner *oo = openowner(stp->st_stateowner);
+@@ -5453,7 +5451,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
+ default:
+ goto out_no_deleg;
+ }
+- dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
++ dp = nfs4_set_delegation(clp, stp->st_stid.sc_file, stp->st_clnt_odstate);
+ if (IS_ERR(dp))
+ goto out_no_deleg;
+
+@@ -5585,7 +5583,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ * Attempt to hand out a delegation. No error return, because the
+ * OPEN succeeds even if we fail.
+ */
+- nfs4_open_delegation(current_fh, open, stp);
++ nfs4_open_delegation(open, stp);
+ nodeleg:
+ status = nfs_ok;
+ trace_nfsd_open(&stp->st_stid.sc_stateid);
+--
+2.43.0
+
--- /dev/null
+From 201d20826cb50d160a9bb39e593f6d2bfe89ceaa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Sep 2022 10:42:19 +1000
+Subject: NFSD: drop fname and flen args from nfsd_create_locked()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 9558f9304ca1903090fa5d995a3269a8e82804b4 ]
+
+nfsd_create_locked() does not use the "fname" and "flen" arguments, so
+drop them from declaration and all callers.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsproc.c | 5 ++---
+ fs/nfsd/vfs.c | 5 ++---
+ fs/nfsd/vfs.h | 4 ++--
+ 3 files changed, 6 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 4b19cc727ea50..ee02ede74bf53 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -391,9 +391,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ resp->status = nfs_ok;
+ if (!inode) {
+ /* File doesn't exist. Create it and set attrs */
+- resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name,
+- argp->len, &attrs, type, rdev,
+- newfhp);
++ resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type,
++ rdev, newfhp);
+ } else if (type == S_IFREG) {
+ dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
+ argp->name, attr->ia_valid, (long) attr->ia_size);
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 343af6341e5e1..77f8ab3826d75 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1257,7 +1257,7 @@ nfsd_check_ignore_resizing(struct iattr *iap)
+ /* The parent directory should already be locked: */
+ __be32
+ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- char *fname, int flen, struct nfsd_attrs *attrs,
++ struct nfsd_attrs *attrs,
+ int type, dev_t rdev, struct svc_fh *resfhp)
+ {
+ struct dentry *dentry, *dchild;
+@@ -1384,8 +1384,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (err)
+ goto out_unlock;
+ fh_fill_pre_attrs(fhp);
+- err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type,
+- rdev, resfhp);
++ err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
+ fh_fill_post_attrs(fhp);
+ out_unlock:
+ inode_unlock(dentry->d_inode);
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index c95cd414b4bb0..120521bc7b247 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -79,8 +79,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ u64 count, bool sync);
+ #endif /* CONFIG_NFSD_V4 */
+ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
+- char *name, int len, struct nfsd_attrs *attrs,
+- int type, dev_t rdev, struct svc_fh *res);
++ struct nfsd_attrs *attrs, int type, dev_t rdev,
++ struct svc_fh *res);
+ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct nfsd_attrs *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+--
+2.43.0
+
--- /dev/null
+From 7190e9d456ab6c5b257ac5073a83ddc8793ffa40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Sep 2021 11:15:29 +1000
+Subject: NFSD: drop support for ancient filehandles
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit c645a883df34ee10b884ec921e850def54b7f461 ]
+
+Filehandles not in the "new" or "version 1" format have not been handed
+out for new mounts since Linux 2.4 which was released 20 years ago.
+I think it is safe to say that no such file handles are still in use,
+and that we can drop support for them.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.c | 160 +++++++++++++++---------------------------------
+ fs/nfsd/nfsfh.h | 34 +---------
+ 2 files changed, 54 insertions(+), 140 deletions(-)
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index c475d2271f9c5..149f9bbc48a4e 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -154,11 +154,12 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
+ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ {
+ struct knfsd_fh *fh = &fhp->fh_handle;
+- struct fid *fid = NULL, sfid;
++ struct fid *fid = NULL;
+ struct svc_export *exp;
+ struct dentry *dentry;
+ int fileid_type;
+ int data_left = fh->fh_size/4;
++ int len;
+ __be32 error;
+
+ error = nfserr_stale;
+@@ -167,48 +168,35 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+ return nfserr_nofilehandle;
+
+- if (fh->fh_version == 1) {
+- int len;
+-
+- if (--data_left < 0)
+- return error;
+- if (fh->fh_auth_type != 0)
+- return error;
+- len = key_len(fh->fh_fsid_type) / 4;
+- if (len == 0)
+- return error;
+- if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+- /* deprecated, convert to type 3 */
+- len = key_len(FSID_ENCODE_DEV)/4;
+- fh->fh_fsid_type = FSID_ENCODE_DEV;
+- /*
+- * struct knfsd_fh uses host-endian fields, which are
+- * sometimes used to hold net-endian values. This
+- * confuses sparse, so we must use __force here to
+- * keep it from complaining.
+- */
+- fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
+- ntohl((__force __be32)fh->fh_fsid[1])));
+- fh->fh_fsid[1] = fh->fh_fsid[2];
+- }
+- data_left -= len;
+- if (data_left < 0)
+- return error;
+- exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
+- fid = (struct fid *)(fh->fh_fsid + len);
+- } else {
+- __u32 tfh[2];
+- dev_t xdev;
+- ino_t xino;
+-
+- if (fh->fh_size != NFS_FHSIZE)
+- return error;
+- /* assume old filehandle format */
+- xdev = old_decode_dev(fh->ofh_xdev);
+- xino = u32_to_ino_t(fh->ofh_xino);
+- mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
+- exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
++ if (fh->fh_version != 1)
++ return error;
++
++ if (--data_left < 0)
++ return error;
++ if (fh->fh_auth_type != 0)
++ return error;
++ len = key_len(fh->fh_fsid_type) / 4;
++ if (len == 0)
++ return error;
++ if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
++ /* deprecated, convert to type 3 */
++ len = key_len(FSID_ENCODE_DEV)/4;
++ fh->fh_fsid_type = FSID_ENCODE_DEV;
++ /*
++ * struct knfsd_fh uses host-endian fields, which are
++ * sometimes used to hold net-endian values. This
++ * confuses sparse, so we must use __force here to
++ * keep it from complaining.
++ */
++ fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
++ ntohl((__force __be32)fh->fh_fsid[1])));
++ fh->fh_fsid[1] = fh->fh_fsid[2];
+ }
++ data_left -= len;
++ if (data_left < 0)
++ return error;
++ exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
++ fid = (struct fid *)(fh->fh_fsid + len);
+
+ error = nfserr_stale;
+ if (IS_ERR(exp)) {
+@@ -253,18 +241,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+ if (rqstp->rq_vers > 2)
+ error = nfserr_badhandle;
+
+- if (fh->fh_version != 1) {
+- sfid.i32.ino = fh->ofh_ino;
+- sfid.i32.gen = fh->ofh_generation;
+- sfid.i32.parent_ino = fh->ofh_dirino;
+- fid = &sfid;
+- data_left = 3;
+- if (fh->ofh_dirino == 0)
+- fileid_type = FILEID_INO32_GEN;
+- else
+- fileid_type = FILEID_INO32_GEN_PARENT;
+- } else
+- fileid_type = fh->fh_fileid_type;
++ fileid_type = fh->fh_fileid_type;
+
+ if (fileid_type == FILEID_ROOT)
+ dentry = dget(exp->ex_path.dentry);
+@@ -452,20 +429,6 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
+ }
+ }
+
+-/*
+- * for composing old style file handles
+- */
+-static inline void _fh_update_old(struct dentry *dentry,
+- struct svc_export *exp,
+- struct knfsd_fh *fh)
+-{
+- fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
+- fh->ofh_generation = d_inode(dentry)->i_generation;
+- if (d_is_dir(dentry) ||
+- (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+- fh->ofh_dirino = 0;
+-}
+-
+ static bool is_root_export(struct svc_export *exp)
+ {
+ return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
+@@ -562,9 +525,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+ /* ref_fh is a reference file handle.
+ * if it is non-null and for the same filesystem, then we should compose
+ * a filehandle which is of the same version, where possible.
+- * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
+- * Then create a 32byte filehandle using nfs_fhbase_old
+- *
+ */
+
+ struct inode * inode = d_inode(dentry);
+@@ -600,35 +560,21 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+ fhp->fh_dentry = dget(dentry); /* our internal copy */
+ fhp->fh_export = exp_get(exp);
+
+- if (fhp->fh_handle.fh_version == 0xca) {
+- /* old style filehandle please */
+- memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
+- fhp->fh_handle.fh_size = NFS_FHSIZE;
+- fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
+- fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
+- fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
+- fhp->fh_handle.ofh_xino =
+- ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
+- fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
+- if (inode)
+- _fh_update_old(dentry, exp, &fhp->fh_handle);
+- } else {
+- fhp->fh_handle.fh_size =
+- key_len(fhp->fh_handle.fh_fsid_type) + 4;
+- fhp->fh_handle.fh_auth_type = 0;
+-
+- mk_fsid(fhp->fh_handle.fh_fsid_type,
+- fhp->fh_handle.fh_fsid,
+- ex_dev,
+- d_inode(exp->ex_path.dentry)->i_ino,
+- exp->ex_fsid, exp->ex_uuid);
+-
+- if (inode)
+- _fh_update(fhp, exp, dentry);
+- if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
+- fh_put(fhp);
+- return nfserr_opnotsupp;
+- }
++ fhp->fh_handle.fh_size =
++ key_len(fhp->fh_handle.fh_fsid_type) + 4;
++ fhp->fh_handle.fh_auth_type = 0;
++
++ mk_fsid(fhp->fh_handle.fh_fsid_type,
++ fhp->fh_handle.fh_fsid,
++ ex_dev,
++ d_inode(exp->ex_path.dentry)->i_ino,
++ exp->ex_fsid, exp->ex_uuid);
++
++ if (inode)
++ _fh_update(fhp, exp, dentry);
++ if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
++ fh_put(fhp);
++ return nfserr_opnotsupp;
+ }
+
+ return 0;
+@@ -649,16 +595,12 @@ fh_update(struct svc_fh *fhp)
+ dentry = fhp->fh_dentry;
+ if (d_really_is_negative(dentry))
+ goto out_negative;
+- if (fhp->fh_handle.fh_version != 1) {
+- _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
+- } else {
+- if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
+- return 0;
++ if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
++ return 0;
+
+- _fh_update(fhp, fhp->fh_export, dentry);
+- if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
+- return nfserr_opnotsupp;
+- }
++ _fh_update(fhp, fhp->fh_export, dentry);
++ if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
++ return nfserr_opnotsupp;
+ return 0;
+ out_bad:
+ printk(KERN_ERR "fh_update: fh not verified!\n");
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index ad47f16676a8c..8b5587f274a7d 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -14,26 +14,7 @@
+ #include <linux/exportfs.h>
+ #include <linux/nfs4.h>
+
+-
+-/*
+- * This is the old "dentry style" Linux NFSv2 file handle.
+- *
+- * The xino and xdev fields are currently used to transport the
+- * ino/dev of the exported inode.
+- */
+-struct nfs_fhbase_old {
+- u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */
+- u32 fb_ino; /* our inode number */
+- u32 fb_dirino; /* dir inode number, 0 for directories */
+- u32 fb_dev; /* our device */
+- u32 fb_xdev;
+- u32 fb_xino;
+- u32 fb_generation;
+-};
+-
+ /*
+- * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
+- *
+ * The file handle starts with a sequence of four-byte words.
+ * The first word contains a version number (1) and three descriptor bytes
+ * that tell how the remaining 3 variable length fields should be handled.
+@@ -57,7 +38,7 @@ struct nfs_fhbase_old {
+ * 6 - 16 byte uuid
+ * 7 - 8 byte inode number and 16 byte uuid
+ *
+- * The fileid_type identified how the file within the filesystem is encoded.
++ * The fileid_type identifies how the file within the filesystem is encoded.
+ * The values for this field are filesystem specific, exccept that
+ * filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
+ * in include/linux/exportfs.h for currently registered values.
+@@ -65,7 +46,7 @@ struct nfs_fhbase_old {
+ struct nfs_fhbase_new {
+ union {
+ struct {
+- u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */
++ u8 fb_version_aux; /* == 1 */
+ u8 fb_auth_type_aux;
+ u8 fb_fsid_type_aux;
+ u8 fb_fileid_type_aux;
+@@ -74,7 +55,7 @@ struct nfs_fhbase_new {
+ /* u32 fb_fileid[0]; floating */
+ };
+ struct {
+- u8 fb_version; /* == 1, even => nfs_fhbase_old */
++ u8 fb_version; /* == 1 */
+ u8 fb_auth_type;
+ u8 fb_fsid_type;
+ u8 fb_fileid_type;
+@@ -89,20 +70,11 @@ struct knfsd_fh {
+ * a new file handle
+ */
+ union {
+- struct nfs_fhbase_old fh_old;
+ u32 fh_pad[NFS4_FHSIZE/4];
+ struct nfs_fhbase_new fh_new;
+ } fh_base;
+ };
+
+-#define ofh_dcookie fh_base.fh_old.fb_dcookie
+-#define ofh_ino fh_base.fh_old.fb_ino
+-#define ofh_dirino fh_base.fh_old.fb_dirino
+-#define ofh_dev fh_base.fh_old.fb_dev
+-#define ofh_xdev fh_base.fh_old.fb_xdev
+-#define ofh_xino fh_base.fh_old.fb_xino
+-#define ofh_generation fh_base.fh_old.fb_generation
+-
+ #define fh_version fh_base.fh_new.fb_version
+ #define fh_fsid_type fh_base.fh_new.fb_fsid_type
+ #define fh_auth_type fh_base.fh_new.fb_auth_type
+--
+2.43.0
+
--- /dev/null
+From 283c2b154a1116c03f74d4c2423cd6e33615d70d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Jan 2024 08:36:52 -0500
+Subject: nfsd: drop the nfsd_put helper
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 64e6304169f1e1f078e7f0798033f80a7fb0ea46 ]
+
+It's not safe to call nfsd_put once nfsd_last_thread has been called, as
+that function will zero out the nn->nfsd_serv pointer.
+
+Drop the nfsd_put helper altogether and open-code the svc_put in its
+callers instead. That allows us to not be reliant on the value of that
+pointer when handling an error.
+
+Fixes: 2a501f55cd64 ("nfsd: call nfsd_last_thread() before final nfsd_put()")
+Reported-by: Zhi Li <yieli@redhat.com>
+Cc: NeilBrown <neilb@suse.de>
+Signed-off-by: Jeffrey Layton <jlayton@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 31 +++++++++++++++++--------------
+ fs/nfsd/nfsd.h | 7 -------
+ 2 files changed, 17 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index eec442edb6556..f77f00c931723 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -709,6 +709,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+ char *mesg = buf;
+ int fd, err;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct svc_serv *serv;
+
+ err = get_int(&mesg, &fd);
+ if (err != 0 || fd < 0)
+@@ -718,15 +719,15 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+ if (err != 0)
+ return err;
+
+- err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
++ serv = nn->nfsd_serv;
++ err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+
+- if (err < 0 && !nn->nfsd_serv->sv_nrthreads && !nn->keep_active)
++ if (err < 0 && !serv->sv_nrthreads && !nn->keep_active)
+ nfsd_last_thread(net);
+- else if (err >= 0 &&
+- !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+- svc_get(nn->nfsd_serv);
++ else if (err >= 0 && !serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
++ svc_get(serv);
+
+- nfsd_put(net);
++ svc_put(serv);
+ return err;
+ }
+
+@@ -740,6 +741,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ struct svc_xprt *xprt;
+ int port, err;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct svc_serv *serv;
+
+ if (sscanf(buf, "%15s %5u", transport, &port) != 2)
+ return -EINVAL;
+@@ -751,32 +753,33 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ if (err != 0)
+ return err;
+
+- err = svc_xprt_create(nn->nfsd_serv, transport, net,
++ serv = nn->nfsd_serv;
++ err = svc_xprt_create(serv, transport, net,
+ PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
+ if (err < 0)
+ goto out_err;
+
+- err = svc_xprt_create(nn->nfsd_serv, transport, net,
++ err = svc_xprt_create(serv, transport, net,
+ PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
+ if (err < 0 && err != -EAFNOSUPPORT)
+ goto out_close;
+
+- if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+- svc_get(nn->nfsd_serv);
++ if (!serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
++ svc_get(serv);
+
+- nfsd_put(net);
++ svc_put(serv);
+ return 0;
+ out_close:
+- xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
++ xprt = svc_find_xprt(serv, transport, net, PF_INET, port);
+ if (xprt != NULL) {
+ svc_xprt_close(xprt);
+ svc_xprt_put(xprt);
+ }
+ out_err:
+- if (!nn->nfsd_serv->sv_nrthreads && !nn->keep_active)
++ if (!serv->sv_nrthreads && !nn->keep_active)
+ nfsd_last_thread(net);
+
+- nfsd_put(net);
++ svc_put(serv);
+ return err;
+ }
+
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 3796015dc7656..013bfa24ced21 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -96,13 +96,6 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
+ int nfsd_pool_stats_release(struct inode *, struct file *);
+ void nfsd_shutdown_threads(struct net *net);
+
+-static inline void nfsd_put(struct net *net)
+-{
+- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+-
+- svc_put(nn->nfsd_serv);
+-}
+-
+ bool i_am_nfsd(void);
+
+ struct nfsdfs_client {
+--
+2.43.0
+
--- /dev/null
+From c75c29a9e4dac80bef5533876c619d332670e644 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Aug 2022 15:16:36 -0400
+Subject: NFSD enforce filehandle check for source file in COPY
+
+From: Olga Kornievskaia <kolga@netapp.com>
+
+[ Upstream commit 754035ff79a14886e68c0c9f6fa80adb21f12b53 ]
+
+If the passed in filehandle for the source file in the COPY operation
+is not a regular file, the server MUST return NFS4ERR_WRONG_TYPE.
+
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index b2e6fa962f7d9..b2bfe540c1cb0 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1760,7 +1760,13 @@ static int nfsd4_do_async_copy(void *data)
+ filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh,
+ ©->stateid);
+ if (IS_ERR(filp)) {
+- nfserr = nfserr_offload_denied;
++ switch (PTR_ERR(filp)) {
++ case -EBADF:
++ nfserr = nfserr_wrong_type;
++ break;
++ default:
++ nfserr = nfserr_offload_denied;
++ }
+ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+--
+2.43.0
+
--- /dev/null
+From 33f7731a59767367dee7d3d2903897e86cb9651d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 18 Dec 2022 16:55:53 -0800
+Subject: NFSD: enhance inter-server copy cleanup
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit df24ac7a2e3a9d0bc68f1756a880e50bfe4b4522 ]
+
+Currently nfsd4_setup_inter_ssc returns the vfsmount of the source
+server's export when the mount completes. After the copy is done
+nfsd4_cleanup_inter_ssc is called with the vfsmount of the source
+server and it searches nfsd_ssc_mount_list for a matching entry
+to do the clean up.
+
+The problems with this approach are (1) the need to search the
+nfsd_ssc_mount_list and (2) the code has to handle the case where
+the matching entry is not found which looks ugly.
+
+The enhancement is that, instead of returning the vfsmount,
+nfsd4_setup_inter_ssc returns the nfsd4_ssc_umount_item, which has
+the vfsmount embedded in it. When nfsd4_cleanup_inter_ssc is called,
+it is passed the nfsd4_ssc_umount_item directly to do the clean up,
+so no searching is needed and there is no need to handle the
+'not found' case.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+[ cel: adjusted whitespace and variable/function names ]
+Reviewed-by: Olga Kornievskaia <kolga@netapp.com>
+---
+ fs/nfsd/nfs4proc.c | 111 ++++++++++++++++------------------------
+ fs/nfsd/xdr4.h | 2 +-
+ include/linux/nfs_ssc.h | 2 +-
+ 3 files changed, 46 insertions(+), 69 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 5ea71af276c7b..6fb5f10602233 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1295,15 +1295,15 @@ extern void nfs_sb_deactive(struct super_block *sb);
+ * setup a work entry in the ssc delayed unmount list.
+ */
+ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+- struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt)
++ struct nfsd4_ssc_umount_item **nsui)
+ {
+ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd4_ssc_umount_item *work = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+ DEFINE_WAIT(wait);
++ __be32 status = 0;
+
+- *ss_mnt = NULL;
+- *retwork = NULL;
++ *nsui = NULL;
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ try_again:
+ spin_lock(&nn->nfsd_ssc_lock);
+@@ -1327,12 +1327,12 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
+ goto try_again;
+ }
+- *ss_mnt = ni->nsui_vfsmount;
++ *nsui = ni;
+ refcount_inc(&ni->nsui_refcnt);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ kfree(work);
+
+- /* return vfsmount in ss_mnt */
++ /* return vfsmount in (*nsui)->nsui_vfsmount */
+ return 0;
+ }
+ if (work) {
+@@ -1340,31 +1340,32 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ refcount_set(&work->nsui_refcnt, 2);
+ work->nsui_busy = true;
+ list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
+- *retwork = work;
+- }
++ *nsui = work;
++ } else
++ status = nfserr_resource;
+ spin_unlock(&nn->nfsd_ssc_lock);
+- return 0;
++ return status;
+ }
+
+-static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn,
+- struct nfsd4_ssc_umount_item *work, struct vfsmount *ss_mnt)
++static void nfsd4_ssc_update_dul(struct nfsd_net *nn,
++ struct nfsd4_ssc_umount_item *nsui,
++ struct vfsmount *ss_mnt)
+ {
+- /* set nsui_vfsmount, clear busy flag and wakeup waiters */
+ spin_lock(&nn->nfsd_ssc_lock);
+- work->nsui_vfsmount = ss_mnt;
+- work->nsui_busy = false;
++ nsui->nsui_vfsmount = ss_mnt;
++ nsui->nsui_busy = false;
+ wake_up_all(&nn->nfsd_ssc_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ }
+
+-static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn,
+- struct nfsd4_ssc_umount_item *work)
++static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn,
++ struct nfsd4_ssc_umount_item *nsui)
+ {
+ spin_lock(&nn->nfsd_ssc_lock);
+- list_del(&work->nsui_list);
++ list_del(&nsui->nsui_list);
+ wake_up_all(&nn->nfsd_ssc_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+- kfree(work);
++ kfree(nsui);
+ }
+
+ /*
+@@ -1372,7 +1373,7 @@ static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn,
+ */
+ static __be32
+ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+- struct vfsmount **mount)
++ struct nfsd4_ssc_umount_item **nsui)
+ {
+ struct file_system_type *type;
+ struct vfsmount *ss_mnt;
+@@ -1383,7 +1384,6 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ char *ipaddr, *dev_name, *raw_data;
+ int len, raw_len;
+ __be32 status = nfserr_inval;
+- struct nfsd4_ssc_umount_item *work = NULL;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ naddr = &nss->u.nl4_addr;
+@@ -1391,6 +1391,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ naddr->addr_len,
+ (struct sockaddr *)&tmp_addr,
+ sizeof(tmp_addr));
++ *nsui = NULL;
+ if (tmp_addrlen == 0)
+ goto out_err;
+
+@@ -1433,10 +1434,10 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ goto out_free_rawdata;
+ snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
+
+- status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt);
++ status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
+ if (status)
+ goto out_free_devname;
+- if (ss_mnt)
++ if ((*nsui)->nsui_vfsmount)
+ goto out_done;
+
+ /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
+@@ -1444,15 +1445,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ module_put(type->owner);
+ if (IS_ERR(ss_mnt)) {
+ status = nfserr_nodev;
+- if (work)
+- nfsd4_ssc_cancel_dul_work(nn, work);
++ nfsd4_ssc_cancel_dul(nn, *nsui);
+ goto out_free_devname;
+ }
+- if (work)
+- nfsd4_ssc_update_dul_work(nn, work, ss_mnt);
++ nfsd4_ssc_update_dul(nn, *nsui, ss_mnt);
+ out_done:
+ status = 0;
+- *mount = ss_mnt;
+
+ out_free_devname:
+ kfree(dev_name);
+@@ -1476,7 +1474,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ static __be32
+ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+- struct nfsd4_copy *copy, struct vfsmount **mount)
++ struct nfsd4_copy *copy)
+ {
+ struct svc_fh *s_fh = NULL;
+ stateid_t *s_stid = ©->cp_src_stateid;
+@@ -1489,7 +1487,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ if (status)
+ goto out;
+
+- status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount);
++ status = nfsd4_interssc_connect(copy->cp_src, rqstp, ©->ss_nsui);
+ if (status)
+ goto out;
+
+@@ -1507,45 +1505,27 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ }
+
+ static void
+-nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
++nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+ struct nfsd_file *dst)
+ {
+- bool found = false;
+- long timeout;
+- struct nfsd4_ssc_umount_item *tmp;
+- struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
++ long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+
+ nfs42_ssc_close(filp);
+ nfsd_file_put(dst);
+ fput(filp);
+
+- if (!nn) {
+- mntput(ss_mnt);
+- return;
+- }
+ spin_lock(&nn->nfsd_ssc_lock);
+- timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+- list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
+- if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) {
+- list_del(&ni->nsui_list);
+- /*
+- * vfsmount can be shared by multiple exports,
+- * decrement refcnt. If the count drops to 1 it
+- * will be unmounted when nsui_expire expires.
+- */
+- refcount_dec(&ni->nsui_refcnt);
+- ni->nsui_expire = jiffies + timeout;
+- list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list);
+- found = true;
+- break;
+- }
+- }
++ list_del(&nsui->nsui_list);
++ /*
++ * vfsmount can be shared by multiple exports,
++ * decrement refcnt. If the count drops to 1 it
++ * will be unmounted when nsui_expire expires.
++ */
++ refcount_dec(&nsui->nsui_refcnt);
++ nsui->nsui_expire = jiffies + timeout;
++ list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list);
+ spin_unlock(&nn->nfsd_ssc_lock);
+- if (!found) {
+- mntput(ss_mnt);
+- return;
+- }
+ }
+
+ #else /* CONFIG_NFSD_V4_2_INTER_SSC */
+@@ -1553,15 +1533,13 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
+ static __be32
+ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+- struct nfsd4_copy *copy,
+- struct vfsmount **mount)
++ struct nfsd4_copy *copy)
+ {
+- *mount = NULL;
+ return nfserr_inval;
+ }
+
+ static void
+-nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
++nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+ struct nfsd_file *dst)
+ {
+ }
+@@ -1702,7 +1680,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+ memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server));
+ memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
+ memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
+- dst->ss_mnt = src->ss_mnt;
++ dst->ss_nsui = src->ss_nsui;
+ }
+
+ static void cleanup_async_copy(struct nfsd4_copy *copy)
+@@ -1751,8 +1729,8 @@ static int nfsd4_do_async_copy(void *data)
+ if (nfsd4_ssc_is_inter(copy)) {
+ struct file *filp;
+
+- filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh,
+- ©->stateid);
++ filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount,
++ ©->c_fh, ©->stateid);
+ if (IS_ERR(filp)) {
+ switch (PTR_ERR(filp)) {
+ case -EBADF:
+@@ -1766,7 +1744,7 @@ static int nfsd4_do_async_copy(void *data)
+ }
+ nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
+ false);
+- nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst);
++ nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst);
+ } else {
+ nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, false);
+@@ -1792,8 +1770,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfserr_notsupp;
+ goto out;
+ }
+- status = nfsd4_setup_inter_ssc(rqstp, cstate, copy,
+- ©->ss_mnt);
++ status = nfsd4_setup_inter_ssc(rqstp, cstate, copy);
+ if (status)
+ return nfserr_offload_denied;
+ } else {
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 4fd2cf6d1d2dc..510978e602da6 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -571,7 +571,7 @@ struct nfsd4_copy {
+ struct task_struct *copy_task;
+ refcount_t refcount;
+
+- struct vfsmount *ss_mnt;
++ struct nfsd4_ssc_umount_item *ss_nsui;
+ struct nfs_fh c_fh;
+ nfs4_stateid stateid;
+ };
+diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
+index 75843c00f326a..22265b1ff0800 100644
+--- a/include/linux/nfs_ssc.h
++++ b/include/linux/nfs_ssc.h
+@@ -53,6 +53,7 @@ static inline void nfs42_ssc_close(struct file *filep)
+ if (nfs_ssc_client_tbl.ssc_nfs4_ops)
+ (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep);
+ }
++#endif
+
+ struct nfsd4_ssc_umount_item {
+ struct list_head nsui_list;
+@@ -66,7 +67,6 @@ struct nfsd4_ssc_umount_item {
+ struct vfsmount *nsui_vfsmount;
+ char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1];
+ };
+-#endif
+
+ /*
+ * NFS_FS
+--
+2.43.0
+
--- /dev/null
+From eaa6624e308681e0d6f2b4d6e57256a1f793fea5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:27:09 -0400
+Subject: NFSD: Ensure nf_inode is never dereferenced
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 427f5f83a3191cbf024c5aea6e5b601cdf88d895 ]
+
+The documenting comment for struct nfsd_file states:
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ */
+
+Replace the two existing dereferences to make the comment always
+true.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 ++---
+ fs/nfsd/filecache.h | 2 +-
+ fs/nfsd/nfs4state.c | 2 +-
+ 3 files changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 7ad27655db699..55478d411e5a0 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -227,12 +227,11 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+ }
+
+ static struct nfsd_file_mark *
+-nfsd_file_mark_find_or_create(struct nfsd_file *nf)
++nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+ {
+ int err;
+ struct fsnotify_mark *mark;
+ struct nfsd_file_mark *nfm = NULL, *new;
+- struct inode *inode = nf->nf_inode;
+
+ do {
+ fsnotify_group_lock(nfsd_file_fsnotify_group);
+@@ -1143,7 +1142,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ open_file:
+ trace_nfsd_file_alloc(nf);
+- nf->nf_mark = nfsd_file_mark_find_or_create(nf);
++ nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
+ if (nf->nf_mark) {
+ if (open) {
+ status = nfsd_open_verified(rqstp, fhp, may_flags,
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 28145f1628923..8e8c0c47d67df 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -39,7 +39,7 @@ struct nfsd_file {
+ #define NFSD_FILE_PENDING (1)
+ #define NFSD_FILE_REFERENCED (2)
+ unsigned long nf_flags;
+- struct inode *nf_inode;
++ struct inode *nf_inode; /* don't deref */
+ refcount_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 923eec2716d75..9d344164f814f 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2577,7 +2577,7 @@ static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f)
+
+ static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
+ {
+- struct inode *inode = f->nf_inode;
++ struct inode *inode = file_inode(f->nf_file);
+
+ seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
+ MAJOR(inode->i_sb->s_dev),
+--
+2.43.0
+
--- /dev/null
+From 47b312f17e1d1f2bc7867bcd7ab9cd7293ad3fda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Oct 2022 14:42:54 -0400
+Subject: nfsd: ensure we always call fh_verify_error tracepoint
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 93c128e709aec23b10f3a2f78a824080d4085318 ]
+
+This is a conditional tracepoint. Call it every time, not just when
+nfsd_permission fails.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index d73434200df98..8c52b6c9d31a2 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -392,8 +392,8 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ skip_pseudoflavor_check:
+ /* Finally, check access permissions. */
+ error = nfsd_permission(rqstp, exp, dentry, access);
+- trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
+ out:
++ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
+ if (error == nfserr_stale)
+ nfsd_stats_fh_stale_inc(exp);
+ return error;
+--
+2.43.0
+
--- /dev/null
+From c7f2b43e0b774118f17ac2bac8bb8eaea3e21ad0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 14:41:02 -0400
+Subject: nfsd: extra checks when freeing delegation stateids
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 895ddf5ed4c54ea9e3533606d7a8b4e4f27f95ef ]
+
+We've had some reports of problems in the refcounting for delegation
+stateids that we've yet to track down. Add some extra checks to ensure
+that we've removed the object from various lists before freeing it.
+
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2127067
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 61978ad43a0f7..d19629de2af5d 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1071,7 +1071,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
+
+ static void nfs4_free_deleg(struct nfs4_stid *stid)
+ {
+- WARN_ON(!list_empty(&stid->sc_cp_list));
++ struct nfs4_delegation *dp = delegstateid(stid);
++
++ WARN_ON_ONCE(!list_empty(&stid->sc_cp_list));
++ WARN_ON_ONCE(!list_empty(&dp->dl_perfile));
++ WARN_ON_ONCE(!list_empty(&dp->dl_perclnt));
++ WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru));
+ kmem_cache_free(deleg_slab, stid);
+ atomic_long_dec(&num_delegations);
+ }
+--
+2.43.0
+
--- /dev/null
+From 94f818e6de2f007d040df283f28896736454a1cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Oct 2022 11:47:08 -0400
+Subject: NFSD: Finish converting the NFSv3 GETACL result encoder
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 841fd0a3cb490eae5dfd262eccb8c8b11d57f8b8 ]
+
+For some reason, the NFSv2 GETACL result encoder was fully converted
+to use the new nfs_stream_encode_acl(), but the NFSv3 equivalent was
+not similarly converted.
+
+Fixes: 20798dfe249a ("NFSD: Update the NFSv3 GETACL result encoder to use struct xdr_stream")
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3acl.c | 30 ++++++------------------------
+ 1 file changed, 6 insertions(+), 24 deletions(-)
+
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 2fb9ee3564558..a34a22e272ad5 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -171,11 +171,7 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_getaclres *resp = rqstp->rq_resp;
+ struct dentry *dentry = resp->fh.fh_dentry;
+- struct kvec *head = rqstp->rq_res.head;
+ struct inode *inode;
+- unsigned int base;
+- int n;
+- int w;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+@@ -187,26 +183,12 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
+ return false;
+
+- base = (char *)xdr->p - (char *)head->iov_base;
+-
+- rqstp->rq_res.page_len = w = nfsacl_size(
+- (resp->mask & NFS_ACL) ? resp->acl_access : NULL,
+- (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+- while (w > 0) {
+- if (!*(rqstp->rq_next_page++))
+- return false;
+- w -= PAGE_SIZE;
+- }
+-
+- n = nfsacl_encode(&rqstp->rq_res, base, inode,
+- resp->acl_access,
+- resp->mask & NFS_ACL, 0);
+- if (n > 0)
+- n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
+- resp->acl_default,
+- resp->mask & NFS_DFACL,
+- NFS_ACL_DEFAULT);
+- if (n <= 0)
++ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
++ resp->mask & NFS_ACL, 0))
++ return false;
++ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
++ resp->mask & NFS_DFACL,
++ NFS_ACL_DEFAULT))
+ return false;
+ break;
+ default:
+--
+2.43.0
+
--- /dev/null
+From e85d2cba824c078972d2a5ce0f583f742e74cdef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Feb 2024 20:24:50 -0500
+Subject: nfsd: Fix a regression in nfsd_setattr()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 6412e44c40aaf8f1d7320b2099c5bdd6cb9126ac ]
+
+Commit bb4d53d66e4b ("NFSD: use (un)lock_inode instead of
+fh_(un)lock for file operations") broke the NFSv3 pre/post op
+attributes behaviour when doing a SETATTR rpc call by stripping out
+the calls to fh_fill_pre_attrs() and fh_fill_post_attrs().
+
+Fixes: bb4d53d66e4b ("NFSD: use (un)lock_inode instead of fh_(un)lock for file operations")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Message-ID: <20240216012451.22725-1-trondmy@kernel.org>
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 4 ++++
+ fs/nfsd/vfs.c | 6 ++++--
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index b6d768bd5ccca..6779291efca9e 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1106,6 +1106,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ };
+ struct inode *inode;
+ __be32 status = nfs_ok;
++ bool save_no_wcc;
+ int err;
+
+ if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+@@ -1131,8 +1132,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ if (status)
+ goto out;
++ save_no_wcc = cstate->current_fh.fh_no_wcc;
++ cstate->current_fh.fh_no_wcc = true;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+ 0, (time64_t)0);
++ cstate->current_fh.fh_no_wcc = save_no_wcc;
+ if (!status)
+ status = nfserrno(attrs.na_labelerr);
+ if (!status)
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 76ce19d42336f..0f430548bfbbe 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -474,7 +474,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+- int host_err;
++ int host_err = 0;
+ bool get_write_count;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
+ int retries;
+@@ -532,6 +532,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ inode_lock(inode);
++ fh_fill_pre_attrs(fhp);
+ for (retries = 1;;) {
+ struct iattr attrs;
+
+@@ -559,13 +560,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ attr->na_aclerr = set_posix_acl(&init_user_ns,
+ inode, ACL_TYPE_DEFAULT,
+ attr->na_dpacl);
++ fh_fill_post_attrs(fhp);
+ inode_unlock(inode);
+ if (size_change)
+ put_write_access(inode);
+ out:
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+- return nfserrno(host_err);
++ return err != 0 ? err : nfserrno(host_err);
+ }
+
+ #if defined(CONFIG_NFSD_V4)
+--
+2.43.0
+
--- /dev/null
+From aed1ad3984d82255ba7da43d0030a2ee43ae4607 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Mar 2022 09:54:01 -0400
+Subject: nfsd: Fix a write performance regression
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 6b8a94332ee4f7d9a8ae0cbac7609f79c212f06c ]
+
+The call to filemap_flush() in nfsd_file_put() is there to ensure that
+we clear out any writes belonging to a NFSv3 client relatively quickly
+and avoid situations where the file can't be evicted by the garbage
+collector. It also ensures that we detect write errors quickly.
+
+The problem is this causes a regression in performance for some
+workloads.
+
+So try to improve matters by deferring writeback until we're ready to
+close the file, and need to detect errors so that we can force the
+client to resend.
+
+Tested-by: Jan Kara <jack@suse.cz>
+Fixes: b6669305d35a ("nfsd: Reduce the number of calls to nfsd_file_gc()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Link: https://lore.kernel.org/all/20220330103457.r4xrhy2d6nhtouzk@quack3.lan
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 94157b82b60e1..6cde6ef68996e 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -229,6 +229,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
+ return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+ }
+
++static void
++nfsd_file_flush(struct nfsd_file *nf)
++{
++ if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
++ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
++}
++
+ static void
+ nfsd_file_do_unhash(struct nfsd_file *nf)
+ {
+@@ -296,11 +303,14 @@ nfsd_file_put(struct nfsd_file *nf)
+ return;
+ }
+
+- filemap_flush(nf->nf_file->f_mapping);
+ is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+- nfsd_file_put_noref(nf);
+- if (is_hashed)
++ if (!is_hashed) {
++ nfsd_file_flush(nf);
++ nfsd_file_put_noref(nf);
++ } else {
++ nfsd_file_put_noref(nf);
+ nfsd_file_schedule_laundrette();
++ }
+ if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+ nfsd_file_gc();
+ }
+@@ -321,6 +331,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
++ nfsd_file_flush(nf);
+ nfsd_file_put_noref(nf);
+ }
+ }
+@@ -334,6 +345,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del(&nf->nf_lru);
++ nfsd_file_flush(nf);
+ if (!refcount_dec_and_test(&nf->nf_ref))
+ continue;
+ if (nfsd_file_free(nf))
+--
+2.43.0
+
--- /dev/null
+From 1d180e859a5c762e6486a8d6ac55fc3b5612bee7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Oct 2021 04:14:22 +0000
+Subject: NFSD:fix boolreturn.cocci warning
+
+From: Changcheng Deng <deng.changcheng@zte.com.cn>
+
+[ Upstream commit 291cd656da04163f4bba67953c1f2f823e0d1231 ]
+
+./fs/nfsd/nfssvc.c: 1072: 8-9: :WARNING return of 0/1 in function
+'nfssvc_decode_voidarg' with return type bool
+
+Return statements in functions returning bool should use true/false
+instead of 1/0.
+
+Reported-by: Zeal Robot <zealci@zte.com.cn>
+Signed-off-by: Changcheng Deng <deng.changcheng@zte.com.cn>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 7df1505425edc..408cff8fe32d3 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1069,7 +1069,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ */
+ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+ /**
+--
+2.43.0
+
--- /dev/null
+From 7750ee347c93540138cd96d709be7e893cbe9744 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 12:38:45 -0400
+Subject: nfsd: fix comments about spinlock handling with delegations
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 25fbe1fca14142beae6c882f7906510363d42bff ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 1e9245303c0f2..e98306c69f424 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4870,14 +4870,14 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
+ * We're assuming the state code never drops its reference
+ * without first removing the lease. Since we're in this lease
+ * callback (and since the lease code is serialized by the
+- * i_lock) we know the server hasn't removed the lease yet, and
++ * flc_lock) we know the server hasn't removed the lease yet, and
+ * we know it's safe to take a reference.
+ */
+ refcount_inc(&dp->dl_stid.sc_count);
+ nfsd4_run_cb(&dp->dl_recall);
+ }
+
+-/* Called from break_lease() with i_lock held. */
++/* Called from break_lease() with flc_lock held. */
+ static bool
+ nfsd_break_deleg_cb(struct file_lock *fl)
+ {
+--
+2.43.0
+
--- /dev/null
+From c90bfa00d49618b14e6894cd1582e1281d1a5309 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Feb 2023 13:18:34 -0500
+Subject: nfsd: fix courtesy client with deny mode handling in
+ nfs4_upgrade_open
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit dcd779dc46540e174a6ac8d52fbed23593407317 ]
+
+The nested if statements here make no sense, as you can never reach
+the "else" branch in the nested statement. Fix the error handling for
+when there is a courtesy client that holds a conflicting deny mode.
+
+Fixes: 3d6942715180 ("NFSD: add support for share reservation conflict to courteous server")
+Reported-by: 張智諺 <cc85nod@gmail.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index fab5805b3ca74..69bc4622a95a4 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5300,16 +5300,17 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ /* test and set deny mode */
+ spin_lock(&fp->fi_lock);
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+- if (status == nfs_ok) {
+- if (status != nfserr_share_denied) {
+- set_deny(open->op_share_deny, stp);
+- fp->fi_share_deny |=
+- (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+- } else {
+- if (nfs4_resolve_deny_conflicts_locked(fp, false,
+- stp, open->op_share_deny, false))
+- status = nfserr_jukebox;
+- }
++ switch (status) {
++ case nfs_ok:
++ set_deny(open->op_share_deny, stp);
++ fp->fi_share_deny |=
++ (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
++ break;
++ case nfserr_share_denied:
++ if (nfs4_resolve_deny_conflicts_locked(fp, false,
++ stp, open->op_share_deny, false))
++ status = nfserr_jukebox;
++ break;
+ }
+ spin_unlock(&fp->fi_lock);
+
+--
+2.43.0
+
--- /dev/null
+From 2de1e1cf75de34f230374b9a843b088210c2f536 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Jun 2023 17:09:06 -0400
+Subject: nfsd: Fix creation time serialization order
+
+From: Tavian Barnes <tavianator@tavianator.com>
+
+[ Upstream commit d7dbed457c2ef83709a2a2723a2d58de43623449 ]
+
+In nfsd4_encode_fattr(), TIME_CREATE was being written out after all
+other times. However, they should be written out in an order that
+matches the bit flags in bmval1, which in this case are
+
+ #define FATTR4_WORD1_TIME_ACCESS (1UL << 15)
+ #define FATTR4_WORD1_TIME_CREATE (1UL << 18)
+ #define FATTR4_WORD1_TIME_DELTA (1UL << 19)
+ #define FATTR4_WORD1_TIME_METADATA (1UL << 20)
+ #define FATTR4_WORD1_TIME_MODIFY (1UL << 21)
+
+so TIME_CREATE should come second.
+
+I noticed this on a FreeBSD NFSv4.2 client, which supports creation
+times. On this client, file times were weirdly permuted. With this
+patch applied on the server, times looked normal on the client.
+
+Fixes: e377a3e698fb ("nfsd: Add support for the birth time attribute")
+Link: https://unix.stackexchange.com/q/749605/56202
+Signed-off-by: Tavian Barnes <tavianator@tavianator.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index c40876daf60c0..5b95499a1f344 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3365,6 +3365,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ if (status)
+ goto out;
+ }
++ if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
++ status = nfsd4_encode_nfstime4(xdr, &stat.btime);
++ if (status)
++ goto out;
++ }
+ if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
+@@ -3381,11 +3386,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ if (status)
+ goto out;
+ }
+- if (bmval1 & FATTR4_WORD1_TIME_CREATE) {
+- status = nfsd4_encode_nfstime4(xdr, &stat.btime);
+- if (status)
+- goto out;
+- }
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+ u64 ino = stat.ino;
+
+--
+2.43.0
+
--- /dev/null
+From e031769045980e654c47f69e4b8e77838920debb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 May 2023 14:35:55 +0300
+Subject: nfsd: fix double fget() bug in __write_ports_addfd()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit c034203b6a9dae6751ef4371c18cb77983e30c28 ]
+
+The bug here is that you cannot rely on getting the same socket
+from multiple calls to fget() because userspace can influence
+that. This is a kind of double fetch bug.
+
+The fix is to delete the svc_alien_sock() function and instead do
+the checking inside the svc_addsock() function.
+
+Fixes: 3064639423c4 ("nfsd: check passed socket's net matches NFSd superblock's one")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: NeilBrown <neilb@suse.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 7 +------
+ include/linux/sunrpc/svcsock.h | 7 +++----
+ net/sunrpc/svcsock.c | 24 ++++++------------------
+ 3 files changed, 10 insertions(+), 28 deletions(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index c2577ee7ffb22..76a60e7a75097 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -714,16 +714,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+ if (err != 0 || fd < 0)
+ return -EINVAL;
+
+- if (svc_alien_sock(net, fd)) {
+- printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
+- return -EINVAL;
+- }
+-
+ err = nfsd_create_serv(net);
+ if (err != 0)
+ return err;
+
+- err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
++ err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+
+ if (err >= 0 &&
+ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
+diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
+index bcc555c7ae9c6..13aff355d5a13 100644
+--- a/include/linux/sunrpc/svcsock.h
++++ b/include/linux/sunrpc/svcsock.h
+@@ -59,10 +59,9 @@ int svc_recv(struct svc_rqst *, long);
+ int svc_send(struct svc_rqst *);
+ void svc_drop(struct svc_rqst *);
+ void svc_sock_update_bufs(struct svc_serv *serv);
+-bool svc_alien_sock(struct net *net, int fd);
+-int svc_addsock(struct svc_serv *serv, const int fd,
+- char *name_return, const size_t len,
+- const struct cred *cred);
++int svc_addsock(struct svc_serv *serv, struct net *net,
++ const int fd, char *name_return, const size_t len,
++ const struct cred *cred);
+ void svc_init_xprt_sock(void);
+ void svc_cleanup_xprt_sock(void);
+ struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
+diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
+index be7081284a098..112236dd72901 100644
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -1334,25 +1334,10 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
+ return svsk;
+ }
+
+-bool svc_alien_sock(struct net *net, int fd)
+-{
+- int err;
+- struct socket *sock = sockfd_lookup(fd, &err);
+- bool ret = false;
+-
+- if (!sock)
+- goto out;
+- if (sock_net(sock->sk) != net)
+- ret = true;
+- sockfd_put(sock);
+-out:
+- return ret;
+-}
+-EXPORT_SYMBOL_GPL(svc_alien_sock);
+-
+ /**
+ * svc_addsock - add a listener socket to an RPC service
+ * @serv: pointer to RPC service to which to add a new listener
++ * @net: caller's network namespace
+ * @fd: file descriptor of the new listener
+ * @name_return: pointer to buffer to fill in with name of listener
+ * @len: size of the buffer
+@@ -1362,8 +1347,8 @@ EXPORT_SYMBOL_GPL(svc_alien_sock);
+ * Name is terminated with '\n'. On error, returns a negative errno
+ * value.
+ */
+-int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
+- const size_t len, const struct cred *cred)
++int svc_addsock(struct svc_serv *serv, struct net *net, const int fd,
++ char *name_return, const size_t len, const struct cred *cred)
+ {
+ int err = 0;
+ struct socket *so = sockfd_lookup(fd, &err);
+@@ -1374,6 +1359,9 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
+
+ if (!so)
+ return err;
++ err = -EINVAL;
++ if (sock_net(so->sk) != net)
++ goto out;
+ err = -EAFNOSUPPORT;
+ if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
+ goto out;
+--
+2.43.0
+
--- /dev/null
+From a7c3b3f76794f3eae7503430fb854191ea258dbc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 14:55:56 -0500
+Subject: nfsd: fix handling of cached open files in nfsd4_open codepath
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 0b3a551fa58b4da941efeb209b3770868e2eddd7 ]
+
+Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a
+regular NFSv4 file") added the ability to cache an open fd over a
+compound. There are a couple of problems with the way this currently
+works:
+
+It's racy, as a newly-created nfsd_file can end up with its PENDING bit
+cleared while the nf is hashed, and the nf_file pointer is still zeroed
+out. Other tasks can find it in this state and they expect to see a
+valid nf_file, and can oops if nf_file is NULL.
+
+Also, there is no guarantee that we'll end up creating a new nfsd_file
+if one is already in the hash. If an extant entry is in the hash with a
+valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with
+the value of op_file and the old nf_file will leak.
+
+Fix both issues by making a new nfsd_file_acquire_opened variant that
+takes an optional file pointer. If one is present when this is called,
+we'll take a new reference to it instead of trying to open the file. If
+the nfsd_file already has a valid nf_file, we'll just ignore the
+optional file and pass the nfsd_file back as-is.
+
+Also rework the tracepoints a bit to allow for an "opened" variant and
+don't try to avoid counting acquisitions in the case where we already
+have a cached open file.
+
+Fixes: fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file")
+Cc: Trond Myklebust <trondmy@hammerspace.com>
+Reported-by: Stanislav Saner <ssaner@redhat.com>
+Reported-and-Tested-by: Ruben Vestergaard <rubenv@drcmr.dk>
+Reported-and-Tested-by: Torkil Svensgaard <torkil@drcmr.dk>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 40 ++++++++++++++++++----------------
+ fs/nfsd/filecache.h | 5 +++--
+ fs/nfsd/nfs4state.c | 16 ++++----------
+ fs/nfsd/trace.h | 52 ++++++++++++---------------------------------
+ 4 files changed, 42 insertions(+), 71 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 140094a44cc40..6a62d95d5ce64 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1070,8 +1070,8 @@ nfsd_file_is_cached(struct inode *inode)
+
+ static __be32
+ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- unsigned int may_flags, struct nfsd_file **pnf,
+- bool open, bool want_gc)
++ unsigned int may_flags, struct file *file,
++ struct nfsd_file **pnf, bool want_gc)
+ {
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_FULL,
+@@ -1146,8 +1146,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+ out:
+ if (status == nfs_ok) {
+- if (open)
+- this_cpu_inc(nfsd_file_acquisitions);
++ this_cpu_inc(nfsd_file_acquisitions);
+ *pnf = nf;
+ } else {
+ if (refcount_dec_and_test(&nf->nf_ref))
+@@ -1157,20 +1156,23 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ out_status:
+ put_cred(key.cred);
+- if (open)
+- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
++ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ return status;
+
+ open_file:
+ trace_nfsd_file_alloc(nf);
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
+ if (nf->nf_mark) {
+- if (open) {
++ if (file) {
++ get_file(file);
++ nf->nf_file = file;
++ status = nfs_ok;
++ trace_nfsd_file_opened(nf, status);
++ } else {
+ status = nfsd_open_verified(rqstp, fhp, may_flags,
+ &nf->nf_file);
+ trace_nfsd_file_open(nf, status);
+- } else
+- status = nfs_ok;
++ }
+ } else
+ status = nfserr_jukebox;
+ /*
+@@ -1206,7 +1208,7 @@ __be32
+ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+ {
+- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
+ }
+
+ /**
+@@ -1227,28 +1229,30 @@ __be32
+ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+ {
+- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
+ }
+
+ /**
+- * nfsd_file_create - Get a struct nfsd_file, do not open
++ * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file just created
+ * @may_flags: NFSD_MAY_ settings for the file
++ * @file: cached, already-open file (may be NULL)
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+- * The nfsd_file_object returned by this API is reference-counted
+- * but not garbage-collected. The object is released immediately
+- * one RCU grace period after the final nfsd_file_put().
++ * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
++ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
++ * opening a new one.
+ *
+ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+ * network byte order is returned.
+ */
+ __be32
+-nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- unsigned int may_flags, struct nfsd_file **pnf)
++nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct file *file,
++ struct nfsd_file **pnf)
+ {
+- return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
+ }
+
+ /*
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index b7efb2c3ddb18..41516a4263ea5 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+-__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- unsigned int may_flags, struct nfsd_file **nfp);
++__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct file *file,
++ struct nfsd_file **nfp);
+ int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
+ #endif /* _FS_NFSD_FILECACHE_H */
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 8bb75adbd4e6a..485e7055e52ec 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ if (!fp->fi_fds[oflag]) {
+ spin_unlock(&fp->fi_lock);
+
+- if (!open->op_filp) {
+- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
+- if (status != nfs_ok)
+- goto out_put_access;
+- } else {
+- status = nfsd_file_create(rqstp, cur_fh, access, &nf);
+- if (status != nfs_ok)
+- goto out_put_access;
+- nf->nf_file = open->op_filp;
+- open->op_filp = NULL;
+- trace_nfsd_file_create(rqstp, access, nf);
+- }
++ status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
++ open->op_filp, &nf);
++ if (status != nfs_ok)
++ goto out_put_access;
+
+ spin_lock(&fp->fi_lock);
+ if (!fp->fi_fds[oflag]) {
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 77be39fcb3d44..5fb7e153ca865 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -980,43 +980,6 @@ TRACE_EVENT(nfsd_file_acquire,
+ )
+ );
+
+-TRACE_EVENT(nfsd_file_create,
+- TP_PROTO(
+- const struct svc_rqst *rqstp,
+- unsigned int may_flags,
+- const struct nfsd_file *nf
+- ),
+-
+- TP_ARGS(rqstp, may_flags, nf),
+-
+- TP_STRUCT__entry(
+- __field(const void *, nf_inode)
+- __field(const void *, nf_file)
+- __field(unsigned long, may_flags)
+- __field(unsigned long, nf_flags)
+- __field(unsigned long, nf_may)
+- __field(unsigned int, nf_ref)
+- __field(u32, xid)
+- ),
+-
+- TP_fast_assign(
+- __entry->nf_inode = nf->nf_inode;
+- __entry->nf_file = nf->nf_file;
+- __entry->may_flags = may_flags;
+- __entry->nf_flags = nf->nf_flags;
+- __entry->nf_may = nf->nf_may;
+- __entry->nf_ref = refcount_read(&nf->nf_ref);
+- __entry->xid = be32_to_cpu(rqstp->rq_xid);
+- ),
+-
+- TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
+- __entry->xid, __entry->nf_inode,
+- show_nfsd_may_flags(__entry->may_flags),
+- __entry->nf_ref, show_nf_flags(__entry->nf_flags),
+- show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
+- )
+-);
+-
+ TRACE_EVENT(nfsd_file_insert_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+@@ -1078,8 +1041,8 @@ TRACE_EVENT(nfsd_file_cons_err,
+ )
+ );
+
+-TRACE_EVENT(nfsd_file_open,
+- TP_PROTO(struct nfsd_file *nf, __be32 status),
++DECLARE_EVENT_CLASS(nfsd_file_open_class,
++ TP_PROTO(const struct nfsd_file *nf, __be32 status),
+ TP_ARGS(nf, status),
+ TP_STRUCT__entry(
+ __field(void *, nf_inode) /* cannot be dereferenced */
+@@ -1103,6 +1066,17 @@ TRACE_EVENT(nfsd_file_open,
+ __entry->nf_file)
+ )
+
++#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \
++DEFINE_EVENT(nfsd_file_open_class, name, \
++ TP_PROTO( \
++ const struct nfsd_file *nf, \
++ __be32 status \
++ ), \
++ TP_ARGS(nf, status))
++
++DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
++DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
++
+ TRACE_EVENT(nfsd_file_is_cached,
+ TP_PROTO(
+ const struct inode *inode,
+--
+2.43.0
+
--- /dev/null
+From e81e8390582e3785befa8380d05f816f1b90ff34 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Dec 2021 16:35:42 +0800
+Subject: NFSD: Fix inconsistent indenting
+
+From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+
+[ Upstream commit 1e37d0e5bda45881eea1bec4b812def72c7d4aea ]
+
+Eliminate the following smatch warning:
+
+fs/nfsd/nfs4xdr.c:4766 nfsd4_encode_read_plus_hole() warn: inconsistent
+indenting.
+
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 1483cd1b5eed7..506ecfca2338b 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -4807,8 +4807,8 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
+ return nfserr_resource;
+
+ *p++ = htonl(NFS4_CONTENT_HOLE);
+- p = xdr_encode_hyper(p, read->rd_offset);
+- p = xdr_encode_hyper(p, count);
++ p = xdr_encode_hyper(p, read->rd_offset);
++ p = xdr_encode_hyper(p, count);
+
+ *eof = (read->rd_offset + count) >= f_size;
+ *maxcount = min_t(unsigned long, count, *maxcount);
+--
+2.43.0
+
--- /dev/null
+From 257681fe4969f6526b77d8f792dc8c846d0e81ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Jan 2023 21:34:13 -0800
+Subject: NFSD: fix leaked reference count of nfsd4_ssc_umount_item
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 34e8f9ec4c9ac235f917747b23a200a5e0ec857b ]
+
+The reference count of nfsd4_ssc_umount_item is not decremented
+on error conditions. This prevents the laundromat from unmounting
+the vfsmount of the source file.
+
+This patch decrements the reference count of nfsd4_ssc_umount_item
+on error.
+
+Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.")
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index ada46ef5a093d..c95e7ec5fb530 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1813,13 +1813,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ release_copy_files(copy);
+ return status;
+ out_err:
++ if (nfsd4_ssc_is_inter(copy)) {
++ /*
++ * Source's vfsmount of inter-copy will be unmounted
++ * by the laundromat. Use copy instead of async_copy
++ * since async_copy->ss_nsui might not be set yet.
++ */
++ refcount_dec(&copy->ss_nsui->nsui_refcnt);
++ }
+ if (async_copy)
+ cleanup_async_copy(async_copy);
+ status = nfserrno(-ENOMEM);
+- /*
+- * source's vfsmount of inter-copy will be unmounted
+- * by the laundromat
+- */
+ goto out;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 11ef28bbfbf676536f71ce22281ee0595d78c6d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Oct 2022 09:53:26 -0400
+Subject: NFSD: Fix licensing header in filecache.c
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3f054211b29c0fa06dfdcab402c795fd7e906be1 ]
+
+Add a missing SPDX header.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index d681faf48cf85..b43d2d7ac5957 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1,5 +1,6 @@
++// SPDX-License-Identifier: GPL-2.0
+ /*
+- * Open file cache.
++ * The NFSD open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ *
+--
+2.43.0
+
--- /dev/null
+From a60a513de03015bd19ff7e5f898c44cbdae96015 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Oct 2022 11:49:21 -0400
+Subject: nfsd: fix net-namespace logic in __nfsd_file_cache_purge
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit d3aefd2b29ff5ffdeb5c06a7d3191a027a18cdb8 ]
+
+If the namespace doesn't match the one in "net", then we'll continue,
+but that doesn't cause another rhashtable_walk_next call, so it will
+loop infinitely.
+
+Fixes: ce502f81ba88 ("NFSD: Convert the filecache to use rhashtable")
+Reported-by: Petr Vorel <pvorel@suse.cz>
+Link: https://lore.kernel.org/ltp/Y1%2FP8gDAcWC%2F+VR3@pevik/
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 0b19eb015c6c8..024adcbe67e95 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -892,9 +892,8 @@ __nfsd_file_cache_purge(struct net *net)
+
+ nf = rhashtable_walk_next(&iter);
+ while (!IS_ERR_OR_NULL(nf)) {
+- if (net && nf->nf_net != net)
+- continue;
+- nfsd_file_unhash_and_dispose(nf, &dispose);
++ if (!net || nf->nf_net == net)
++ nfsd_file_unhash_and_dispose(nf, &dispose);
+ nf = rhashtable_walk_next(&iter);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 7da018bc4548cf4b26d04a8330ed63e7be2d4b10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Sep 2022 16:56:02 -0400
+Subject: nfsd: fix nfsd_file_unhash_and_dispose
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 8d0d254b15cc5b7d46d85fb7ab8ecede9575e672 ]
+
+nfsd_file_unhash_and_dispose() is called for two reasons:
+
+We're either shutting down and purging the filecache, or we've gotten a
+notification about a file delete, so we want to go ahead and unhash it
+so that it'll get cleaned up when we close.
+
+We're either walking the hashtable or doing a lookup in it and we
+don't take a reference in either case. What we want to do in both cases
+is to try and unhash the object and put it on the dispose list if that
+was successful. If it's no longer hashed, then we don't want to touch
+it, with the assumption being that something else is already cleaning
+up the sentinel reference.
+
+Instead of trying to selectively decrement the refcount in this
+function, just unhash it, and if that was successful, move it to the
+dispose list. Then, the disposal routine will just clean that up as
+usual.
+
+Also, just make this a void function, drop the WARN_ON_ONCE, and the
+comments about deadlocking since the nature of the purported deadlock
+is no longer clear.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 36 +++++++-----------------------------
+ 1 file changed, 7 insertions(+), 29 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index fa8e1546e0206..a0d93e797cdce 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -404,22 +404,15 @@ nfsd_file_unhash(struct nfsd_file *nf)
+ return false;
+ }
+
+-/*
+- * Return true if the file was unhashed.
+- */
+-static bool
++static void
+ nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
+ {
+ trace_nfsd_file_unhash_and_dispose(nf);
+- if (!nfsd_file_unhash(nf))
+- return false;
+- /* keep final reference for nfsd_file_lru_dispose */
+- if (refcount_dec_not_one(&nf->nf_ref))
+- return true;
+-
+- nfsd_file_lru_remove(nf);
+- list_add(&nf->nf_lru, dispose);
+- return true;
++ if (nfsd_file_unhash(nf)) {
++ /* caller must call nfsd_file_dispose_list() later */
++ nfsd_file_lru_remove(nf);
++ list_add(&nf->nf_lru, dispose);
++ }
+ }
+
+ static void
+@@ -561,8 +554,6 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
+ * @lock: LRU list lock (unused)
+ * @arg: dispose list
+ *
+- * Note this can deadlock with nfsd_file_cache_purge.
+- *
+ * Return values:
+ * %LRU_REMOVED: @item was removed from the LRU
+ * %LRU_ROTATE: @item is to be moved to the LRU tail
+@@ -747,8 +738,6 @@ nfsd_file_close_inode(struct inode *inode)
+ *
+ * Walk the LRU list and close any entries that have not been used since
+ * the last scan.
+- *
+- * Note this can deadlock with nfsd_file_cache_purge.
+ */
+ static void
+ nfsd_file_delayed_close(struct work_struct *work)
+@@ -890,16 +879,12 @@ nfsd_file_cache_init(void)
+ goto out;
+ }
+
+-/*
+- * Note this can deadlock with nfsd_file_lru_cb.
+- */
+ static void
+ __nfsd_file_cache_purge(struct net *net)
+ {
+ struct rhashtable_iter iter;
+ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+- bool del;
+
+ rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
+ do {
+@@ -909,14 +894,7 @@ __nfsd_file_cache_purge(struct net *net)
+ while (!IS_ERR_OR_NULL(nf)) {
+ if (net && nf->nf_net != net)
+ continue;
+- del = nfsd_file_unhash_and_dispose(nf, &dispose);
+-
+- /*
+- * Deadlock detected! Something marked this entry as
+- * unhased, but hasn't removed it from the hash list.
+- */
+- WARN_ON_ONCE(!del);
+-
++ nfsd_file_unhash_and_dispose(nf, &dispose);
+ nf = rhashtable_walk_next(&iter);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 89646d28f08ca05d1b163cfdf9e2a35a198ff6b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 May 2022 12:08:45 +0800
+Subject: nfsd: Fix null-ptr-deref in nfsd_fill_super()
+
+From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+
+[ Upstream commit 6f6f84aa215f7b6665ccbb937db50860f9ec2989 ]
+
+KASAN reports a null-ptr-deref as follows:
+
+ BUG: KASAN: null-ptr-deref in nfsd_fill_super+0xc6/0xe0 [nfsd]
+ Write of size 8 at addr 000000000000005d by task a.out/852
+
+ CPU: 7 PID: 852 Comm: a.out Not tainted 5.18.0-rc7-dirty #66
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
+ Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ kasan_report+0xab/0x120
+ ? nfsd_mkdir+0x71/0x1c0 [nfsd]
+ ? nfsd_fill_super+0xc6/0xe0 [nfsd]
+ nfsd_fill_super+0xc6/0xe0 [nfsd]
+ ? nfsd_mkdir+0x1c0/0x1c0 [nfsd]
+ get_tree_keyed+0x8e/0x100
+ vfs_get_tree+0x41/0xf0
+ __do_sys_fsconfig+0x590/0x670
+ ? fscontext_read+0x180/0x180
+ ? anon_inode_getfd+0x4f/0x70
+ do_syscall_64+0x35/0x80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+This can be reproduced by concurrent operations:
+ 1. fsopen(nfsd)/fsconfig
+ 2. insmod/rmmod nfsd
+
+Since the nfsd file system is registered before nfsd_net is allocated,
+a caller may get the file_system_type and use the nfsd_net before it is
+allocated, and a null-ptr-deref then occurs.
+
+So init_nfsd() should call register_filesystem() last.
+
+Fixes: bd5ae9288d64 ("nfsd: register pernet ops last, unregister first")
+Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 55949e60897d5..0621c2faf2424 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1535,25 +1535,25 @@ static int __init init_nfsd(void)
+ retval = create_proc_exports_entry();
+ if (retval)
+ goto out_free_lockd;
+- retval = register_filesystem(&nfsd_fs_type);
+- if (retval)
+- goto out_free_exports;
+ retval = register_pernet_subsys(&nfsd_net_ops);
+ if (retval < 0)
+- goto out_free_filesystem;
++ goto out_free_exports;
+ retval = register_cld_notifier();
+ if (retval)
+ goto out_free_subsys;
+ retval = nfsd4_create_laundry_wq();
++ if (retval)
++ goto out_free_cld;
++ retval = register_filesystem(&nfsd_fs_type);
+ if (retval)
+ goto out_free_all;
+ return 0;
+ out_free_all:
++ nfsd4_destroy_laundry_wq();
++out_free_cld:
+ unregister_cld_notifier();
+ out_free_subsys:
+ unregister_pernet_subsys(&nfsd_net_ops);
+-out_free_filesystem:
+- unregister_filesystem(&nfsd_fs_type);
+ out_free_exports:
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+@@ -1571,6 +1571,7 @@ static int __init init_nfsd(void)
+
+ static void __exit exit_nfsd(void)
+ {
++ unregister_filesystem(&nfsd_fs_type);
+ nfsd4_destroy_laundry_wq();
+ unregister_cld_notifier();
+ unregister_pernet_subsys(&nfsd_net_ops);
+@@ -1581,7 +1582,6 @@ static void __exit exit_nfsd(void)
+ nfsd_lockd_shutdown();
+ nfsd4_free_slabs();
+ nfsd4_exit_pnfs();
+- unregister_filesystem(&nfsd_fs_type);
+ }
+
+ MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+--
+2.43.0
+
--- /dev/null
+From 88d3cdad9129566aba6dc2c775bbdf3ee0c1eb40 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Sep 2023 11:25:00 +1000
+Subject: NFSD: fix possible oops when nfsd/pool_stats is closed.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 88956eabfdea7d01d550535af120d4ef265b1d02 ]
+
+If /proc/fs/nfsd/pool_stats is open when the last nfsd thread exits, then
+when the file is closed a NULL pointer is dereferenced.
+This is because nfsd_pool_stats_release() assumes that the
+pointer to the svc_serv cannot become NULL while a reference is held.
+
+This used to be the case but a recent patch split nfsd_last_thread() out
+from nfsd_put(), and clearing the pointer is done in nfsd_last_thread().
+
+This is easily reproduced by running
+ rpc.nfsd 8 ; ( rpc.nfsd 0;true) < /proc/fs/nfsd/pool_stats
+
+Fortunately nfsd_pool_stats_release() has easy access to the svc_serv
+pointer, and so can call svc_put() on it directly.
+
+Fixes: 9f28a971ee9f ("nfsd: separate nfsd_last_thread() from nfsd_put()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index ee5713fca1870..2a1dd580dfb94 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1084,11 +1084,12 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+
+ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
+ {
++ struct seq_file *seq = file->private_data;
++ struct svc_serv *serv = seq->private;
+ int ret = seq_release(inode, file);
+- struct net *net = inode->i_sb->s_fs_info;
+
+ mutex_lock(&nfsd_mutex);
+- nfsd_put(net);
++ svc_put(serv);
+ mutex_unlock(&nfsd_mutex);
+ return ret;
+ }
+--
+2.43.0
+
--- /dev/null
+From ad6502a4bc03b8f140d8ab38e6d0a54c275759b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 31 May 2022 19:49:01 -0400
+Subject: NFSD: Fix potential use-after-free in nfsd_file_put()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit b6c71c66b0ad8f2b59d9bc08c7a5079b110bec01 ]
+
+nfsd_file_put_noref() can free @nf, so don't dereference @nf
+immediately upon return from nfsd_file_put_noref().
+
+Suggested-by: Trond Myklebust <trondmy@hammerspace.com>
+Fixes: 999397926ab3 ("nfsd: Clean up nfsd_file_put()")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 27952e2f3aa14..1d3d13b78be0e 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -302,11 +302,12 @@ nfsd_file_put(struct nfsd_file *nf)
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
+ nfsd_file_flush(nf);
+ nfsd_file_put_noref(nf);
+- } else {
++ } else if (nf->nf_file) {
+ nfsd_file_put_noref(nf);
+- if (nf->nf_file)
+- nfsd_file_schedule_laundrette();
+- }
++ nfsd_file_schedule_laundrette();
++ } else
++ nfsd_file_put_noref(nf);
++
+ if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+ nfsd_file_gc();
+ }
+--
+2.43.0
+
--- /dev/null
+From ea0d226c8795d0fc02a90ce18e08ee13be914cad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Apr 2023 10:53:18 -0700
+Subject: NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 147abcacee33781e75588869e944ddb07528a897 ]
+
+The following request sequence to the same file causes the NFS client and
+server getting into an infinite loop with COMMIT and NFS4ERR_DELAY:
+
+OPEN
+REMOVE
+WRITE
+COMMIT
+
+Problem reported by recall11, recall12, recall14, recall20, recall22,
+recall40, recall42, recall48, recall50 of nfstest suite.
+
+This patch restores the handling of the race with unlink in
+nfsd_file_do_acquire to what it was prior to the regression.
+
+Fixes: ac3a2585f018 ("nfsd: rework refcounting in filecache")
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6b8706f23eaf0..615ea8324911e 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1098,8 +1098,6 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * then unhash.
+ */
+ if (status != nfs_ok || inode->i_nlink == 0)
+- status = nfserr_jukebox;
+- if (status != nfs_ok)
+ nfsd_file_unhash(nf);
+ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ if (status == nfs_ok)
+--
+2.43.0
+
--- /dev/null
+From 135601fe8bddf569c37fc383e2e8fb38a5ddeceb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 31 Jan 2023 11:12:29 -0800
+Subject: NFSD: fix problems with cleanup on errors in nfsd4_copy
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 81e722978ad21072470b73d8f6a50ad62c7d5b7d ]
+
+When nfsd4_copy fails to allocate memory for async_copy->cp_src, or
+nfs4_init_copy_state fails, it calls cleanup_async_copy to do the
+cleanup for the async_copy which causes page fault since async_copy
+is not yet initialized.
+
+This patch rearranges the order of initializing the fields in
+async_copy and adds checks in cleanup_async_copy to skip un-initialized
+fields.
+
+Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy")
+Fixes: 87689df69491 ("NFSD: Shrink size of struct nfsd4_copy")
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 12 ++++++++----
+ fs/nfsd/nfs4state.c | 5 +++--
+ 2 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index c95e7ec5fb530..ba53cd89ec62c 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1687,9 +1687,12 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ {
+ nfs4_free_copy_state(copy);
+ release_copy_files(copy);
+- spin_lock(©->cp_clp->async_lock);
+- list_del(©->copies);
+- spin_unlock(©->cp_clp->async_lock);
++ if (copy->cp_clp) {
++ spin_lock(©->cp_clp->async_lock);
++ if (!list_empty(©->copies))
++ list_del_init(©->copies);
++ spin_unlock(©->cp_clp->async_lock);
++ }
+ nfs4_put_copy(copy);
+ }
+
+@@ -1786,12 +1789,13 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+ if (!async_copy)
+ goto out_err;
++ INIT_LIST_HEAD(&async_copy->copies);
++ refcount_set(&async_copy->refcount, 1);
+ async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
+ if (!async_copy->cp_src)
+ goto out_err;
+ if (!nfs4_init_copy_state(nn, copy))
+ goto out_err;
+- refcount_set(&async_copy->refcount, 1);
+ memcpy(©->cp_res.cb_stateid, ©->cp_stateid.cs_stid,
+ sizeof(copy->cp_res.cb_stateid));
+ dup_copy_fields(copy, async_copy);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 773971a75b62d..fab5805b3ca74 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -990,7 +990,6 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+
+ stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+ stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+- stid->cs_type = cs_type;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&nn->s2s_cp_lock);
+@@ -1001,6 +1000,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+ idr_preload_end();
+ if (new_id < 0)
+ return 0;
++ stid->cs_type = cs_type;
+ return 1;
+ }
+
+@@ -1034,7 +1034,8 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy)
+ {
+ struct nfsd_net *nn;
+
+- WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID);
++ if (copy->cp_stateid.cs_type != NFS4_COPY_STID)
++ return;
+ nn = net_generic(copy->cp_clp->net, nfsd_net_id);
+ spin_lock(&nn->s2s_cp_lock);
+ idr_remove(&nn->s2s_cp_stateids,
+--
+2.43.0
+
--- /dev/null
+From c334501aa7e307ab69c161b35e66c1c3e60a3091 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Nov 2022 14:14:32 -0500
+Subject: NFSD: Fix reads with a non-zero offset that don't end on a page
+ boundary
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit ac8db824ead0de2e9111337c401409d010fba2f0 ]
+
+This was found when virtual machines with nfs-mounted qcow2 disks
+failed to boot properly.
+
+Reported-by: Anders Blomdell <anders.blomdell@control.lth.se>
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2142132
+Fixes: bfbfb6182ad1 ("nfsd_splice_actor(): handle compound pages")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index d17377148b669..9215350ad095c 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -872,11 +872,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct svc_rqst *rqstp = sd->u.data;
+ struct page *page = buf->page; // may be a compound one
+ unsigned offset = buf->offset;
+- int i;
++ struct page *last_page;
+
+- page += offset / PAGE_SIZE;
+- for (i = sd->len; i > 0; i -= PAGE_SIZE)
+- svc_rqst_replace_page(rqstp, page++);
++ last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
++ for (page += offset / PAGE_SIZE; page <= last_page; page++)
++ svc_rqst_replace_page(rqstp, page);
+ if (rqstp->rq_res.page_len == 0) // first call
+ rqstp->rq_res.page_base = offset % PAGE_SIZE;
+ rqstp->rq_res.page_len += sd->len;
+--
+2.43.0
+
--- /dev/null
+From b464d10764d55d06cb1a7a2dedf61815edc4bb32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 12:08:40 +1000
+Subject: NFSD: fix regression with setting ACLs.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 00801cd92d91e94aa04d687f9bb9a9104e7c3d46 ]
+
+A recent patch moved ACL setting into nfsd_setattr().
+Unfortunately it didn't work as nfsd_setattr() aborts early if
+iap->ia_valid is 0.
+
+Remove this test, and instead avoid calling notify_change() when
+ia_valid is 0.
+
+This means that nfsd_setattr() will now *always* lock the inode.
+Previously it didn't if only a ATTR_MODE change was requested on a
+symlink (see Commit 15b7a1b86d66 ("[PATCH] knfsd: fix setattr-on-symlink
+error return")). I don't think this change really matters.
+
+Fixes: c0cbe70742f4 ("NFSD: add posix ACLs to struct nfsd_attrs")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 66d4a126f20ab..ad689215b1f37 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -299,6 +299,10 @@ commit_metadata(struct svc_fh *fhp)
+ static void
+ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
+ {
++ /* Ignore mode updates on symlinks */
++ if (S_ISLNK(inode->i_mode))
++ iap->ia_valid &= ~ATTR_MODE;
++
+ /* sanitize the mode change */
+ if (iap->ia_valid & ATTR_MODE) {
+ iap->ia_mode &= S_IALLUGO;
+@@ -354,7 +358,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+- int host_err;
++ int host_err = 0;
+ bool get_write_count;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
+
+@@ -392,13 +396,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ dentry = fhp->fh_dentry;
+ inode = d_inode(dentry);
+
+- /* Ignore any mode updates on symlinks */
+- if (S_ISLNK(inode->i_mode))
+- iap->ia_valid &= ~ATTR_MODE;
+-
+- if (!iap->ia_valid)
+- return 0;
+-
+ nfsd_sanitize_attrs(inode, iap);
+
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+@@ -449,8 +446,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto out_unlock;
+ }
+
+- iap->ia_valid |= ATTR_CTIME;
+- host_err = notify_change(&init_user_ns, dentry, iap, NULL);
++ if (iap->ia_valid) {
++ iap->ia_valid |= ATTR_CTIME;
++ host_err = notify_change(&init_user_ns, dentry, iap, NULL);
++ }
+
+ out_unlock:
+ if (attr->na_seclabel && attr->na_seclabel->len)
+--
+2.43.0
+
--- /dev/null
+From cd7436f74e870f3d610095ee2a2f850ff56642d7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Jan 2024 14:58:16 +1100
+Subject: nfsd: fix RELEASE_LOCKOWNER
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ]
+
+The test on so_count in nfsd4_release_lockowner() is nonsense and
+harmful. Revert to using check_for_locks(), changing that to not sleep.
+
+First: harmful.
+As is documented in the kdoc comment for nfsd4_release_lockowner(), the
+test on so_count can transiently return a false positive resulting in a
+return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
+clearly a protocol violation and with the Linux NFS client it can cause
+incorrect behaviour.
+
+If RELEASE_LOCKOWNER is sent while some other thread is still
+processing a LOCK request which failed because, at the time that request
+was received, the given owner held a conflicting lock, then the nfsd
+thread processing that LOCK request can hold a reference (conflock) to
+the lock owner that causes nfsd4_release_lockowner() to return an
+incorrect error.
+
+The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
+never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
+it knows that the error is impossible. It assumes the lock owner was in
+fact released so it feels free to use the same lock owner identifier in
+some later locking request.
+
+When it does reuse a lock owner identifier for which a previous RELEASE
+failed, it will naturally use a lock_seqid of zero. However the server,
+which didn't release the lock owner, will expect a larger lock_seqid and
+so will respond with NFS4ERR_BAD_SEQID.
+
+So clearly it is harmful to allow a false positive, which testing
+so_count allows.
+
+The test is nonsense because ... well... it doesn't mean anything.
+
+so_count is the sum of three different counts.
+1/ the set of states listed on so_stateids
+2/ the set of active vfs locks owned by any of those states
+3/ various transient counts such as for conflicting locks.
+
+When it is tested against '2' it is clear that one of these is the
+transient reference obtained by find_lockowner_str_locked(). It is not
+clear what the other one is expected to be.
+
+In practice, the count is often 2 because there is precisely one state
+on so_stateids. If there were more, this would fail.
+
+In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
+In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
+all the lock states being removed, and so the lockowner being discarded
+(it is removed when there are no more references which usually happens
+when the lock state is discarded). When nfsd4_release_lockowner() finds
+that the lock owner doesn't exist, it returns success.
+
+The other case shows an so_count of '2' and precisely one state listed
+in so_stateid. It appears that the Linux client uses a separate lock
+owner for each file resulting in one lock state per lock owner, so this
+test on '2' is safe. For another client it might not be safe.
+
+So this patch changes check_for_locks() to use the (newish)
+find_any_file_locked() so that it doesn't take a reference on the
+nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
+this check it is safe to restore the use of check_for_locks() rather
+than testing so_count against the mysterious '2'.
+
+Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 4d95b2052c31a..64a8567ea4c40 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7831,14 +7831,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ {
+ struct file_lock *fl;
+ int status = false;
+- struct nfsd_file *nf = find_any_file(fp);
++ struct nfsd_file *nf;
+ struct inode *inode;
+ struct file_lock_context *flctx;
+
++ spin_lock(&fp->fi_lock);
++ nf = find_any_file_locked(fp);
+ if (!nf) {
+ /* Any valid lock stateid should have some sort of access */
+ WARN_ON_ONCE(1);
+- return status;
++ goto out;
+ }
+
+ inode = locks_inode(nf->nf_file);
+@@ -7854,7 +7856,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ }
+ spin_unlock(&flctx->flc_lock);
+ }
+- nfsd_file_put(nf);
++out:
++ spin_unlock(&fp->fi_lock);
+ return status;
+ }
+
+@@ -7864,10 +7867,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ * @cstate: NFSv4 COMPOUND state
+ * @u: RELEASE_LOCKOWNER arguments
+ *
+- * The lockowner's so_count is bumped when a lock record is added
+- * or when copying a conflicting lock. The latter case is brief,
+- * but can lead to fleeting false positives when looking for
+- * locks-in-use.
++ * Check if theree are any locks still held and if not - free the lockowner
++ * and any lock state that is owned.
+ *
+ * Return values:
+ * %nfs_ok: lockowner released or not found
+@@ -7903,10 +7904,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ spin_unlock(&clp->cl_lock);
+ return nfs_ok;
+ }
+- if (atomic_read(&lo->lo_owner.so_count) != 2) {
+- spin_unlock(&clp->cl_lock);
+- nfs4_put_stateowner(&lo->lo_owner);
+- return nfserr_locks_held;
++
++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++ if (check_for_locks(stp->st_stid.sc_file, lo)) {
++ spin_unlock(&clp->cl_lock);
++ nfs4_put_stateowner(&lo->lo_owner);
++ return nfserr_locks_held;
++ }
+ }
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+--
+2.43.0
+
--- /dev/null
+From 83d9cc3ff1602332d08f47c3d9e415a86ae9a120 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 16:20:05 +0800
+Subject: NFSD: Fix space and spelling mistake
+
+From: Zhang Jiaming <jiaming@nfschina.com>
+
+[ Upstream commit f532c9ff103897be0e2a787c0876683c3dc39ed3 ]
+
+Add a blank space after ','.
+Change 'succesful' to 'successful'.
+
+Signed-off-by: Zhang Jiaming <jiaming@nfschina.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 5b56877c7fb57..d70c4e78f0b3f 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -828,7 +828,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ goto out_umask;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- &create->cr_iattr,S_IFCHR, rdev, &resfh);
++ &create->cr_iattr, S_IFCHR, rdev, &resfh);
+ break;
+
+ case NF4SOCK:
+@@ -2712,7 +2712,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
+ /*
+ * Don't execute this op if we couldn't encode a
+- * succesful reply:
++ * successful reply:
+ */
+ u32 plen = op->opdesc->op_rsize_bop(rqstp, op);
+ /*
+--
+2.43.0
+
--- /dev/null
+From 27ead6558d77e8f3cb20aa8a82e5f9a847bbcfec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:03 -0400
+Subject: NFSD: Fix strncpy() fortify warning
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5304877936c0a67e1a01464d113bae4c81eacdb6 ]
+
+In function ‘strncpy’,
+ inlined from ‘nfsd4_ssc_setup_dul’ at /home/cel/src/linux/manet/fs/nfsd/nfs4proc.c:1392:3,
+ inlined from ‘nfsd4_interssc_connect’ at /home/cel/src/linux/manet/fs/nfsd/nfs4proc.c:1489:11:
+/home/cel/src/linux/manet/include/linux/fortify-string.h:52:33: warning: ‘__builtin_strncpy’ specified bound 63 equals destination size [-Wstringop-truncation]
+ 52 | #define __underlying_strncpy __builtin_strncpy
+ | ^
+/home/cel/src/linux/manet/include/linux/fortify-string.h:89:16: note: in expansion of macro ‘__underlying_strncpy’
+ 89 | return __underlying_strncpy(p, q, size);
+ | ^~~~~~~~~~~~~~~~~~~~
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ include/linux/nfs_ssc.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 15991eb9b8d8c..5dce18fe99085 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1392,7 +1392,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ return 0;
+ }
+ if (work) {
+- strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
++ strlcpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
+ refcount_set(&work->nsui_refcnt, 2);
+ work->nsui_busy = true;
+ list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
+diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h
+index 222ae8883e854..75843c00f326a 100644
+--- a/include/linux/nfs_ssc.h
++++ b/include/linux/nfs_ssc.h
+@@ -64,7 +64,7 @@ struct nfsd4_ssc_umount_item {
+ refcount_t nsui_refcnt;
+ unsigned long nsui_expire;
+ struct vfsmount *nsui_vfsmount;
+- char nsui_ipaddr[RPC_MAX_ADDRBUFLEN];
++ char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1];
+ };
+ #endif
+
+--
+2.43.0
+
--- /dev/null
+From bb70f0375facb568b6bdfb69ae65e233dd1df6db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:24 -0400
+Subject: NFSD: Fix the filecache LRU shrinker
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit edead3a55804739b2e4af0f35e9c7326264e7b22 ]
+
+Without LRU item rotation, the shrinker visits only a few items on
+the end of the LRU list, and those would always be long-term OPEN
+files for NFSv4 workloads. That makes the filecache shrinker
+completely ineffective.
+
+Adopt the same strategy as the inode LRU by using LRU_ROTATE.
+
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 5c9e3ff6397b0..849c010c6ef61 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -445,6 +445,7 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
+ *
+ * Return values:
+ * %LRU_REMOVED: @item was removed from the LRU
++ * %LRU_ROTATE: @item is to be moved to the LRU tail
+ * %LRU_SKIP: @item cannot be evicted
+ */
+ static enum lru_status
+@@ -483,7 +484,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+
+ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
+ trace_nfsd_file_gc_referenced(nf);
+- return LRU_SKIP;
++ return LRU_ROTATE;
+ }
+
+ if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+@@ -525,7 +526,7 @@ nfsd_file_gc(void)
+ unsigned long ret;
+
+ ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
+- &dispose, LONG_MAX);
++ &dispose, list_lru_count(&nfsd_file_lru));
+ trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_gc_dispose_list(&dispose);
+ }
+--
+2.43.0
+
--- /dev/null
+From 6cf2813fe46fa851e1ba78c8f285bae81d4d471a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Nov 2022 15:06:07 -0500
+Subject: NFSD: Fix trace_nfsd_fh_verify_err() crasher
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5a01c805441bdc86e7af206d8a03735cc9394ffb ]
+
+Now that the nfsd_fh_verify_err() tracepoint is always called on
+error, it needs to handle cases where the filehandle is not yet
+fully formed.
+
+Fixes: 93c128e709ae ("nfsd: ensure we always call fh_verify_error tracepoint")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/trace.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 1229502b6e9e0..72aa7435d55bd 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -254,7 +254,10 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
+ rqstp->rq_xprt->xpt_remotelen);
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+- __entry->inode = d_inode(fhp->fh_dentry);
++ if (fhp->fh_dentry)
++ __entry->inode = d_inode(fhp->fh_dentry);
++ else
++ __entry->inode = NULL;
+ __entry->type = type;
+ __entry->access = access;
+ __entry->error = be32_to_cpu(error);
+--
+2.43.0
+
--- /dev/null
+From 6f33f5254748be823a885ed8e7164ee9bf96a67c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 14:44:50 -0400
+Subject: nfsd: fix up the filecache laundrette scheduling
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 22ae4c114f77b55a4c5036e8f70409a0799a08f8 ]
+
+We don't really care whether there are hashed entries when it comes to
+scheduling the laundrette. They might all be non-gc entries, after all.
+We only want to schedule it if there are entries on the LRU.
+
+Switch to using list_lru_count, and move the check into
+nfsd_file_gc_worker. The other callsite in nfsd_file_put doesn't need to
+count entries, since it only schedules the laundrette after adding an
+entry to the LRU.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 12 +++++-------
+ 1 file changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index fb7ada3f7410e..522e900a88605 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -210,12 +210,9 @@ static const struct rhashtable_params nfsd_file_rhash_params = {
+ static void
+ nfsd_file_schedule_laundrette(void)
+ {
+- if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
+- test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
+- return;
+-
+- queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+- NFSD_LAUNDRETTE_DELAY);
++ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
++ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
++ NFSD_LAUNDRETTE_DELAY);
+ }
+
+ static void
+@@ -665,7 +662,8 @@ static void
+ nfsd_file_gc_worker(struct work_struct *work)
+ {
+ nfsd_file_gc();
+- nfsd_file_schedule_laundrette();
++ if (list_lru_count(&nfsd_file_lru))
++ nfsd_file_schedule_laundrette();
+ }
+
+ static unsigned long
+--
+2.43.0
+
--- /dev/null
+From 53f39666e341c24bbacba9b5528de78ff111c2bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 09:49:26 -0400
+Subject: nfsd: fix use-after-free in nfsd_file_do_acquire tracepoint
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit bdd6b5624c62d0acd350d07564f1c82fe649235f ]
+
+When we fail to insert into the hashtable with a non-retryable error,
+we'll free the object and then goto out_status. If the tracepoint is
+enabled, it'll end up accessing the freed object when it tries to
+grab the fields out of it.
+
+Set nf to NULL after freeing it to avoid the issue.
+
+Fixes: 243a5263014a ("nfsd: rework hashtable handling in nfsd_do_file_acquire")
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <error27@gmail.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 024adcbe67e95..dceb522f5cee9 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1075,6 +1075,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto open_file;
+
+ nfsd_file_slab_free(&nf->nf_rcu);
++ nf = NULL;
+ if (ret == -EEXIST)
+ goto retry;
+ trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
+--
+2.43.0
+
--- /dev/null
+From 670891d2427eda6b5a6131a12748841c9c575f15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Mar 2022 21:27:04 +0100
+Subject: nfsd: fix using the correct variable for sizeof()
+
+From: Jakob Koschel <jakobkoschel@gmail.com>
+
+[ Upstream commit 4fc5f5346592cdc91689455d83885b0af65d71b8 ]
+
+While the original code is valid, it is not the obvious choice for the
+sizeof() call and in preparation to limit the scope of the list iterator
+variable the sizeof should be changed to the size of the destination.
+
+Signed-off-by: Jakob Koschel <jakobkoschel@gmail.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4layouts.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
+index 2673019d30ecd..7018d209b784a 100644
+--- a/fs/nfsd/nfs4layouts.c
++++ b/fs/nfsd/nfs4layouts.c
+@@ -421,7 +421,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
+ new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
+ if (!new)
+ return nfserr_jukebox;
+- memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
++ memcpy(&new->lo_seg, seg, sizeof(new->lo_seg));
+ new->lo_state = ls;
+
+ spin_lock(&fp->fi_lock);
+--
+2.43.0
+
--- /dev/null
+From 6c2bfff03e466b6db5c2205144c30f84eb5aa316 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Mar 2022 16:41:32 -0400
+Subject: NFSD: Fix whitespace
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 26320d7e317c37404c811603d50d811132aef78c ]
+
+Clean up: Pull case arms back one tab stop to conform to every other
+switch statement in fs/nfsd/nfs4proc.c.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 50 +++++++++++++++++++++++-----------------------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 611aedeab406b..6a9c7e6a23ba5 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -600,33 +600,33 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ goto out;
+
+ switch (open->op_claim_type) {
+- case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+- case NFS4_OPEN_CLAIM_NULL:
+- status = do_open_lookup(rqstp, cstate, open, &resfh);
+- if (status)
+- goto out;
+- break;
+- case NFS4_OPEN_CLAIM_PREVIOUS:
+- status = nfs4_check_open_reclaim(cstate->clp);
+- if (status)
+- goto out;
+- open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+- reclaim = true;
+- fallthrough;
+- case NFS4_OPEN_CLAIM_FH:
+- case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+- status = do_open_fhandle(rqstp, cstate, open);
+- if (status)
+- goto out;
+- resfh = &cstate->current_fh;
+- break;
+- case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+- case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+- status = nfserr_notsupp;
++ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
++ case NFS4_OPEN_CLAIM_NULL:
++ status = do_open_lookup(rqstp, cstate, open, &resfh);
++ if (status)
+ goto out;
+- default:
+- status = nfserr_inval;
++ break;
++ case NFS4_OPEN_CLAIM_PREVIOUS:
++ status = nfs4_check_open_reclaim(cstate->clp);
++ if (status)
+ goto out;
++ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
++ reclaim = true;
++ fallthrough;
++ case NFS4_OPEN_CLAIM_FH:
++ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
++ status = do_open_fhandle(rqstp, cstate, open);
++ if (status)
++ goto out;
++ resfh = &cstate->current_fh;
++ break;
++ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
++ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
++ status = nfserr_notsupp;
++ goto out;
++ default:
++ status = nfserr_inval;
++ goto out;
+ }
+ /*
+ * nfsd4_process_open2() does the actual opening of the file. If
+--
+2.43.0
+
--- /dev/null
+From bdf7af93e6a41a906ca09819d4b45547640d5edb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Nov 2022 13:30:46 -0400
+Subject: NFSD: Flesh out a documenting comment for filecache.c
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit b3276c1f5b268ff56622e9e125b792b4c3dc03ac ]
+
+Record what we've learned recently about the NFSD filecache in a
+documenting comment so our future selves don't forget what all this
+is for.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 13a25503b80e1..d681faf48cf85 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -2,6 +2,30 @@
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
++ *
++ * An nfsd_file object is a per-file collection of open state that binds
++ * together:
++ * - a struct file *
++ * - a user credential
++ * - a network namespace
++ * - a read-ahead context
++ * - monitoring for writeback errors
++ *
++ * nfsd_file objects are reference-counted. Consumers acquire a new
++ * object via the nfsd_file_acquire API. They manage their interest in
++ * the acquired object, and hence the object's reference count, via
++ * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
++ * object:
++ *
++ * * non-garbage-collected: When a consumer wants to precisely control
++ * the lifetime of a file's open state, it acquires a non-garbage-
++ * collected nfsd_file. The final nfsd_file_put releases the open
++ * state immediately.
++ *
++ * * garbage-collected: When a consumer does not control the lifetime
++ * of open state, it acquires a garbage-collected nfsd_file. The
++ * final nfsd_file_put allows the open state to linger for a period
++ * during which it may be re-used.
+ */
+
+ #include <linux/hash.h>
+--
+2.43.0
+
--- /dev/null
+From bee689f738cb9ceced8c0cde723f78e038e913d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: NFSD: handle errors better in write_ports_addfd()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 89b24336f03a8ba560e96b0c47a8434a7fa48e3c ]
+
+If write_ports_add() fails, we shouldn't destroy the serv, unless we had
+only just created it. So if there are any permanent sockets already
+attached, leave the serv in place.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index d0761ca8cb542..162866cfe83a2 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -742,7 +742,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+ return err;
+
+ err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+- if (err < 0) {
++ if (err < 0 && list_empty(&nn->nfsd_serv->sv_permsocks)) {
+ nfsd_destroy(net);
+ return err;
+ }
+--
+2.43.0
+
--- /dev/null
+From 25eb0136578fb98a675a7d81de7f55c61ae21346 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:58 -0400
+Subject: NFSD: Hook up the filecache stat file
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 2e6c6e4c4375bfd3defa5b1ff3604d9f33d1c936 ]
+
+There has always been the capability of exporting filecache metrics
+via /proc, but it was never hooked up. Let's surface these metrics
+to enable better observability of the filecache.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 66c352bf61b1d..7002edbf26870 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -25,6 +25,7 @@
+ #include "state.h"
+ #include "netns.h"
+ #include "pnfs.h"
++#include "filecache.h"
+
+ /*
+ * We have a single directory with several nodes in it.
+@@ -45,6 +46,7 @@ enum {
+ NFSD_Ports,
+ NFSD_MaxBlkSize,
+ NFSD_MaxConnections,
++ NFSD_Filecache,
+ NFSD_SupportedEnctypes,
+ /*
+ * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
+@@ -229,6 +231,13 @@ static const struct file_operations reply_cache_stats_operations = {
+ .release = single_release,
+ };
+
++static const struct file_operations filecache_ops = {
++ .open = nfsd_file_cache_stats_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
+ /*----------------------------------------------------------------------------*/
+ /*
+ * payload - write methods
+@@ -1370,6 +1379,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+ [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
++ [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO},
+ #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
+ [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
+ #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
+--
+2.43.0
+
--- /dev/null
+From b59fadb3b01eca42885ae89b2c24e54302b0566b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 07:47:54 -0400
+Subject: nfsd: ignore requests to disable unsupported versions
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 8e823bafff2308753d430566256c83d8085952da ]
+
+The kernel currently errors out if you attempt to enable or disable a
+version that it doesn't recognize. Change it to ignore attempts to
+disable an unrecognized version. If we don't support it, then there is
+no harm in doing so.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Tom Talpey <tom@talpey.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index dc74a947a440c..68ed42fd29fc8 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -601,7 +601,9 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
+ }
+ break;
+ default:
+- return -EINVAL;
++ /* Ignore requests to disable non-existent versions */
++ if (cmd == NFSD_SET)
++ return -EINVAL;
+ }
+ vers += len + 1;
+ } while ((len = qword_get(&mesg, vers, size)) > 0);
+--
+2.43.0
+
--- /dev/null
+From 008352c0f626345a40e8afee1601ca8ba42efc7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Dec 2021 17:32:21 -0500
+Subject: nfsd: improve stateid access bitmask documentation
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+[ Upstream commit 3dcd1d8aab00c5d3a0a3725253c86440b1a0f5a7 ]
+
+The use of the bitmaps is confusing. Add a cross-reference to make it
+easier to find the existing comment. Add an updated reference with URL
+to make it quicker to look up. And a bit more editorializing about the
+value of this.
+
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 14 ++++++++++----
+ fs/nfsd/state.h | 4 ++++
+ 2 files changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index e14b38d6751d8..f7e2beded6d7f 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -360,11 +360,13 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used. This allows us
+- * to enforce the recommendation of rfc 3530 14.2.19 that the server
+- * return an error if the client attempt to downgrade to a combination
+- * of share bits not explicable by closing some of its previous opens.
++ * to enforce the recommendation in
++ * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
++ * the server return an error if the client attempt to downgrade to a
++ * combination of share bits not explicable by closing some of its
++ * previous opens.
+ *
+- * XXX: This enforcement is actually incomplete, since we don't keep
++ * This enforcement is arguably incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ * OPEN allow read, deny write
+@@ -372,6 +374,10 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ * DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
++ *
++ * But you could also argue that our current code is already overkill,
++ * since it only exists to return NFS4ERR_INVAL on incorrect client
++ * behavior.
+ */
+ static unsigned int
+ bmap_to_share_mode(unsigned long bmap)
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index e73bdbb1634ab..6eb3c7157214b 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -568,6 +568,10 @@ struct nfs4_ol_stateid {
+ struct list_head st_locks;
+ struct nfs4_stateowner *st_stateowner;
+ struct nfs4_clnt_odstate *st_clnt_odstate;
++/*
++ * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the
++ * comment above bmap_to_share_mode() for explanation:
++ */
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
+ struct nfs4_ol_stateid *st_openstp;
+--
+2.43.0
+
--- /dev/null
+From 724b9b2e83f4f9982a0656bdab45870dd7ebdd8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Sep 2022 18:18:16 -0400
+Subject: NFSD: Increase NFSD_MAX_OPS_PER_COMPOUND
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 80e591ce636f3ae6855a0ca26963da1fdd6d4508 ]
+
+When attempting an NFSv4 mount, a Solaris NFSv4 client builds a
+single large COMPOUND that chains a series of LOOKUPs to get to the
+pseudo filesystem root directory that is to be mounted. The Linux
+NFS server's current maximum of 16 operations per NFSv4 COMPOUND is
+not large enough to ensure that this works for paths that are more
+than a few components deep.
+
+Since NFSD_MAX_OPS_PER_COMPOUND is mostly a sanity check, and most
+NFSv4 COMPOUNDS are between 3 and 6 operations (thus they do not
+trigger any re-allocation of the operation array on the server),
+increasing this maximum should result in little to no impact.
+
+The ops array can get large now, so allocate it via vmalloc() to
+help ensure memory fragmentation won't cause an allocation failure.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216383
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 7 ++++---
+ fs/nfsd/state.h | 2 +-
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 5aafbd0f7ae30..0f30d93577e7b 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -42,6 +42,8 @@
+ #include <linux/sunrpc/svcauth_gss.h>
+ #include <linux/sunrpc/addr.h>
+ #include <linux/xattr.h>
++#include <linux/vmalloc.h>
++
+ #include <uapi/linux/xattr.h>
+
+ #include "idmap.h"
+@@ -2364,10 +2366,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ NFSD_MAX_OPS_PER_COMPOUND);
+
+ if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
+- argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
++ argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
+ if (!argp->ops) {
+ argp->ops = argp->iops;
+- dprintk("nfsd: couldn't allocate room for COMPOUND\n");
+ return false;
+ }
+ }
+@@ -5399,7 +5400,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
+ if (args->ops != args->iops) {
+- kfree(args->ops);
++ vfree(args->ops);
+ args->ops = args->iops;
+ }
+ while (args->to_free) {
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index ae596dbf86675..5d28beb290fef 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -175,7 +175,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
+ /* Maximum number of slots per session. 160 is useful for long haul TCP */
+ #define NFSD_MAX_SLOTS_PER_SESSION 160
+ /* Maximum number of operations per session compound */
+-#define NFSD_MAX_OPS_PER_COMPOUND 16
++#define NFSD_MAX_OPS_PER_COMPOUND 50
+ /* Maximum session per slot cache size */
+ #define NFSD_SLOT_CACHE_SIZE 2048
+ /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
+--
+2.43.0
+
--- /dev/null
+From a2fc17ce259714eca36b6f6dac7d47e9ed974ab1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 25 Sep 2021 23:58:41 +0100
+Subject: NFSD: Initialize pointer ni with NULL and not plain integer 0
+
+From: Colin Ian King <colin.king@canonical.com>
+
+[ Upstream commit 8e70bf27fd20cc17e87150327a640e546bfbee64 ]
+
+Pointer ni is being initialized with plain integer zero. Fix
+this by initializing with NULL.
+
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfs4state.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index f5ac637b6e83d..bc7ae9a8604ec 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1184,7 +1184,7 @@ extern void nfs_sb_deactive(struct super_block *sb);
+ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt)
+ {
+- struct nfsd4_ssc_umount_item *ni = 0;
++ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd4_ssc_umount_item *work = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+ DEFINE_WAIT(wait);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 26c4212bcfcde..e14b38d6751d8 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5573,7 +5573,7 @@ static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
+ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+ {
+ bool do_wakeup = false;
+- struct nfsd4_ssc_umount_item *ni = 0;
++ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+
+ spin_lock(&nn->nfsd_ssc_lock);
+--
+2.43.0
+
--- /dev/null
+From 275c4a30df85ed595a2c01acfcd73f87d95071b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Mar 2022 10:30:54 -0400
+Subject: NFSD: Instantiate a struct file when creating a regular NFSv4 file
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fb70bf124b051d4ded4ce57511dfec6d3ebf2b43 ]
+
+There have been reports of races that cause NFSv4 OPEN(CREATE) to
+return an error even though the requested file was created. NFSv4
+does not provide a status code for this case.
+
+To mitigate some of these problems, reorganize the NFSv4
+OPEN(CREATE) logic to allocate resources before the file is actually
+created, and open the new file while the parent directory is still
+locked.
+
+Two new APIs are added:
+
++ Add an API that works like nfsd_file_acquire() but does not open
+the underlying file. The OPEN(CREATE) path can use this API when it
+already has an open file.
+
++ Add an API that is kin to dentry_open(). NFSD needs to create a
+file and grab an open "struct file *" atomically. The
+alloc_empty_file() has to be done before the inode create. If it
+fails (for example, because the NFS server has exceeded its
+max_files limit), we avoid creating the file and can still return
+an error to the NFS client.
+
+BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=382
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: JianHong Yin <jiyin@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 51 ++++++++++++++++++++++++++++++++++++++-------
+ fs/nfsd/filecache.h | 2 ++
+ fs/nfsd/nfs4proc.c | 43 ++++++++++++++++++++++++++++++++++----
+ fs/nfsd/nfs4state.c | 16 +++++++++++---
+ fs/nfsd/xdr4.h | 1 +
+ fs/open.c | 42 +++++++++++++++++++++++++++++++++++++
+ include/linux/fs.h | 2 ++
+ 7 files changed, 143 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 0f6553b316f58..045f5a869ddc7 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -892,9 +892,9 @@ nfsd_file_is_cached(struct inode *inode)
+ return ret;
+ }
+
+-__be32
+-nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- unsigned int may_flags, struct nfsd_file **pnf)
++static __be32
++nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct nfsd_file **pnf, bool open)
+ {
+ __be32 status;
+ struct net *net = SVC_NET(rqstp);
+@@ -975,10 +975,13 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nfsd_file_gc();
+
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+- if (nf->nf_mark)
+- status = nfsd_open_verified(rqstp, fhp, may_flags,
+- &nf->nf_file);
+- else
++ if (nf->nf_mark) {
++ if (open)
++ status = nfsd_open_verified(rqstp, fhp, may_flags,
++ &nf->nf_file);
++ else
++ status = nfs_ok;
++ } else
+ status = nfserr_jukebox;
+ /*
+ * If construction failed, or we raced with a call to unlink()
+@@ -998,6 +1001,40 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto out;
+ }
+
++/**
++ * nfsd_file_acquire - Get a struct nfsd_file with an open file
++ * @rqstp: the RPC transaction being executed
++ * @fhp: the NFS filehandle of the file to be opened
++ * @may_flags: NFSD_MAY_ settings for the file
++ * @pnf: OUT: new or found "struct nfsd_file" object
++ *
++ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
++ * network byte order is returned.
++ */
++__be32
++nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct nfsd_file **pnf)
++{
++ return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
++}
++
++/**
++ * nfsd_file_create - Get a struct nfsd_file, do not open
++ * @rqstp: the RPC transaction being executed
++ * @fhp: the NFS filehandle of the file just created
++ * @may_flags: NFSD_MAY_ settings for the file
++ * @pnf: OUT: new or found "struct nfsd_file" object
++ *
++ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
++ * network byte order is returned.
++ */
++__be32
++nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct nfsd_file **pnf)
++{
++ return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
++}
++
+ /*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 63104be2865c5..c9e3c6eb4776e 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -57,5 +57,7 @@ void nfsd_file_close_inode_sync(struct inode *inode);
+ bool nfsd_file_is_cached(struct inode *inode);
+ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
++__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ unsigned int may_flags, struct nfsd_file **nfp);
+ int nfsd_file_cache_stats_open(struct inode *, struct file *);
+ #endif /* _FS_NFSD_FILECACHE_H */
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 489cdcd8f8c9a..f0cb92466da84 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -243,6 +243,37 @@ static inline bool nfsd4_create_is_exclusive(int createmode)
+ createmode == NFS4_CREATE_EXCLUSIVE4_1;
+ }
+
++static __be32
++nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
++ struct nfsd4_open *open)
++{
++ struct file *filp;
++ struct path path;
++ int oflags;
++
++ oflags = O_CREAT | O_LARGEFILE;
++ switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) {
++ case NFS4_SHARE_ACCESS_WRITE:
++ oflags |= O_WRONLY;
++ break;
++ case NFS4_SHARE_ACCESS_BOTH:
++ oflags |= O_RDWR;
++ break;
++ default:
++ oflags |= O_RDONLY;
++ }
++
++ path.mnt = fhp->fh_export->ex_path.mnt;
++ path.dentry = child;
++ filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
++ current_cred());
++ if (IS_ERR(filp))
++ return nfserrno(PTR_ERR(filp));
++
++ open->op_filp = filp;
++ return nfs_ok;
++}
++
+ /*
+ * Implement NFSv4's unchecked, guarded, and exclusive create
+ * semantics for regular files. Open state for this new file is
+@@ -355,11 +386,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
+- host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
+- if (host_err < 0) {
+- status = nfserrno(host_err);
++ status = nfsd4_vfs_create(fhp, child, open);
++ if (status != nfs_ok)
+ goto out;
+- }
+ open->op_created = true;
+
+ /* A newly created file already has a file size of zero. */
+@@ -517,6 +546,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ (int)open->op_fnamelen, open->op_fname,
+ open->op_openowner);
+
++ open->op_filp = NULL;
++
+ /* This check required by spec. */
+ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
+ return nfserr_inval;
+@@ -613,6 +644,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (reclaim && !status)
+ nn->somebody_reclaimed = true;
+ out:
++ if (open->op_filp) {
++ fput(open->op_filp);
++ open->op_filp = NULL;
++ }
+ if (resfh && resfh != &cstate->current_fh) {
+ fh_dup2(&cstate->current_fh, resfh);
+ fh_put(resfh);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 5bbf769b688bc..f9681a4d116ad 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5110,9 +5110,19 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+
+ if (!fp->fi_fds[oflag]) {
+ spin_unlock(&fp->fi_lock);
+- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
+- if (status)
+- goto out_put_access;
++
++ if (!open->op_filp) {
++ status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
++ if (status != nfs_ok)
++ goto out_put_access;
++ } else {
++ status = nfsd_file_create(rqstp, cur_fh, access, &nf);
++ if (status != nfs_ok)
++ goto out_put_access;
++ nf->nf_file = open->op_filp;
++ open->op_filp = NULL;
++ }
++
+ spin_lock(&fp->fi_lock);
+ if (!fp->fi_fds[oflag]) {
+ fp->fi_fds[oflag] = nf;
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 4f1090c32c29b..77286e8c9ab02 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -273,6 +273,7 @@ struct nfsd4_open {
+ bool op_truncate; /* used during processing */
+ bool op_created; /* used during processing */
+ struct nfs4_openowner *op_openowner; /* used during processing */
++ struct file *op_filp; /* used during processing */
+ struct nfs4_file *op_file; /* used during processing */
+ struct nfs4_ol_stateid *op_stp; /* used during processing */
+ struct nfs4_clnt_odstate *op_odstate; /* used during processing */
+diff --git a/fs/open.c b/fs/open.c
+index 159a2765b7eb2..43e5ca4324bc0 100644
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -979,6 +979,48 @@ struct file *dentry_open(const struct path *path, int flags,
+ }
+ EXPORT_SYMBOL(dentry_open);
+
++/**
++ * dentry_create - Create and open a file
++ * @path: path to create
++ * @flags: O_ flags
++ * @mode: mode bits for new file
++ * @cred: credentials to use
++ *
++ * Caller must hold the parent directory's lock, and have prepared
++ * a negative dentry, placed in @path->dentry, for the new file.
++ *
++ * Caller sets @path->mnt to the vfsmount of the filesystem where
++ * the new file is to be created. The parent directory and the
++ * negative dentry must reside on the same filesystem instance.
++ *
++ * On success, returns a "struct file *". Otherwise a ERR_PTR
++ * is returned.
++ */
++struct file *dentry_create(const struct path *path, int flags, umode_t mode,
++ const struct cred *cred)
++{
++ struct file *f;
++ int error;
++
++ validate_creds(cred);
++ f = alloc_empty_file(flags, cred);
++ if (IS_ERR(f))
++ return f;
++
++ error = vfs_create(mnt_user_ns(path->mnt),
++ d_inode(path->dentry->d_parent),
++ path->dentry, mode, true);
++ if (!error)
++ error = vfs_open(path, f);
++
++ if (unlikely(error)) {
++ fput(f);
++ return ERR_PTR(error);
++ }
++ return f;
++}
++EXPORT_SYMBOL(dentry_create);
++
+ struct file *open_with_fake_path(const struct path *path, int flags,
+ struct inode *inode, const struct cred *cred)
+ {
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 371d67c9221c5..ef5a04d626953 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2755,6 +2755,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
+ name, flags, mode);
+ }
+ extern struct file * dentry_open(const struct path *, int, const struct cred *);
++extern struct file *dentry_create(const struct path *path, int flags,
++ umode_t mode, const struct cred *cred);
+ extern struct file * open_with_fake_path(const struct path *, int,
+ struct inode*, const struct cred *);
+ static inline struct file *file_clone_open(struct file *file)
+--
+2.43.0
+
--- /dev/null
+From eb9e33d3dadb930c572c323a6de5345cc17c8349 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jun 2022 10:06:23 -0400
+Subject: NFSD: Instrument fh_verify()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 051382885552e12541cc0ebf82092be374a9ed2a ]
+
+Capture file handles and how they map to local inodes. In particular,
+NFSv4 PUTFH uses fh_verify() so we can now observe which file handles
+are the target of OPEN, LOOKUP, RENAME, and so on.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.c | 5 +++--
+ fs/nfsd/trace.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 49 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index c29baa03dfafd..5e2ed4b2a925c 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -331,8 +331,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ struct dentry *dentry;
+ __be32 error;
+
+- dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
+-
+ if (!fhp->fh_dentry) {
+ error = nfsd_set_fh_dentry(rqstp, fhp);
+ if (error)
+@@ -340,6 +338,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ }
+ dentry = fhp->fh_dentry;
+ exp = fhp->fh_export;
++
++ trace_nfsd_fh_verify(rqstp, fhp, type, access);
++
+ /*
+ * We still have to do all these permission checks, even when
+ * fh_dentry is already set:
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 3cff3ada00a85..593218d8a54d0 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -171,6 +171,52 @@ TRACE_EVENT(nfsd_compound_encode_err,
+ __entry->opnum, __entry->status)
+ );
+
++#define show_fs_file_type(x) \
++ __print_symbolic(x, \
++ { S_IFLNK, "LNK" }, \
++ { S_IFREG, "REG" }, \
++ { S_IFDIR, "DIR" }, \
++ { S_IFCHR, "CHR" }, \
++ { S_IFBLK, "BLK" }, \
++ { S_IFIFO, "FIFO" }, \
++ { S_IFSOCK, "SOCK" })
++
++TRACE_EVENT(nfsd_fh_verify,
++ TP_PROTO(
++ const struct svc_rqst *rqstp,
++ const struct svc_fh *fhp,
++ umode_t type,
++ int access
++ ),
++ TP_ARGS(rqstp, fhp, type, access),
++ TP_STRUCT__entry(
++ __field(unsigned int, netns_ino)
++ __sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
++ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
++ __field(u32, xid)
++ __field(u32, fh_hash)
++ __field(void *, inode)
++ __field(unsigned long, type)
++ __field(unsigned long, access)
++ ),
++ TP_fast_assign(
++ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
++ __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local,
++ rqstp->rq_xprt->xpt_locallen);
++ __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote,
++ rqstp->rq_xprt->xpt_remotelen);
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
++ __entry->inode = d_inode(fhp->fh_dentry);
++ __entry->type = type;
++ __entry->access = access;
++ ),
++ TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s",
++ __entry->xid, __entry->fh_hash, __entry->inode,
++ show_fs_file_type(__entry->type),
++ show_nfsd_may_flags(__entry->access)
++ )
++);
+
+ DECLARE_EVENT_CLASS(nfsd_fh_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+--
+2.43.0
+
--- /dev/null
+From 2665a88c9d8e81d1e9f47d56f9002bf2c1a6646f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: introduce struct nfsd_attrs
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 7fe2a71dda349a1afa75781f0cc7975be9784d15 ]
+
+The attributes that nfsd might want to set on a file include 'struct
+iattr' as well as an ACL and security label.
+The latter two are passed around quite separately from the first, in
+part because they are only needed for NFSv4. This leads to some
+clumsiness in the code, such as the attributes NOT being set in
+nfsd_create_setattr().
+
+We need to keep the directory locked until all attributes are set to
+ensure the file is never visible without all its attributes. This need
+combined with the inconsistent handling of attributes leads to more
+clumsiness.
+
+As a first step towards tidying this up, introduce 'struct nfsd_attrs'.
+This is passed (by reference) to vfs.c functions that work with
+attributes, and is assembled by the various nfs*proc functions which
+call them. As yet only iattr is included, but future patches will
+expand this.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 20 ++++++++++++++++----
+ fs/nfsd/nfs4proc.c | 23 ++++++++++++++++-------
+ fs/nfsd/nfs4state.c | 5 ++++-
+ fs/nfsd/nfsproc.c | 17 +++++++++++++----
+ fs/nfsd/vfs.c | 24 ++++++++++++++----------
+ fs/nfsd/vfs.h | 12 ++++++++----
+ 6 files changed, 71 insertions(+), 30 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 57854ca022d18..113567b3a98a5 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -67,12 +67,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_sattrargs *argp = rqstp->rq_argp;
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &argp->attrs,
++ };
+
+ dprintk("nfsd: SETATTR(3) %s\n",
+ SVCFH_fmt(&argp->fh));
+
+ fh_copy(&resp->fh, &argp->fh);
+- resp->status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
++ resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
+ argp->check_guard, argp->guardtime);
+ return rpc_success;
+ }
+@@ -233,6 +236,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ {
+ struct iattr *iap = &argp->attrs;
+ struct dentry *parent, *child;
++ struct nfsd_attrs attrs = {
++ .na_iattr = iap,
++ };
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+ __be32 status;
+@@ -331,7 +337,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ set_attr:
+- status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
++ status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
+
+ out:
+ fh_unlock(fhp);
+@@ -368,6 +374,9 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_createargs *argp = rqstp->rq_argp;
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &argp->attrs,
++ };
+
+ dprintk("nfsd: MKDIR(3) %s %.*s\n",
+ SVCFH_fmt(&argp->fh),
+@@ -378,7 +387,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
+ fh_copy(&resp->dirfh, &argp->fh);
+ fh_init(&resp->fh, NFS3_FHSIZE);
+ resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+- &argp->attrs, S_IFDIR, 0, &resp->fh);
++ &attrs, S_IFDIR, 0, &resp->fh);
+ fh_unlock(&resp->dirfh);
+ return rpc_success;
+ }
+@@ -428,6 +437,9 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_mknodargs *argp = rqstp->rq_argp;
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &argp->attrs,
++ };
+ int type;
+ dev_t rdev = 0;
+
+@@ -453,7 +465,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
+
+ type = nfs3_ftypes[argp->ftype];
+ resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+- &argp->attrs, type, rdev, &resp->fh);
++ &attrs, type, rdev, &resp->fh);
+ fh_unlock(&resp->dirfh);
+ out:
+ return rpc_success;
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index ae0948271da9c..9b04611a318d7 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -286,6 +286,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd4_open *open)
+ {
+ struct iattr *iap = &open->op_iattr;
++ struct nfsd_attrs attrs = {
++ .na_iattr = iap,
++ };
+ struct dentry *parent, *child;
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+@@ -404,7 +407,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ set_attr:
+- status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
++ status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
+
+ out:
+ fh_unlock(fhp);
+@@ -787,6 +790,9 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+ {
+ struct nfsd4_create *create = &u->create;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &create->cr_iattr,
++ };
+ struct svc_fh resfh;
+ __be32 status;
+ dev_t rdev;
+@@ -818,7 +824,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ goto out_umask;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- &create->cr_iattr, S_IFBLK, rdev, &resfh);
++ &attrs, S_IFBLK, rdev, &resfh);
+ break;
+
+ case NF4CHR:
+@@ -829,26 +835,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ goto out_umask;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- &create->cr_iattr, S_IFCHR, rdev, &resfh);
++ &attrs, S_IFCHR, rdev, &resfh);
+ break;
+
+ case NF4SOCK:
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- &create->cr_iattr, S_IFSOCK, 0, &resfh);
++ &attrs, S_IFSOCK, 0, &resfh);
+ break;
+
+ case NF4FIFO:
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- &create->cr_iattr, S_IFIFO, 0, &resfh);
++ &attrs, S_IFIFO, 0, &resfh);
+ break;
+
+ case NF4DIR:
+ create->cr_iattr.ia_valid &= ~ATTR_SIZE;
+ status = nfsd_create(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- &create->cr_iattr, S_IFDIR, 0, &resfh);
++ &attrs, S_IFDIR, 0, &resfh);
+ break;
+
+ default:
+@@ -1142,6 +1148,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+ {
+ struct nfsd4_setattr *setattr = &u->setattr;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &setattr->sa_iattr,
++ };
+ __be32 status = nfs_ok;
+ int err;
+
+@@ -1174,7 +1183,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ &setattr->sa_label);
+ if (status)
+ goto out;
+- status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
++ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
+ 0, (time64_t)0);
+ out:
+ fh_drop_write(&cstate->current_fh);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 7122ebc50a035..a299aeaa0de07 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5077,11 +5077,14 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
+ .ia_valid = ATTR_SIZE,
+ .ia_size = 0,
+ };
++ struct nfsd_attrs attrs = {
++ .na_iattr = &iattr,
++ };
+ if (!open->op_truncate)
+ return 0;
+ if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+ return nfserr_inval;
+- return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
++ return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0);
+ }
+
+ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index f65eba938a57d..c75d83bc3f21b 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -51,6 +51,9 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
+ struct nfsd_sattrargs *argp = rqstp->rq_argp;
+ struct nfsd_attrstat *resp = rqstp->rq_resp;
+ struct iattr *iap = &argp->attrs;
++ struct nfsd_attrs attrs = {
++ .na_iattr = iap,
++ };
+ struct svc_fh *fhp;
+
+ dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
+@@ -100,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
+ }
+ }
+
+- resp->status = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0);
++ resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
+ if (resp->status != nfs_ok)
+ goto out;
+
+@@ -261,6 +264,9 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ svc_fh *dirfhp = &argp->fh;
+ svc_fh *newfhp = &resp->fh;
+ struct iattr *attr = &argp->attrs;
++ struct nfsd_attrs attrs = {
++ .na_iattr = attr,
++ };
+ struct inode *inode;
+ struct dentry *dchild;
+ int type, mode;
+@@ -386,7 +392,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ if (!inode) {
+ /* File doesn't exist. Create it and set attrs */
+ resp->status = nfsd_create_locked(rqstp, dirfhp, argp->name,
+- argp->len, attr, type, rdev,
++ argp->len, &attrs, type, rdev,
+ newfhp);
+ } else if (type == S_IFREG) {
+ dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
+@@ -397,7 +403,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ */
+ attr->ia_valid &= ATTR_SIZE;
+ if (attr->ia_valid)
+- resp->status = nfsd_setattr(rqstp, newfhp, attr, 0,
++ resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
+ (time64_t)0);
+ }
+
+@@ -512,6 +518,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
+ {
+ struct nfsd_createargs *argp = rqstp->rq_argp;
+ struct nfsd_diropres *resp = rqstp->rq_resp;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &argp->attrs,
++ };
+
+ dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+@@ -523,7 +532,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
+ argp->attrs.ia_valid &= ~ATTR_SIZE;
+ fh_init(&resp->fh, NFS_FHSIZE);
+ resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
+- &argp->attrs, S_IFDIR, 0, &resp->fh);
++ &attrs, S_IFDIR, 0, &resp->fh);
+ fh_put(&argp->fh);
+ if (resp->status != nfs_ok)
+ goto out;
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 6689ad5bb790d..489225de05a2a 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -350,11 +350,13 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * Set various file attributes. After this call fhp needs an fh_put.
+ */
+ __be32
+-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
++nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ struct nfsd_attrs *attr,
+ int check_guard, time64_t guardtime)
+ {
+ struct dentry *dentry;
+ struct inode *inode;
++ struct iattr *iap = attr->na_iattr;
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+@@ -1203,14 +1205,15 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ * @rqstp: RPC transaction being executed
+ * @fhp: NFS filehandle of parent directory
+ * @resfhp: NFS filehandle of new object
+- * @iap: requested attributes of new object
++ * @attrs: requested attributes of new object
+ *
+ * Returns nfs_ok on success, or an nfsstat in network byte order.
+ */
+ __be32
+ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct svc_fh *resfhp, struct iattr *iap)
++ struct svc_fh *resfhp, struct nfsd_attrs *attrs)
+ {
++ struct iattr *iap = attrs->na_iattr;
+ __be32 status;
+
+ /*
+@@ -1231,7 +1234,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * if the attributes have not changed.
+ */
+ if (iap->ia_valid)
+- status = nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
++ status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0);
+ else
+ status = nfserrno(commit_metadata(resfhp));
+
+@@ -1270,11 +1273,12 @@ nfsd_check_ignore_resizing(struct iattr *iap)
+ /* The parent directory should already be locked: */
+ __be32
+ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- char *fname, int flen, struct iattr *iap,
+- int type, dev_t rdev, struct svc_fh *resfhp)
++ char *fname, int flen, struct nfsd_attrs *attrs,
++ int type, dev_t rdev, struct svc_fh *resfhp)
+ {
+ struct dentry *dentry, *dchild;
+ struct inode *dirp;
++ struct iattr *iap = attrs->na_iattr;
+ __be32 err;
+ int host_err;
+
+@@ -1348,7 +1352,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (host_err < 0)
+ goto out_nfserr;
+
+- err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
++ err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
+
+ out:
+ dput(dchild);
+@@ -1367,8 +1371,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ __be32
+ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- char *fname, int flen, struct iattr *iap,
+- int type, dev_t rdev, struct svc_fh *resfhp)
++ char *fname, int flen, struct nfsd_attrs *attrs,
++ int type, dev_t rdev, struct svc_fh *resfhp)
+ {
+ struct dentry *dentry, *dchild = NULL;
+ __be32 err;
+@@ -1400,7 +1404,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ dput(dchild);
+ if (err)
+ return err;
+- return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
++ return nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type,
+ rdev, resfhp);
+ }
+
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 26347d76f44a0..d8b1a36fca956 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -42,6 +42,10 @@ struct nfsd_file;
+ typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
+
+ /* nfsd/vfs.c */
++struct nfsd_attrs {
++ struct iattr *na_iattr; /* input */
++};
++
+ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
+ __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+@@ -50,7 +54,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
+ const char *, unsigned int,
+ struct svc_export **, struct dentry **);
+ __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+- struct iattr *, int, time64_t);
++ struct nfsd_attrs *, int, time64_t);
+ int nfsd_mountpoint(struct dentry *, struct svc_export *);
+ #ifdef CONFIG_NFSD_V4
+ __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
+@@ -63,14 +67,14 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ u64 count, bool sync);
+ #endif /* CONFIG_NFSD_V4 */
+ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
+- char *name, int len, struct iattr *attrs,
++ char *name, int len, struct nfsd_attrs *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
+- char *name, int len, struct iattr *attrs,
++ char *name, int len, struct nfsd_attrs *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+ __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- struct svc_fh *resfhp, struct iattr *iap);
++ struct svc_fh *resfhp, struct nfsd_attrs *iap);
+ __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
+ #ifdef CONFIG_NFSD_V4
+--
+2.43.0
+
--- /dev/null
+From b9b380d8f0204e73c3a23eb1192c0b1348e2be18 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Sep 2022 08:54:25 -0700
+Subject: NFSD: keep track of the number of courtesy clients in the system
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 3a4ea23d86a317c4b68b9a69d51f7e84e1e04357 ]
+
+Add counter nfs4_courtesy_client_count to nfsd_net to keep track
+of the number of courtesy clients in the system.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 2 ++
+ fs/nfsd/nfs4state.c | 17 ++++++++++++++++-
+ 2 files changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index ffe17743cc74b..55c7006d6109a 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -192,6 +192,8 @@ struct nfsd_net {
+
+ atomic_t nfs4_client_count;
+ int nfs4_max_clients;
++
++ atomic_t nfsd_courtesy_clients;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 6cb654e308787..6a7a99511111d 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -160,6 +160,13 @@ static bool is_client_expired(struct nfs4_client *clp)
+ return clp->cl_time == 0;
+ }
+
++static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn,
++ struct nfs4_client *clp)
++{
++ if (clp->cl_state != NFSD4_ACTIVE)
++ atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0);
++}
++
+ static __be32 get_client_locked(struct nfs4_client *clp)
+ {
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+@@ -169,6 +176,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
+ if (is_client_expired(clp))
+ return nfserr_expired;
+ atomic_inc(&clp->cl_rpc_users);
++ nfsd4_dec_courtesy_client_count(nn, clp);
+ clp->cl_state = NFSD4_ACTIVE;
+ return nfs_ok;
+ }
+@@ -190,6 +198,7 @@ renew_client_locked(struct nfs4_client *clp)
+
+ list_move_tail(&clp->cl_lru, &nn->client_lru);
+ clp->cl_time = ktime_get_boottime_seconds();
++ nfsd4_dec_courtesy_client_count(nn, clp);
+ clp->cl_state = NFSD4_ACTIVE;
+ }
+
+@@ -2248,6 +2257,7 @@ __destroy_client(struct nfs4_client *clp)
+ if (clp->cl_cb_conn.cb_xprt)
+ svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+ atomic_add_unless(&nn->nfs4_client_count, -1, 0);
++ nfsd4_dec_courtesy_client_count(nn, clp);
+ free_client(clp);
+ wake_up_all(&expiry_wq);
+ }
+@@ -4375,6 +4385,8 @@ void nfsd4_init_leases_net(struct nfsd_net *nn)
+ max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024);
+ max_clients *= NFS4_CLIENTS_PER_GB;
+ nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
++
++ atomic_set(&nn->nfsd_courtesy_clients, 0);
+ }
+
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+@@ -5928,8 +5940,11 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ goto exp_client;
+ if (!state_expired(lt, clp->cl_time))
+ break;
+- if (!atomic_read(&clp->cl_rpc_users))
++ if (!atomic_read(&clp->cl_rpc_users)) {
++ if (clp->cl_state == NFSD4_ACTIVE)
++ atomic_inc(&nn->nfsd_courtesy_clients);
+ clp->cl_state = NFSD4_COURTESY;
++ }
+ if (!client_has_state(clp))
+ goto exp_client;
+ if (!nfs4_anylock_blockers(clp))
+--
+2.43.0
+
--- /dev/null
+From 12dbb9402244cbc7a51b2ba6b2defcd50686f369 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 16:54:52 -0700
+Subject: NFSD: keep track of the number of v4 clients in the system
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 0926c39515aa065a296e97dfc8790026f1e53f86 ]
+
+Add counter nfs4_client_count to keep track of the total number
+of v4 clients, including courtesy clients, in the system.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 2 ++
+ fs/nfsd/nfs4state.c | 10 ++++++++--
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 1b1a962a18041..ce864f001a3ee 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -189,6 +189,8 @@ struct nfsd_net {
+ struct nfsd_fcache_disposal *fcache_disposal;
+
+ siphash_key_t siphash_key;
++
++ atomic_t nfs4_client_count;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index a75f3f7c94d50..3d5ef021632b9 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2066,7 +2066,8 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
+ * This type of memory management is somewhat inefficient, but we use it
+ * anyway since SETCLIENTID is not a common operation.
+ */
+-static struct nfs4_client *alloc_client(struct xdr_netobj name)
++static struct nfs4_client *alloc_client(struct xdr_netobj name,
++ struct nfsd_net *nn)
+ {
+ struct nfs4_client *clp;
+ int i;
+@@ -2089,6 +2090,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
+ atomic_set(&clp->cl_rpc_users, 0);
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ clp->cl_state = NFSD4_ACTIVE;
++ atomic_inc(&nn->nfs4_client_count);
+ atomic_set(&clp->cl_delegs_in_recall, 0);
+ INIT_LIST_HEAD(&clp->cl_idhash);
+ INIT_LIST_HEAD(&clp->cl_openowners);
+@@ -2196,6 +2198,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
+ static void
+ __destroy_client(struct nfs4_client *clp)
+ {
++ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ int i;
+ struct nfs4_openowner *oo;
+ struct nfs4_delegation *dp;
+@@ -2239,6 +2242,7 @@ __destroy_client(struct nfs4_client *clp)
+ nfsd4_shutdown_callback(clp);
+ if (clp->cl_cb_conn.cb_xprt)
+ svc_xprt_put(clp->cl_cb_conn.cb_xprt);
++ atomic_add_unless(&nn->nfs4_client_count, -1, 0);
+ free_client(clp);
+ wake_up_all(&expiry_wq);
+ }
+@@ -2865,7 +2869,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct dentry *dentries[ARRAY_SIZE(client_files)];
+
+- clp = alloc_client(name);
++ clp = alloc_client(name, nn);
+ if (clp == NULL)
+ return NULL;
+
+@@ -4357,6 +4361,8 @@ void nfsd4_init_leases_net(struct nfsd_net *nn)
+ nn->clientid_base = prandom_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
+ nn->s2s_cp_cl_id = nn->clientid_counter++;
++
++ atomic_set(&nn->nfs4_client_count, 0);
+ }
+
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+--
+2.43.0
+
--- /dev/null
+From 5efc94b04f57523f370b3342c877925f91c18737 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:17 -0400
+Subject: NFSD: Leave open files out of the filecache LRU
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 4a0e73e635e3f36b616ad5c943e3d23debe4632f ]
+
+There have been reports of problems when running fstests generic/531
+against Linux NFS servers with NFSv4. The NFS server that hosts the
+test's SCRATCH_DEV suffers from CPU soft lock-ups during the test.
+Analysis shows that:
+
+fs/nfsd/filecache.c
+ 482 ret = list_lru_walk(&nfsd_file_lru,
+ 483 nfsd_file_lru_cb,
+ 484 &head, LONG_MAX);
+
+causes nfsd_file_gc() to walk the entire length of the filecache LRU
+list every time it is called (which is quite frequently). The walk
+holds a spinlock the entire time that prevents other nfsd threads
+from accessing the filecache.
+
+What's more, for NFSv4 workloads, none of the items that are visited
+during this walk may be evicted, since they are all files that are
+held OPEN by NFS clients.
+
+Address this by ensuring that open files are not kept on the LRU
+list.
+
+Reported-by: Frank van der Linden <fllinden@amazon.com>
+Reported-by: Wang Yugui <wangyugui@e16-tech.com>
+Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=386
+Suggested-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 24 +++++++++++++++++++-----
+ fs/nfsd/trace.h | 2 ++
+ 2 files changed, 21 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index a995a744a7481..5c9e3ff6397b0 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -262,6 +262,7 @@ nfsd_file_flush(struct nfsd_file *nf)
+
+ static void nfsd_file_lru_add(struct nfsd_file *nf)
+ {
++ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
+ trace_nfsd_file_lru_add(nf);
+ }
+@@ -291,7 +292,6 @@ nfsd_file_unhash(struct nfsd_file *nf)
+ {
+ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_do_unhash(nf);
+- nfsd_file_lru_remove(nf);
+ return true;
+ }
+ return false;
+@@ -312,6 +312,7 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp
+ if (refcount_dec_not_one(&nf->nf_ref))
+ return true;
+
++ nfsd_file_lru_remove(nf);
+ list_add(&nf->nf_lru, dispose);
+ return true;
+ }
+@@ -323,6 +324,7 @@ nfsd_file_put_noref(struct nfsd_file *nf)
+
+ if (refcount_dec_and_test(&nf->nf_ref)) {
+ WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
++ nfsd_file_lru_remove(nf);
+ nfsd_file_free(nf);
+ }
+ }
+@@ -332,7 +334,7 @@ nfsd_file_put(struct nfsd_file *nf)
+ {
+ might_sleep();
+
+- set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
++ nfsd_file_lru_add(nf);
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
+ nfsd_file_flush(nf);
+ nfsd_file_put_noref(nf);
+@@ -432,8 +434,18 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
+ }
+ }
+
+-/*
++/**
++ * nfsd_file_lru_cb - Examine an entry on the LRU list
++ * @item: LRU entry to examine
++ * @lru: controlling LRU
++ * @lock: LRU list lock (unused)
++ * @arg: dispose list
++ *
+ * Note this can deadlock with nfsd_file_cache_purge.
++ *
++ * Return values:
++ * %LRU_REMOVED: @item was removed from the LRU
++ * %LRU_SKIP: @item cannot be evicted
+ */
+ static enum lru_status
+ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+@@ -455,8 +467,9 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ * That order is deliberate to ensure that we can do this locklessly.
+ */
+ if (refcount_read(&nf->nf_ref) > 1) {
++ list_lru_isolate(lru, &nf->nf_lru);
+ trace_nfsd_file_gc_in_use(nf);
+- return LRU_SKIP;
++ return LRU_REMOVED;
+ }
+
+ /*
+@@ -1013,6 +1026,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto retry;
+ }
+
++ nfsd_file_lru_remove(nf);
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+@@ -1034,7 +1048,6 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ refcount_inc(&nf->nf_ref);
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+- nfsd_file_lru_add(nf);
+ hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+ ++nfsd_file_hashtbl[hashval].nfb_count;
+ nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+@@ -1059,6 +1072,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ if (status != nfs_ok || inode->i_nlink == 0) {
+ bool do_free;
++ nfsd_file_lru_remove(nf);
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ do_free = nfsd_file_unhash(nf);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index c47f46d433ddb..cc55a2b32e8cd 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -926,7 +926,9 @@ DEFINE_EVENT(nfsd_file_gc_class, name, \
+ TP_ARGS(nf))
+
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
+--
+2.43.0
+
--- /dev/null
+From e36af7d80ade017b1aa38d5af03fab3157328e6c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 16:54:53 -0700
+Subject: NFSD: limit the number of v4 clients to 1024 per 1GB of system memory
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 4271c2c0887562318a0afef97d32d8a71cbe0743 ]
+
+Currently there is no limit on how many v4 clients are supported
+by the system. Systems with a small memory configuration may fail
+to function properly when a very large number of clients exists
+and creates memory shortage conditions.
+
+This patch enforces a limit of 1024 NFSv4 clients, including courtesy
+clients, per 1GB of system memory. When the number of the clients
+reaches the limit, requests that create new clients are returned
+with NFS4ERR_DELAY and the laundromat is kick-started to trim old
+clients. Due to the overhead of the upcall to remove the client
+record, the maximum number of clients the laundromat removes on
+each run is limited to 128. This is done to ensure the laundromat
+can still process the other tasks in a timely manner.
+
+Since there is now a limit of the number of clients, the 24-hr
+idle time limit of courtesy client is no longer needed and was
+removed.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 1 +
+ fs/nfsd/nfs4state.c | 27 +++++++++++++++++++++------
+ fs/nfsd/nfsd.h | 2 ++
+ 3 files changed, 24 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index ce864f001a3ee..ffe17743cc74b 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -191,6 +191,7 @@ struct nfsd_net {
+ siphash_key_t siphash_key;
+
+ atomic_t nfs4_client_count;
++ int nfs4_max_clients;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 3d5ef021632b9..340d533dcafd3 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2072,6 +2072,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
+ struct nfs4_client *clp;
+ int i;
+
++ if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) {
++ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
++ return NULL;
++ }
+ clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
+ if (clp == NULL)
+ return NULL;
+@@ -4353,6 +4357,9 @@ nfsd4_init_slabs(void)
+
+ void nfsd4_init_leases_net(struct nfsd_net *nn)
+ {
++ struct sysinfo si;
++ u64 max_clients;
++
+ nn->nfsd4_lease = 90; /* default lease time */
+ nn->nfsd4_grace = 90;
+ nn->somebody_reclaimed = false;
+@@ -4363,6 +4370,10 @@ void nfsd4_init_leases_net(struct nfsd_net *nn)
+ nn->s2s_cp_cl_id = nn->clientid_counter++;
+
+ atomic_set(&nn->nfs4_client_count, 0);
++ si_meminfo(&si);
++ max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024);
++ max_clients *= NFS4_CLIENTS_PER_GB;
++ nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
+ }
+
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+@@ -5828,9 +5839,12 @@ static void
+ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ struct laundry_time *lt)
+ {
++ unsigned int maxreap, reapcnt = 0;
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
++ maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ?
++ NFSD_CLIENT_MAX_TRIM_PER_RUN : 0;
+ INIT_LIST_HEAD(reaplist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+@@ -5841,14 +5855,15 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ break;
+ if (!atomic_read(&clp->cl_rpc_users))
+ clp->cl_state = NFSD4_COURTESY;
+- if (!client_has_state(clp) ||
+- ktime_get_boottime_seconds() >=
+- (clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
++ if (!client_has_state(clp))
+ goto exp_client;
+- if (nfs4_anylock_blockers(clp)) {
++ if (!nfs4_anylock_blockers(clp))
++ if (reapcnt >= maxreap)
++ continue;
+ exp_client:
+- if (!mark_client_expired_locked(clp))
+- list_add(&clp->cl_lru, reaplist);
++ if (!mark_client_expired_locked(clp)) {
++ list_add(&clp->cl_lru, reaplist);
++ reapcnt++;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index ef8087691138a..57a468ed85c35 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -341,6 +341,8 @@ void nfsd_lockd_shutdown(void);
+
+ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
+ #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
++#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
++#define NFS4_CLIENTS_PER_GB 1024
+
+ /*
+ * The following attributes are currently not supported by the NFSv4 server:
+--
+2.43.0
+
--- /dev/null
+From 9978aa324e42206241b84ae97a0574aa9d4517ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 May 2023 12:26:44 -0400
+Subject: nfsd: make a copy of struct iattr before calling notify_change
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit d53d70084d27f56bcdf5074328f2c9ec861be596 ]
+
+notify_change can modify the iattr structure. In particular it can
+end up setting ATTR_MODE when ATTR_KILL_SUID is already set, causing
+a BUG() if the same iattr is passed to notify_change more than once.
+
+Make a copy of the struct iattr before calling notify_change.
+
+Reported-by: Zhi Li <yieli@redhat.com>
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2207969
+Tested-by: Zhi Li <yieli@redhat.com>
+Fixes: 34b91dda7124 ("NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY")
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 71788a5e4a55c..76ce19d42336f 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -533,7 +533,15 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ inode_lock(inode);
+ for (retries = 1;;) {
+- host_err = __nfsd_setattr(dentry, iap);
++ struct iattr attrs;
++
++ /*
++ * notify_change() can alter its iattr argument, making
++ * @iap unsuitable for submission multiple times. Make a
++ * copy for every loop iteration.
++ */
++ attrs = *iap;
++ host_err = __nfsd_setattr(dentry, &attrs);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, inode))
+--
+2.43.0
+
--- /dev/null
+From 5550b46c735b95172e5eff8b784a10a2d3d9caea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: NFSD: Make it possible to use svc_set_num_threads_sync
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 3409e4f1e8f239f0ed81be0b068ecf4e73e2e826 ]
+
+nfsd cannot currently use svc_set_num_threads_sync. It instead
+uses svc_set_num_threads which does *not* wait for threads to all
+exit, and has a separate mechanism (nfsd_shutdown_complete) to wait
+for completion.
+
+The reason that nfsd is unlike other services is that nfsd threads can
+exit separately from svc_set_num_threads being called - they die on
+receipt of SIGKILL. Also, when the last thread exits, the service must
+be shut down (sockets closed).
+
+For this, the nfsd_mutex needs to be taken, and as that mutex needs to
+be held while svc_set_num_threads is called, the one cannot wait for
+the other.
+
+This patch changes the nfsd thread so that it can drop the ref on the
+service without blocking on nfsd_mutex, so that svc_set_num_threads_sync
+can be used:
+ - if it can drop a non-last reference, it does that. This does not
+ trigger shutdown and does not require a mutex. This will likely
+ happen for all but the last thread signalled, and for all threads
+ being shut down by nfsd_shutdown_threads()
+ - if it can get the mutex without blocking (trylock), it does that
+ and then drops the reference. This will likely happen for the
+ last thread killed by SIGKILL
+ - Otherwise there might be an unrelated task holding the mutex,
+ possibly in another network namespace, or nfsd_shutdown_threads()
+ might be just about to get a reference on the service, after which
+ we can drop ours safely.
+ We cannot conveniently get wakeup notifications on these events,
+ and we are unlikely to need to, so we sleep briefly and check again.
+
+With this we can discard nfsd_shutdown_complete and
+nfsd_complete_shutdown(), and switch to svc_set_num_threads_sync.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 3 ---
+ fs/nfsd/nfssvc.c | 41 +++++++++++++++++++-------------------
+ include/linux/sunrpc/svc.h | 13 ++++++++++++
+ 3 files changed, 33 insertions(+), 24 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 08bcd8f23b013..1fd59eb0730bb 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -134,9 +134,6 @@ struct nfsd_net {
+ wait_queue_head_t ntf_wq;
+ atomic_t ntf_refcnt;
+
+- /* Allow umount to wait for nfsd state cleanup */
+- struct completion nfsd_shutdown_complete;
+-
+ /*
+ * clientid and stateid data for construction of net unique COPY
+ * stateids.
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index eb8cc4d914fee..6b10415e4006b 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -593,20 +593,10 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
+ .svo_shutdown = nfsd_last_thread,
+ .svo_function = nfsd,
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
+- .svo_setup = svc_set_num_threads,
++ .svo_setup = svc_set_num_threads_sync,
+ .svo_module = THIS_MODULE,
+ };
+
+-static void nfsd_complete_shutdown(struct net *net)
+-{
+- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+-
+- WARN_ON(!mutex_is_locked(&nfsd_mutex));
+-
+- nn->nfsd_serv = NULL;
+- complete(&nn->nfsd_shutdown_complete);
+-}
+-
+ void nfsd_shutdown_threads(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+@@ -624,8 +614,6 @@ void nfsd_shutdown_threads(struct net *net)
+ serv->sv_ops->svo_setup(serv, NULL, 0);
+ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+- /* Wait for shutdown of nfsd_serv to complete */
+- wait_for_completion(&nn->nfsd_shutdown_complete);
+ }
+
+ bool i_am_nfsd(void)
+@@ -650,7 +638,6 @@ int nfsd_create_serv(struct net *net)
+ &nfsd_thread_sv_ops);
+ if (nn->nfsd_serv == NULL)
+ return -ENOMEM;
+- init_completion(&nn->nfsd_shutdown_complete);
+
+ nn->nfsd_serv->sv_maxconn = nn->max_connections;
+ error = svc_bind(nn->nfsd_serv, net);
+@@ -659,7 +646,7 @@ int nfsd_create_serv(struct net *net)
+ * been set up yet.
+ */
+ svc_put(nn->nfsd_serv);
+- nfsd_complete_shutdown(net);
++ nn->nfsd_serv = NULL;
+ return error;
+ }
+
+@@ -715,7 +702,7 @@ void nfsd_put(struct net *net)
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
+ svc_shutdown_net(nn->nfsd_serv, net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+- nfsd_complete_shutdown(net);
++ nn->nfsd_serv = NULL;
+ }
+ }
+
+@@ -743,7 +730,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
+ if (tot > NFSD_MAXSERVS) {
+ /* total too large: scale down requested numbers */
+ for (i = 0; i < n && tot > 0; i++) {
+- int new = nthreads[i] * NFSD_MAXSERVS / tot;
++ int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ tot -= (nthreads[i] - new);
+ nthreads[i] = new;
+ }
+@@ -989,10 +976,22 @@ nfsd(void *vrqstp)
+ /* Release the thread */
+ svc_exit_thread(rqstp);
+
+- /* Now if needed we call svc_destroy in appropriate context */
+- mutex_lock(&nfsd_mutex);
+- nfsd_put(net);
+- mutex_unlock(&nfsd_mutex);
++ /* We need to drop a ref, but may not drop the last reference
++ * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
++ * could deadlock with nfsd_shutdown_threads() waiting for us.
++ * So three options are:
++ * - drop a non-final reference,
++ * - get the mutex without waiting
++ * - sleep briefly andd try the above again
++ */
++ while (!svc_put_not_last(nn->nfsd_serv)) {
++ if (mutex_trylock(&nfsd_mutex)) {
++ nfsd_put(net);
++ mutex_unlock(&nfsd_mutex);
++ break;
++ }
++ msleep(20);
++ }
+
+ /* Release module */
+ module_put_and_kthread_exit(0);
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index fdc32ffef0184..d22bd62093146 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -141,6 +141,19 @@ static inline void svc_put(struct svc_serv *serv)
+ kref_put(&serv->sv_refcnt, svc_destroy);
+ }
+
++/**
++ * svc_put_not_last - decrement non-final reference count on SUNRPC serv
++ * @serv: the svc_serv to have count decremented
++ *
++ * Returns: %true is refcount was decremented.
++ *
++ * If the refcount is 1, it is not decremented and instead failure is reported.
++ */
++static inline bool svc_put_not_last(struct svc_serv *serv)
++{
++ return refcount_dec_not_one(&serv->sv_refcnt.refcount);
++}
++
+ /*
+ * Maximum payload size supported by a kernel RPC server.
+ * This is use to determine the max number of pages nfsd is
+--
+2.43.0
+
--- /dev/null
+From 5eed1028d1e64f005025bdbeb053a17fe4549657 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:35 -0400
+Subject: NFSD: Make nfs4_put_copy() static
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8ea6e2c90bb0eb74a595a12e23a1dff9abbc760a ]
+
+Clean up: All call sites are in fs/nfsd/nfs4proc.c.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/state.h | 1 -
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index f0722d4ed0810..3e4b0fb44c7b7 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1287,7 +1287,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ return status;
+ }
+
+-void nfs4_put_copy(struct nfsd4_copy *copy)
++static void nfs4_put_copy(struct nfsd4_copy *copy)
+ {
+ if (!refcount_dec_and_test(&copy->refcount))
+ return;
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index f3d6313914ed0..ae596dbf86675 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -703,7 +703,6 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name
+ extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
+
+ void put_nfs4_file(struct nfs4_file *fi);
+-extern void nfs4_put_copy(struct nfsd4_copy *copy);
+ extern struct nfsd4_copy *
+ find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
+ extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
+--
+2.43.0
+
--- /dev/null
+From 2eced2a96c05862e9cc25dbdbbbdd57cabea6fa6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:14:25 -0400
+Subject: NFSD: Make nfsd4_remove() wait before returning NFS4ERR_DELAY
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5f5f8b6d655fd947e899b1771c2f7cb581a06764 ]
+
+nfsd_unlink() can kick off a CB_RECALL (via
+vfs_unlink() -> leases_conflict()) if a delegation is present.
+Before returning NFS4ERR_DELAY, give the client holding that
+delegation a chance to return it and then retry the nfsd_unlink()
+again, once.
+
+Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
+Tested-by: Igor Mammedov <imammedo@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 4c5cc142562b2..d17377148b669 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1790,9 +1790,18 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+
+ fh_fill_pre_attrs(fhp);
+ if (type != S_IFDIR) {
++ int retries;
++
+ if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
+ nfsd_close_cached_files(rdentry);
+- host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
++
++ for (retries = 1;;) {
++ host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
++ if (host_err != -EAGAIN || !retries--)
++ break;
++ if (!nfsd_wait_for_delegreturn(rqstp, rinode))
++ break;
++ }
+ } else {
+ host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
+ }
+--
+2.43.0
+
--- /dev/null
+From caf85d1492a720e447dfe84e7396041e59a71672 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:14:19 -0400
+Subject: NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 68c522afd0b1936b48a03a4c8b81261e7597c62d ]
+
+nfsd_rename() can kick off a CB_RECALL (via
+vfs_rename() -> leases_conflict()) if a delegation is present.
+Before returning NFS4ERR_DELAY, give the client holding that
+delegation a chance to return it and then retry the nfsd_rename()
+again, once.
+
+This version of the patch handles renaming an existing file,
+but does not deal with renaming onto an existing file. That
+case will still always trigger an NFS4ERR_DELAY.
+
+Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
+Tested-by: Igor Mammedov <imammedo@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index e8329051dde01..4c5cc142562b2 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1698,7 +1698,15 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+ .new_dir = tdir,
+ .new_dentry = ndentry,
+ };
+- host_err = vfs_rename(&rd);
++ int retries;
++
++ for (retries = 1;;) {
++ host_err = vfs_rename(&rd);
++ if (host_err != -EAGAIN || !retries--)
++ break;
++ if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
++ break;
++ }
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
+ if (!host_err)
+--
+2.43.0
+
--- /dev/null
+From 0c5c5bf6b39c5fededcbc2d66e072bb6d5e58bc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 14:41:01 -0400
+Subject: nfsd: make nfsd4_run_cb a bool return function
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit b95239ca4954a0d48b19c09ce7e8f31b453b4216 ]
+
+queue_work can return false and not queue anything, if the work is
+already queued. If that happens in the case of a CB_RECALL, we'll have
+taken an extra reference to the stid that will never be put. Ensure we
+throw a warning in that case.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4callback.c | 14 ++++++++++++--
+ fs/nfsd/nfs4state.c | 5 ++---
+ fs/nfsd/state.h | 2 +-
+ 3 files changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index face8908a40b1..39989c14c8a1e 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -1373,11 +1373,21 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
+ cb->cb_holds_slot = false;
+ }
+
+-void nfsd4_run_cb(struct nfsd4_callback *cb)
++/**
++ * nfsd4_run_cb - queue up a callback job to run
++ * @cb: callback to queue
++ *
++ * Kick off a callback to do its thing. Returns false if it was already
++ * on a queue, true otherwise.
++ */
++bool nfsd4_run_cb(struct nfsd4_callback *cb)
+ {
+ struct nfs4_client *clp = cb->cb_clp;
++ bool queued;
+
+ nfsd41_cb_inflight_begin(clp);
+- if (!nfsd4_queue_cb(cb))
++ queued = nfsd4_queue_cb(cb);
++ if (!queued)
+ nfsd41_cb_inflight_end(clp);
++ return queued;
+ }
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index e98306c69f424..61978ad43a0f7 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4874,14 +4874,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
+ * we know it's safe to take a reference.
+ */
+ refcount_inc(&dp->dl_stid.sc_count);
+- nfsd4_run_cb(&dp->dl_recall);
++ WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+ }
+
+ /* Called from break_lease() with flc_lock held. */
+ static bool
+ nfsd_break_deleg_cb(struct file_lock *fl)
+ {
+- bool ret = false;
+ struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+@@ -4907,7 +4906,7 @@ nfsd_break_deleg_cb(struct file_lock *fl)
+ fp->fi_had_conflict = true;
+ nfsd_break_one_deleg(dp);
+ spin_unlock(&fp->fi_lock);
+- return ret;
++ return false;
+ }
+
+ /**
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index b3477087a9fc3..e2daef3cc0034 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -692,7 +692,7 @@ extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
+ extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
+ extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
+ const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
+-extern void nfsd4_run_cb(struct nfsd4_callback *cb);
++extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
+ extern int nfsd4_create_callback_queue(void);
+ extern void nfsd4_destroy_callback_queue(void);
+ extern void nfsd4_shutdown_callback(struct nfs4_client *);
+--
+2.43.0
+
--- /dev/null
+From 34d02389e3f928865e671deff25d80efd5b20af4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:14:13 -0400
+Subject: NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 34b91dda7124fc3259e4b2ae53e0c933dedfec01 ]
+
+nfsd_setattr() can kick off a CB_RECALL (via
+notify_change() -> break_lease()) if a delegation is present. Before
+returning NFS4ERR_DELAY, give the client holding that delegation a
+chance to return it and then retry the nfsd_setattr() again, once.
+
+Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=354
+Tested-by: Igor Mammedov <imammedo@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 392df2353556e..e8329051dde01 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -414,6 +414,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ int host_err;
+ bool get_write_count;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
++ int retries;
+
+ if (iap->ia_valid & ATTR_SIZE) {
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+@@ -468,7 +469,13 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ inode_lock(inode);
+- host_err = __nfsd_setattr(dentry, iap);
++ for (retries = 1;;) {
++ host_err = __nfsd_setattr(dentry, iap);
++ if (host_err != -EAGAIN || !retries--)
++ break;
++ if (!nfsd_wait_for_delegreturn(rqstp, inode))
++ break;
++ }
+ if (attr->na_seclabel && attr->na_seclabel->len)
+ attr->na_labelerr = security_inode_setsecctx(dentry,
+ attr->na_seclabel->data, attr->na_seclabel->len);
+--
+2.43.0
+
--- /dev/null
+From 5d57657be391e4246272ca7e4b70fb4d4a66000e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: nfsd: make nfsd_stats.th_cnt atomic_t
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 9b6c8c9bebccd5fb785c306b948c08874a88874d ]
+
+This allows us to move the updates for th_cnt out of the mutex.
+This is a step towards reducing mutex coverage in nfsd().
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 6 +++---
+ fs/nfsd/stats.c | 2 +-
+ fs/nfsd/stats.h | 4 +---
+ 3 files changed, 5 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 141d884fee4f4..32f2c46a38323 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -57,7 +57,7 @@ static __be32 nfsd_init_request(struct svc_rqst *,
+ /*
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
+ * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+- * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
++ * extent ->sv_temp_socks and ->sv_permsocks.
+ *
+ * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
+@@ -955,8 +955,8 @@ nfsd(void *vrqstp)
+ allow_signal(SIGINT);
+ allow_signal(SIGQUIT);
+
+- nfsdstats.th_cnt++;
+ mutex_unlock(&nfsd_mutex);
++ atomic_inc(&nfsdstats.th_cnt);
+
+ set_freezable();
+
+@@ -983,8 +983,8 @@ nfsd(void *vrqstp)
+ /* Clear signals before calling svc_exit_thread() */
+ flush_signals(current);
+
++ atomic_dec(&nfsdstats.th_cnt);
+ mutex_lock(&nfsd_mutex);
+- nfsdstats.th_cnt --;
+
+ out:
+ /* Take an extra ref so that the svc_put in svc_exit_thread()
+diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
+index 1d3b881e73821..a8c5a02a84f04 100644
+--- a/fs/nfsd/stats.c
++++ b/fs/nfsd/stats.c
+@@ -45,7 +45,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
+ percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
+
+ /* thread usage: */
+- seq_printf(seq, "th %u 0", nfsdstats.th_cnt);
++ seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
+
+ /* deprecated thread usage histogram stats */
+ for (i = 0; i < 10; i++)
+diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
+index 51ecda852e23b..9b43dc3d99913 100644
+--- a/fs/nfsd/stats.h
++++ b/fs/nfsd/stats.h
+@@ -29,11 +29,9 @@ enum {
+ struct nfsd_stats {
+ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
+
+- /* Protected by nfsd_mutex */
+- unsigned int th_cnt; /* number of available threads */
++ atomic_t th_cnt; /* number of available threads */
+ };
+
+-
+ extern struct nfsd_stats nfsdstats;
+
+ extern struct svc_stat nfsd_svcstats;
+--
+2.43.0
+
--- /dev/null
+From cb583f32b5611e12c156191fe335db41bd74f968 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 18 Dec 2021 20:37:54 -0500
+Subject: nfsd: map EBADF
+
+From: Peng Tao <tao.peng@primarydata.com>
+
+[ Upstream commit b3d0db706c77d02055910fcfe2f6eb5155ff9d5e ]
+
+Now that we have open file cache, it is possible that another client
+deletes the file and DP will not know about it. Then IO to MDS would
+fail with BADSTATEID and knfsd would start state recovery, which
+should fail as well and then nfs read/write will fail with EBADF.
+And it triggers a WARN() in nfserrno().
+
+-----------[ cut here ]------------
+WARNING: CPU: 0 PID: 13529 at fs/nfsd/nfsproc.c:758 nfserrno+0x58/0x70 [nfsd]()
+nfsd: non-standard errno: -9
+modules linked in: nfsv3 nfs_layout_flexfiles rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_connt
+pata_acpi floppy
+CPU: 0 PID: 13529 Comm: nfsd Tainted: G W 4.1.5-00307-g6e6579b #7
+Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/30/2014
+ 0000000000000000 00000000464e6c9c ffff88079085fba8 ffffffff81789936
+ 0000000000000000 ffff88079085fc00 ffff88079085fbe8 ffffffff810a08ea
+ ffff88079085fbe8 ffff88080f45c900 ffff88080f627d50 ffff880790c46a48
+ Call Trace:
+ [<ffffffff81789936>] dump_stack+0x45/0x57
+ [<ffffffff810a08ea>] warn_slowpath_common+0x8a/0xc0
+ [<ffffffff810a0975>] warn_slowpath_fmt+0x55/0x70
+ [<ffffffff81252908>] ? splice_direct_to_actor+0x148/0x230
+ [<ffffffffa02fb8c0>] ? fsid_source+0x60/0x60 [nfsd]
+ [<ffffffffa02f9918>] nfserrno+0x58/0x70 [nfsd]
+ [<ffffffffa02fba57>] nfsd_finish_read+0x97/0xb0 [nfsd]
+ [<ffffffffa02fc7a6>] nfsd_splice_read+0x76/0xa0 [nfsd]
+ [<ffffffffa02fcca1>] nfsd_read+0xc1/0xd0 [nfsd]
+ [<ffffffffa0233af2>] ? svc_tcp_adjust_wspace+0x12/0x30 [sunrpc]
+ [<ffffffffa03073da>] nfsd3_proc_read+0xba/0x150 [nfsd]
+ [<ffffffffa02f7a03>] nfsd_dispatch+0xc3/0x210 [nfsd]
+ [<ffffffffa0233af2>] ? svc_tcp_adjust_wspace+0x12/0x30 [sunrpc]
+ [<ffffffffa0232913>] svc_process_common+0x453/0x6f0 [sunrpc]
+ [<ffffffffa0232cc3>] svc_process+0x113/0x1b0 [sunrpc]
+ [<ffffffffa02f740f>] nfsd+0xff/0x170 [nfsd]
+ [<ffffffffa02f7310>] ? nfsd_destroy+0x80/0x80 [nfsd]
+ [<ffffffff810bf3a8>] kthread+0xd8/0xf0
+ [<ffffffff810bf2d0>] ? kthread_create_on_node+0x1b0/0x1b0
+ [<ffffffff817912a2>] ret_from_fork+0x42/0x70
+ [<ffffffff810bf2d0>] ? kthread_create_on_node+0x1b0/0x1b0
+
+Signed-off-by: Peng Tao <tao.peng@primarydata.com>
+Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsproc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index b009da1dcbb50..3c5e87805cc8d 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -845,6 +845,7 @@ nfserrno (int errno)
+ { nfserr_io, -EIO },
+ { nfserr_nxio, -ENXIO },
+ { nfserr_fbig, -E2BIG },
++ { nfserr_stale, -EBADF },
+ { nfserr_acces, -EACCES },
+ { nfserr_exist, -EEXIST },
+ { nfserr_xdev, -EXDEV },
+--
+2.43.0
+
--- /dev/null
+From 2588daf555e751e6f6f1a669a7d4b77c7c3d2781 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 22 May 2022 12:07:18 -0400
+Subject: NFSD: Modernize nfsd4_release_lockowner()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit bd8fdb6e545f950f4654a9a10d7e819ad48146e5 ]
+
+Refactor: Use existing helpers that other lock operations use. This
+change removes several automatic variables, so re-organize the
+variable declarations for readability.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 36 +++++++++++-------------------------
+ 1 file changed, 11 insertions(+), 25 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index d79b736019d49..2d52656095340 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7548,16 +7548,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ union nfsd4_op_u *u)
+ {
+ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ clientid_t *clid = &rlockowner->rl_clientid;
+- struct nfs4_stateowner *sop;
+- struct nfs4_lockowner *lo = NULL;
+ struct nfs4_ol_stateid *stp;
+- struct xdr_netobj *owner = &rlockowner->rl_owner;
+- unsigned int hashval = ownerstr_hashval(owner);
+- __be32 status;
+- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
++ struct nfs4_lockowner *lo;
+ struct nfs4_client *clp;
+- LIST_HEAD (reaplist);
++ LIST_HEAD(reaplist);
++ __be32 status;
+
+ dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+ clid->cl_boot, clid->cl_id);
+@@ -7565,30 +7562,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ status = set_client(clid, cstate, nn);
+ if (status)
+ return status;
+-
+ clp = cstate->clp;
+- /* Find the matching lock stateowner */
+- spin_lock(&clp->cl_lock);
+- list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
+- so_strhash) {
+
+- if (sop->so_is_open_owner || !same_owner_str(sop, owner))
+- continue;
+-
+- if (atomic_read(&sop->so_count) != 1) {
+- spin_unlock(&clp->cl_lock);
+- return nfserr_locks_held;
+- }
+-
+- lo = lockowner(sop);
+- nfs4_get_stateowner(sop);
+- break;
+- }
++ spin_lock(&clp->cl_lock);
++ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
+ if (!lo) {
+ spin_unlock(&clp->cl_lock);
+ return status;
+ }
+-
++ if (atomic_read(&lo->lo_owner.so_count) != 2) {
++ spin_unlock(&clp->cl_lock);
++ nfs4_put_stateowner(&lo->lo_owner);
++ return nfserr_locks_held;
++ }
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+ stp = list_first_entry(&lo->lo_owner.so_stateids,
+--
+2.43.0
+
--- /dev/null
+From b9090de846c262ac6f15e4b3ac594a3c071e46a2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:41:18 -0400
+Subject: NFSD: Move copy offload callback arguments into a separate structure
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit a11ada99ce93a79393dc6683d22f7915748c8f6b ]
+
+Refactor so that CB_OFFLOAD arguments can be passed without
+allocating a whole struct nfsd4_copy object. On my system (x86_64)
+this removes another 96 bytes from struct nfsd4_copy.
+
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4callback.c | 37 +++++++++++++++++------------------
+ fs/nfsd/nfs4proc.c | 44 +++++++++++++++++++++---------------------
+ fs/nfsd/xdr4.h | 11 +++++++----
+ 3 files changed, 47 insertions(+), 45 deletions(-)
+
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index e1272a7f45220..face8908a40b1 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
+ * case NFS4_OK:
+ * write_response4 coa_resok4;
+ * default:
+- * length4 coa_bytes_copied;
++ * length4 coa_bytes_copied;
+ * };
+ * struct CB_OFFLOAD4args {
+ * nfs_fh4 coa_fh;
+@@ -688,21 +688,22 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
+ * };
+ */
+ static void encode_offload_info4(struct xdr_stream *xdr,
+- __be32 nfserr,
+- const struct nfsd4_copy *cp)
++ const struct nfsd4_cb_offload *cbo)
+ {
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+- *p++ = nfserr;
+- if (!nfserr) {
++ *p = cbo->co_nfserr;
++ switch (cbo->co_nfserr) {
++ case nfs_ok:
+ p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+ p = xdr_encode_empty_array(p);
+- p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
+- *p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
+- p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
++ p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written);
++ *p++ = cpu_to_be32(cbo->co_res.wr_stable_how);
++ p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data,
+ NFS4_VERIFIER_SIZE);
+- } else {
++ break;
++ default:
+ p = xdr_reserve_space(xdr, 8);
+ /* We always return success if bytes were written */
+ p = xdr_encode_hyper(p, 0);
+@@ -710,18 +711,16 @@ static void encode_offload_info4(struct xdr_stream *xdr,
+ }
+
+ static void encode_cb_offload4args(struct xdr_stream *xdr,
+- __be32 nfserr,
+- const struct knfsd_fh *fh,
+- const struct nfsd4_copy *cp,
++ const struct nfsd4_cb_offload *cbo,
+ struct nfs4_cb_compound_hdr *hdr)
+ {
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+- *p++ = cpu_to_be32(OP_CB_OFFLOAD);
+- encode_nfs_fh4(xdr, fh);
+- encode_stateid4(xdr, &cp->cp_res.cb_stateid);
+- encode_offload_info4(xdr, nfserr, cp);
++ *p = cpu_to_be32(OP_CB_OFFLOAD);
++ encode_nfs_fh4(xdr, &cbo->co_fh);
++ encode_stateid4(xdr, &cbo->co_res.cb_stateid);
++ encode_offload_info4(xdr, cbo);
+
+ hdr->nops++;
+ }
+@@ -731,8 +730,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
+ const void *data)
+ {
+ const struct nfsd4_callback *cb = data;
+- const struct nfsd4_copy *cp =
+- container_of(cb, struct nfsd4_copy, cp_cb);
++ const struct nfsd4_cb_offload *cbo =
++ container_of(cb, struct nfsd4_cb_offload, co_cb);
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = 0,
+ .minorversion = cb->cb_clp->cl_minorversion,
+@@ -740,7 +739,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+- encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
++ encode_cb_offload4args(xdr, cbo, &hdr);
+ encode_cb_nops(&hdr);
+ }
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index be51338deda46..46ec66f4ec9e7 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1636,9 +1636,10 @@ nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
+
+ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
+ {
+- struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
++ struct nfsd4_cb_offload *cbo =
++ container_of(cb, struct nfsd4_cb_offload, co_cb);
+
+- nfs4_put_copy(copy);
++ kfree(cbo);
+ }
+
+ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
+@@ -1755,25 +1756,23 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ nfs4_put_copy(copy);
+ }
+
+-static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
++static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
+ {
+- struct nfsd4_copy *cb_copy;
++ struct nfsd4_cb_offload *cbo;
+
+- cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+- if (!cb_copy)
++ cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
++ if (!cbo)
+ return;
+
+- refcount_set(&cb_copy->refcount, 1);
+- memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
+- cb_copy->cp_clp = copy->cp_clp;
+- cb_copy->nfserr = copy->nfserr;
+- memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh));
++ memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
++ memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
++ cbo->co_nfserr = nfserr;
+
+- nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
+- &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
+- trace_nfsd_cb_offload(copy->cp_clp, &copy->cp_res.cb_stateid,
+- &copy->fh, copy->cp_count, copy->nfserr);
+- nfsd4_run_cb(&cb_copy->cp_cb);
++ nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops,
++ NFSPROC4_CLNT_CB_OFFLOAD);
++ trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
++ &cbo->co_fh, copy->cp_count, nfserr);
++ nfsd4_run_cb(&cbo->co_cb);
+ }
+
+ /**
+@@ -1786,6 +1785,7 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
+ static int nfsd4_do_async_copy(void *data)
+ {
+ struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
++ __be32 nfserr;
+
+ if (nfsd4_ssc_is_inter(copy)) {
+ struct file *filp;
+@@ -1793,21 +1793,21 @@ static int nfsd4_do_async_copy(void *data)
+ filp = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+ &copy->stateid);
+ if (IS_ERR(filp)) {
+- copy->nfserr = nfserr_offload_denied;
++ nfserr = nfserr_offload_denied;
+ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+- copy->nfserr = nfsd4_do_copy(copy, filp,
+- copy->nf_dst->nf_file, false);
++ nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
++ false);
+ nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst);
+ } else {
+- copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+- copy->nf_dst->nf_file, false);
++ nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
++ copy->nf_dst->nf_file, false);
+ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+
+ do_callback:
+- nfsd4_send_cb_offload(copy);
++ nfsd4_send_cb_offload(copy, nfserr);
+ cleanup_async_copy(copy);
+ return 0;
+ }
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 37d1b6d0486b3..adb9aef26d7f1 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -533,6 +533,13 @@ struct nfsd42_write_res {
+ stateid_t cb_stateid;
+ };
+
++struct nfsd4_cb_offload {
++ struct nfsd4_callback co_cb;
++ struct nfsd42_write_res co_res;
++ __be32 co_nfserr;
++ struct knfsd_fh co_fh;
++};
++
+ struct nfsd4_copy {
+ /* request */
+ stateid_t cp_src_stateid;
+@@ -550,10 +557,6 @@ struct nfsd4_copy {
+
+ /* response */
+ struct nfsd42_write_res cp_res;
+-
+- /* for cb_offload */
+- struct nfsd4_callback cp_cb;
+- __be32 nfserr;
+ struct knfsd_fh fh;
+
+ struct nfs4_client *cp_clp;
+--
+2.43.0
+
--- /dev/null
+From 817c0bcd222e95057253eb6a90e9b6a7ea977418 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:23 -0700
+Subject: NFSD: move create/destroy of laundry_wq to init_nfsd and exit_nfsd
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit d76cc46b37e123e8d245cc3490978dbda56f979d ]
+
+This patch moves create/destroy of laundry_wq from nfs4_state_start
+and nfs4_state_shutdown_net to init_nfsd and exit_nfsd to prevent
+the laundromat from being freed while a thread is processing a
+conflicting lock.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 28 ++++++++++++++++------------
+ fs/nfsd/nfsctl.c | 4 ++++
+ fs/nfsd/nfsd.h | 4 ++++
+ 3 files changed, 24 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 3dfdd9e1c5028..582c9c7ba60a8 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -127,6 +127,21 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+
+ static struct workqueue_struct *laundry_wq;
+
++int nfsd4_create_laundry_wq(void)
++{
++ int rc = 0;
++
++ laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
++ if (laundry_wq == NULL)
++ rc = -ENOMEM;
++ return rc;
++}
++
++void nfsd4_destroy_laundry_wq(void)
++{
++ destroy_workqueue(laundry_wq);
++}
++
+ static bool is_session_dead(struct nfsd4_session *ses)
+ {
+ return ses->se_flags & NFS4_SESSION_DEAD;
+@@ -7761,22 +7776,12 @@ nfs4_state_start(void)
+ {
+ int ret;
+
+- laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
+- if (laundry_wq == NULL) {
+- ret = -ENOMEM;
+- goto out;
+- }
+ ret = nfsd4_create_callback_queue();
+ if (ret)
+- goto out_free_laundry;
++ return ret;
+
+ set_max_delegations();
+ return 0;
+-
+-out_free_laundry:
+- destroy_workqueue(laundry_wq);
+-out:
+- return ret;
+ }
+
+ void
+@@ -7813,7 +7818,6 @@ nfs4_state_shutdown_net(struct net *net)
+ void
+ nfs4_state_shutdown(void)
+ {
+- destroy_workqueue(laundry_wq);
+ nfsd4_destroy_callback_queue();
+ }
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 16920e4512bde..322a208878f2c 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1542,6 +1542,9 @@ static int __init init_nfsd(void)
+ if (retval < 0)
+ goto out_free_filesystem;
+ retval = register_cld_notifier();
++ if (retval)
++ goto out_free_all;
++ retval = nfsd4_create_laundry_wq();
+ if (retval)
+ goto out_free_all;
+ return 0;
+@@ -1566,6 +1569,7 @@ static int __init init_nfsd(void)
+
+ static void __exit exit_nfsd(void)
+ {
++ nfsd4_destroy_laundry_wq();
+ unregister_cld_notifier();
+ unregister_pernet_subsys(&nfsd_net_ops);
+ nfsd_drc_slab_free();
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 23996c6ca75e3..847b482155ae9 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -162,6 +162,8 @@ void nfs4_state_shutdown_net(struct net *net);
+ int nfs4_reset_recoverydir(char *recdir);
+ char * nfs4_recoverydir(void);
+ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
++int nfsd4_create_laundry_wq(void);
++void nfsd4_destroy_laundry_wq(void);
+ #else
+ static inline int nfsd4_init_slabs(void) { return 0; }
+ static inline void nfsd4_free_slabs(void) { }
+@@ -175,6 +177,8 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
+ {
+ return false;
+ }
++static inline int nfsd4_create_laundry_wq(void) { return 0; };
++static inline void nfsd4_destroy_laundry_wq(void) {};
+ #endif
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 9920cb84df2a80154b25bad5eec180f511dea8f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Mar 2022 13:55:37 -0400
+Subject: NFSD: Move documenting comment for nfsd4_process_open2()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 7e2ce0cc15a509b859199235a2bad9cece00f67a ]
+
+Clean up nfsd4_open() by converting a large comment at the only
+call site for nfsd4_process_open2() to a kerneldoc comment in
+front of that function.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 6 +-----
+ fs/nfsd/nfs4state.c | 12 ++++++++++++
+ 2 files changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 6a9c7e6a23ba5..3ac2978c596ae 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -628,11 +628,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfserr_inval;
+ goto out;
+ }
+- /*
+- * nfsd4_process_open2() does the actual opening of the file. If
+- * successful, it (1) truncates the file if open->op_truncate was
+- * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+- */
++
+ status = nfsd4_process_open2(rqstp, resfh, open);
+ WARN(status && open->op_created,
+ "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index f9681a4d116ad..d79b736019d49 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5465,6 +5465,18 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
+ */
+ }
+
++/**
++ * nfsd4_process_open2 - finish open processing
++ * @rqstp: the RPC transaction being executed
++ * @current_fh: NFSv4 COMPOUND's current filehandle
++ * @open: OPEN arguments
++ *
++ * If successful, (1) truncate the file if open->op_truncate was
++ * set, (2) set open->op_stateid, (3) set open->op_delegation.
++ *
++ * Returns %nfs_ok on success; otherwise an nfs4stat value in
++ * network byte order is returned.
++ */
+ __be32
+ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+ {
+--
+2.43.0
+
--- /dev/null
+From 9f526d4537f7b46d119df8206c32483f9dcac572 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Sep 2021 11:14:47 +1000
+Subject: NFSD: move filehandle format declarations out of "uapi".
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit ef5825e3cf0d0af657f5fb4dd86d750ed42fee0a ]
+
+A small part of the declarations concerning filehandle format is
+currently in the "uapi" include directory:
+ include/uapi/linux/nfsd/nfsfh.h
+
+There is a lot more to the filehandle format, including "enum fid_type"
+and "enum nfsd_fsid" which are not exported via "uapi".
+
+This small part of the filehandle definition is of minimal use outside
+of the kernel, and I can find no evidence that any other code is using
+it. Certainly nfs-utils and wireshark (the most likely candidates) do not
+use these declarations.
+
+So move it out of "uapi" by copying the content from
+ include/uapi/linux/nfsd/nfsfh.h
+into
+ fs/nfsd/nfsfh.h
+
+A few unnecessary "#include" directives are not copied, and neither is
+the #define of fh_auth, which is annotated as being for userspace only.
+
+The copyright claims in the uapi file are identical to those in the nfsd
+file, so there is no need to copy those.
+
+The "__u32" style integer types are only needed in "uapi". In
+kernel-only code we can use the more familiar "u32" style.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.h | 97 ++++++++++++++++++++++++++-
+ fs/nfsd/vfs.c | 1 +
+ include/uapi/linux/nfsd/nfsfh.h | 115 --------------------------------
+ 3 files changed, 97 insertions(+), 116 deletions(-)
+ delete mode 100644 include/uapi/linux/nfsd/nfsfh.h
+
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index 6106697adc04b..ad47f16676a8c 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -10,9 +10,104 @@
+
+ #include <linux/crc32.h>
+ #include <linux/sunrpc/svc.h>
+-#include <uapi/linux/nfsd/nfsfh.h>
+ #include <linux/iversion.h>
+ #include <linux/exportfs.h>
++#include <linux/nfs4.h>
++
++
++/*
++ * This is the old "dentry style" Linux NFSv2 file handle.
++ *
++ * The xino and xdev fields are currently used to transport the
++ * ino/dev of the exported inode.
++ */
++struct nfs_fhbase_old {
++ u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */
++ u32 fb_ino; /* our inode number */
++ u32 fb_dirino; /* dir inode number, 0 for directories */
++ u32 fb_dev; /* our device */
++ u32 fb_xdev;
++ u32 fb_xino;
++ u32 fb_generation;
++};
++
++/*
++ * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
++ *
++ * The file handle starts with a sequence of four-byte words.
++ * The first word contains a version number (1) and three descriptor bytes
++ * that tell how the remaining 3 variable length fields should be handled.
++ * These three bytes are auth_type, fsid_type and fileid_type.
++ *
++ * All four-byte values are in host-byte-order.
++ *
++ * The auth_type field is deprecated and must be set to 0.
++ *
++ * The fsid_type identifies how the filesystem (or export point) is
++ * encoded.
++ * Current values:
++ * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number
++ * NOTE: we cannot use the kdev_t device id value, because kdev_t.h
++ * says we mustn't. We must break it up and reassemble.
++ * 1 - 4 byte user specified identifier
++ * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
++ * 3 - 4 byte device id, encoded for user-space, 4 byte inode number
++ * 4 - 4 byte inode number and 4 byte uuid
++ * 5 - 8 byte uuid
++ * 6 - 16 byte uuid
++ * 7 - 8 byte inode number and 16 byte uuid
++ *
++ * The fileid_type identified how the file within the filesystem is encoded.
++ * The values for this field are filesystem specific, exccept that
++ * filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
++ * in include/linux/exportfs.h for currently registered values.
++ */
++struct nfs_fhbase_new {
++ union {
++ struct {
++ u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */
++ u8 fb_auth_type_aux;
++ u8 fb_fsid_type_aux;
++ u8 fb_fileid_type_aux;
++ u32 fb_auth[1];
++ /* u32 fb_fsid[0]; floating */
++ /* u32 fb_fileid[0]; floating */
++ };
++ struct {
++ u8 fb_version; /* == 1, even => nfs_fhbase_old */
++ u8 fb_auth_type;
++ u8 fb_fsid_type;
++ u8 fb_fileid_type;
++ u32 fb_auth_flex[]; /* flexible-array member */
++ };
++ };
++};
++
++struct knfsd_fh {
++ unsigned int fh_size; /* significant for NFSv3.
++ * Points to the current size while building
++ * a new file handle
++ */
++ union {
++ struct nfs_fhbase_old fh_old;
++ u32 fh_pad[NFS4_FHSIZE/4];
++ struct nfs_fhbase_new fh_new;
++ } fh_base;
++};
++
++#define ofh_dcookie fh_base.fh_old.fb_dcookie
++#define ofh_ino fh_base.fh_old.fb_ino
++#define ofh_dirino fh_base.fh_old.fb_dirino
++#define ofh_dev fh_base.fh_old.fb_dev
++#define ofh_xdev fh_base.fh_old.fb_xdev
++#define ofh_xino fh_base.fh_old.fb_xino
++#define ofh_generation fh_base.fh_old.fb_generation
++
++#define fh_version fh_base.fh_new.fb_version
++#define fh_fsid_type fh_base.fh_new.fb_fsid_type
++#define fh_auth_type fh_base.fh_new.fb_auth_type
++#define fh_fileid_type fh_base.fh_new.fb_fileid_type
++#define fh_fsid fh_base.fh_new.fb_auth_flex
+
+ static inline __u32 ino_t_to_u32(ino_t ino)
+ {
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index c39b8a6538042..24a5b5cfcfb03 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -244,6 +244,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ * clients and is explicitly disallowed for NFSv3
++ * NeilBrown <neilb@cse.unsw.edu.au>
+ */
+ __be32
+ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
+deleted file mode 100644
+index e29e8accc4f4d..0000000000000
+--- a/include/uapi/linux/nfsd/nfsfh.h
++++ /dev/null
+@@ -1,115 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+-/*
+- * This file describes the layout of the file handles as passed
+- * over the wire.
+- *
+- * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+- */
+-
+-#ifndef _UAPI_LINUX_NFSD_FH_H
+-#define _UAPI_LINUX_NFSD_FH_H
+-
+-#include <linux/types.h>
+-#include <linux/nfs.h>
+-#include <linux/nfs2.h>
+-#include <linux/nfs3.h>
+-#include <linux/nfs4.h>
+-
+-/*
+- * This is the old "dentry style" Linux NFSv2 file handle.
+- *
+- * The xino and xdev fields are currently used to transport the
+- * ino/dev of the exported inode.
+- */
+-struct nfs_fhbase_old {
+- __u32 fb_dcookie; /* dentry cookie - always 0xfeebbaca */
+- __u32 fb_ino; /* our inode number */
+- __u32 fb_dirino; /* dir inode number, 0 for directories */
+- __u32 fb_dev; /* our device */
+- __u32 fb_xdev;
+- __u32 fb_xino;
+- __u32 fb_generation;
+-};
+-
+-/*
+- * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
+- *
+- * The file handle starts with a sequence of four-byte words.
+- * The first word contains a version number (1) and three descriptor bytes
+- * that tell how the remaining 3 variable length fields should be handled.
+- * These three bytes are auth_type, fsid_type and fileid_type.
+- *
+- * All four-byte values are in host-byte-order.
+- *
+- * The auth_type field is deprecated and must be set to 0.
+- *
+- * The fsid_type identifies how the filesystem (or export point) is
+- * encoded.
+- * Current values:
+- * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number
+- * NOTE: we cannot use the kdev_t device id value, because kdev_t.h
+- * says we mustn't. We must break it up and reassemble.
+- * 1 - 4 byte user specified identifier
+- * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
+- * 3 - 4 byte device id, encoded for user-space, 4 byte inode number
+- * 4 - 4 byte inode number and 4 byte uuid
+- * 5 - 8 byte uuid
+- * 6 - 16 byte uuid
+- * 7 - 8 byte inode number and 16 byte uuid
+- *
+- * The fileid_type identified how the file within the filesystem is encoded.
+- * The values for this field are filesystem specific, exccept that
+- * filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
+- * in include/linux/exportfs.h for currently registered values.
+- */
+-struct nfs_fhbase_new {
+- union {
+- struct {
+- __u8 fb_version_aux; /* == 1, even => nfs_fhbase_old */
+- __u8 fb_auth_type_aux;
+- __u8 fb_fsid_type_aux;
+- __u8 fb_fileid_type_aux;
+- __u32 fb_auth[1];
+- /* __u32 fb_fsid[0]; floating */
+- /* __u32 fb_fileid[0]; floating */
+- };
+- struct {
+- __u8 fb_version; /* == 1, even => nfs_fhbase_old */
+- __u8 fb_auth_type;
+- __u8 fb_fsid_type;
+- __u8 fb_fileid_type;
+- __u32 fb_auth_flex[]; /* flexible-array member */
+- };
+- };
+-};
+-
+-struct knfsd_fh {
+- unsigned int fh_size; /* significant for NFSv3.
+- * Points to the current size while building
+- * a new file handle
+- */
+- union {
+- struct nfs_fhbase_old fh_old;
+- __u32 fh_pad[NFS4_FHSIZE/4];
+- struct nfs_fhbase_new fh_new;
+- } fh_base;
+-};
+-
+-#define ofh_dcookie fh_base.fh_old.fb_dcookie
+-#define ofh_ino fh_base.fh_old.fb_ino
+-#define ofh_dirino fh_base.fh_old.fb_dirino
+-#define ofh_dev fh_base.fh_old.fb_dev
+-#define ofh_xdev fh_base.fh_old.fb_xdev
+-#define ofh_xino fh_base.fh_old.fb_xino
+-#define ofh_generation fh_base.fh_old.fb_generation
+-
+-#define fh_version fh_base.fh_new.fb_version
+-#define fh_fsid_type fh_base.fh_new.fb_fsid_type
+-#define fh_auth_type fh_base.fh_new.fb_auth_type
+-#define fh_fileid_type fh_base.fh_new.fb_fileid_type
+-#define fh_fsid fh_base.fh_new.fb_auth_flex
+-
+-/* Do not use, provided for userspace compatiblity. */
+-#define fh_auth fh_base.fh_new.fb_auth
+-
+-#endif /* _UAPI_LINUX_NFSD_FH_H */
+--
+2.43.0
+
--- /dev/null
+From bbbee79d363a4cdd606e56b7f707784ab59cd959 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Dec 2021 14:36:49 -0500
+Subject: NFSD: Move fill_pre_wcc() and fill_post_wcc()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fcb5e3fa012351f3b96024c07bc44834c2478213 ]
+
+These functions are related to file handle processing and have
+nothing to do with XDR encoding or decoding. Also they are no longer
+NFSv3-specific. As a clean-up, move their definitions to a more
+appropriate location. WCC is also an NFSv3-specific term, so rename
+them as general-purpose helpers.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3xdr.c | 55 --------------------------------------
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfsfh.c | 66 +++++++++++++++++++++++++++++++++++++++++++++-
+ fs/nfsd/nfsfh.h | 40 ++++++++++++++++++----------
+ fs/nfsd/vfs.c | 8 +++---
+ 5 files changed, 96 insertions(+), 75 deletions(-)
+
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 48e8a02ebc83b..2e47a07029f1d 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -487,61 +487,6 @@ svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ return true;
+ }
+
+-/*
+- * Fill in the pre_op attr for the wcc data
+- */
+-void fill_pre_wcc(struct svc_fh *fhp)
+-{
+- struct inode *inode;
+- struct kstat stat;
+- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+- __be32 err;
+-
+- if (fhp->fh_no_wcc || fhp->fh_pre_saved)
+- return;
+- inode = d_inode(fhp->fh_dentry);
+- err = fh_getattr(fhp, &stat);
+- if (err) {
+- /* Grab the times from inode anyway */
+- stat.mtime = inode->i_mtime;
+- stat.ctime = inode->i_ctime;
+- stat.size = inode->i_size;
+- }
+- if (v4)
+- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+-
+- fhp->fh_pre_mtime = stat.mtime;
+- fhp->fh_pre_ctime = stat.ctime;
+- fhp->fh_pre_size = stat.size;
+- fhp->fh_pre_saved = true;
+-}
+-
+-/*
+- * Fill in the post_op attr for the wcc data
+- */
+-void fill_post_wcc(struct svc_fh *fhp)
+-{
+- bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+- struct inode *inode = d_inode(fhp->fh_dentry);
+- __be32 err;
+-
+- if (fhp->fh_no_wcc)
+- return;
+-
+- if (fhp->fh_post_saved)
+- printk("nfsd: inode locked twice during operation.\n");
+-
+- err = fh_getattr(fhp, &fhp->fh_post_attr);
+- if (err) {
+- fhp->fh_post_saved = false;
+- fhp->fh_post_attr.ctime = inode->i_ctime;
+- } else
+- fhp->fh_post_saved = true;
+- if (v4)
+- fhp->fh_post_change =
+- nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+-}
+-
+ /*
+ * XDR decode functions
+ */
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index a8ad7e6ace927..73c62561580a1 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2536,7 +2536,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ goto encode_op;
+ }
+
+- fh_clear_wcc(current_fh);
++ fh_clear_pre_post_attrs(current_fh);
+
+ /* If op is non-idempotent */
+ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index f3779fa72c896..145208bcb9bd4 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -611,6 +611,70 @@ fh_update(struct svc_fh *fhp)
+ return nfserr_serverfault;
+ }
+
++#ifdef CONFIG_NFSD_V3
++
++/**
++ * fh_fill_pre_attrs - Fill in pre-op attributes
++ * @fhp: file handle to be updated
++ *
++ */
++void fh_fill_pre_attrs(struct svc_fh *fhp)
++{
++ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
++ struct inode *inode;
++ struct kstat stat;
++ __be32 err;
++
++ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
++ return;
++
++ inode = d_inode(fhp->fh_dentry);
++ err = fh_getattr(fhp, &stat);
++ if (err) {
++ /* Grab the times from inode anyway */
++ stat.mtime = inode->i_mtime;
++ stat.ctime = inode->i_ctime;
++ stat.size = inode->i_size;
++ }
++ if (v4)
++ fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
++
++ fhp->fh_pre_mtime = stat.mtime;
++ fhp->fh_pre_ctime = stat.ctime;
++ fhp->fh_pre_size = stat.size;
++ fhp->fh_pre_saved = true;
++}
++
++/**
++ * fh_fill_post_attrs - Fill in post-op attributes
++ * @fhp: file handle to be updated
++ *
++ */
++void fh_fill_post_attrs(struct svc_fh *fhp)
++{
++ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
++ struct inode *inode = d_inode(fhp->fh_dentry);
++ __be32 err;
++
++ if (fhp->fh_no_wcc)
++ return;
++
++ if (fhp->fh_post_saved)
++ printk("nfsd: inode locked twice during operation.\n");
++
++ err = fh_getattr(fhp, &fhp->fh_post_attr);
++ if (err) {
++ fhp->fh_post_saved = false;
++ fhp->fh_post_attr.ctime = inode->i_ctime;
++ } else
++ fhp->fh_post_saved = true;
++ if (v4)
++ fhp->fh_post_change =
++ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
++}
++
++#endif /* CONFIG_NFSD_V3 */
++
+ /*
+ * Release a file handle.
+ */
+@@ -623,7 +687,7 @@ fh_put(struct svc_fh *fhp)
+ fh_unlock(fhp);
+ fhp->fh_dentry = NULL;
+ dput(dentry);
+- fh_clear_wcc(fhp);
++ fh_clear_pre_post_attrs(fhp);
+ }
+ fh_drop_write(fhp);
+ if (exp) {
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index d11e4b6870d68..434930d8a946e 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -284,12 +284,13 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
+ #endif
+
+ #ifdef CONFIG_NFSD_V3
+-/*
+- * The wcc data stored in current_fh should be cleared
+- * between compound ops.
++
++/**
++ * fh_clear_pre_post_attrs - Reset pre/post attributes
++ * @fhp: file handle to be updated
++ *
+ */
+-static inline void
+-fh_clear_wcc(struct svc_fh *fhp)
++static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
+ {
+ fhp->fh_post_saved = false;
+ fhp->fh_pre_saved = false;
+@@ -323,13 +324,24 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
+ return time_to_chattr(&stat->ctime);
+ }
+
+-extern void fill_pre_wcc(struct svc_fh *fhp);
+-extern void fill_post_wcc(struct svc_fh *fhp);
+-#else
+-#define fh_clear_wcc(ignored)
+-#define fill_pre_wcc(ignored)
+-#define fill_post_wcc(notused)
+-#endif /* CONFIG_NFSD_V3 */
++extern void fh_fill_pre_attrs(struct svc_fh *fhp);
++extern void fh_fill_post_attrs(struct svc_fh *fhp);
++
++#else /* !CONFIG_NFSD_V3 */
++
++static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
++{
++}
++
++static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
++{
++}
++
++static inline void fh_fill_post_attrs(struct svc_fh *fhp)
++{
++}
++
++#endif /* !CONFIG_NFSD_V3 */
+
+
+ /*
+@@ -355,7 +367,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+
+ inode = d_inode(dentry);
+ inode_lock_nested(inode, subclass);
+- fill_pre_wcc(fhp);
++ fh_fill_pre_attrs(fhp);
+ fhp->fh_locked = true;
+ }
+
+@@ -372,7 +384,7 @@ static inline void
+ fh_unlock(struct svc_fh *fhp)
+ {
+ if (fhp->fh_locked) {
+- fill_post_wcc(fhp);
++ fh_fill_post_attrs(fhp);
+ inode_unlock(d_inode(fhp->fh_dentry));
+ fhp->fh_locked = false;
+ }
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 284dc900d10ba..4d07a506164b0 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1793,8 +1793,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+ * so do it by hand */
+ trap = lock_rename(tdentry, fdentry);
+ ffhp->fh_locked = tfhp->fh_locked = true;
+- fill_pre_wcc(ffhp);
+- fill_pre_wcc(tfhp);
++ fh_fill_pre_attrs(ffhp);
++ fh_fill_pre_attrs(tfhp);
+
+ odentry = lookup_one_len(fname, fdentry, flen);
+ host_err = PTR_ERR(odentry);
+@@ -1848,8 +1848,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+ * were the same, so again we do it by hand.
+ */
+ if (!close_cached) {
+- fill_post_wcc(ffhp);
+- fill_post_wcc(tfhp);
++ fh_fill_post_attrs(ffhp);
++ fh_fill_post_attrs(tfhp);
+ }
+ unlock_rename(tdentry, fdentry);
+ ffhp->fh_locked = tfhp->fh_locked = false;
+--
+2.43.0
+
--- /dev/null
+From cdc20378f20a642d3c0b46095359c02d0af5c168 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Aug 2022 23:01:14 +0200
+Subject: NFSD: move from strlcpy with unused retval to strscpy
+
+From: Wolfram Sang <wsa+renesas@sang-engineering.com>
+
+[ Upstream commit 72f78ae00a8e5d7abe13abac8305a300f6afd74b ]
+
+Follow the advice of the below link and prefer 'strscpy' in this
+subsystem. Conversion is 1:1 because the return value is not used.
+Generated by a coccinelle script.
+
+Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/
+Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4idmap.c | 8 ++++----
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfssvc.c | 2 +-
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
+index f92161ce1f97d..e70a1a2999b7b 100644
+--- a/fs/nfsd/nfs4idmap.c
++++ b/fs/nfsd/nfs4idmap.c
+@@ -82,8 +82,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
+ new->id = itm->id;
+ new->type = itm->type;
+
+- strlcpy(new->name, itm->name, sizeof(new->name));
+- strlcpy(new->authname, itm->authname, sizeof(new->authname));
++ strscpy(new->name, itm->name, sizeof(new->name));
++ strscpy(new->authname, itm->authname, sizeof(new->authname));
+ }
+
+ static void
+@@ -548,7 +548,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
+ return nfserr_badowner;
+ memcpy(key.name, name, namelen);
+ key.name[namelen] = '\0';
+- strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
++ strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item);
+ if (ret == -ENOENT)
+ return nfserr_badowner;
+@@ -584,7 +584,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
+ int ret;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+- strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
++ strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
+ if (ret == -ENOENT)
+ return encode_ascii_id(xdr, id);
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 5e4b7858b2e50..b2e6fa962f7d9 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1346,7 +1346,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ return 0;
+ }
+ if (work) {
+- strlcpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
++ strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
+ refcount_set(&work->nsui_refcnt, 2);
+ work->nsui_busy = true;
+ list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 011c556caa1e7..8b1afde192118 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -799,7 +799,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+ if (nrservs == 0 && nn->nfsd_serv == NULL)
+ goto out;
+
+- strlcpy(nn->nfsd_name, utsname()->nodename,
++ strscpy(nn->nfsd_name, utsname()->nodename,
+ sizeof(nn->nfsd_name));
+
+ error = nfsd_create_serv(net);
+--
+2.43.0
+
--- /dev/null
+From 9774597631ef3425f5fe48819a1958d458c7ff95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:49 -0400
+Subject: NFSD: Move nfsd_file_trace_alloc() tracepoint
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit b40a2839470cd62ed68c4a32d72a18ee8975b1ac ]
+
+Avoid recording the allocation of an nfsd_file item that is
+immediately released because a matching item was already
+inserted in the hash.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 2 +-
+ fs/nfsd/trace.h | 25 ++++++++++++++++++++++++-
+ 2 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 85813affb8abf..26cfae138b906 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -302,7 +302,6 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+ refcount_set(&nf->nf_ref, 2);
+ nf->nf_may = key->need;
+ nf->nf_mark = NULL;
+- trace_nfsd_file_alloc(nf);
+ }
+ return nf;
+ }
+@@ -1125,6 +1124,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ return status;
+
+ open_file:
++ trace_nfsd_file_alloc(nf);
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark) {
+ if (open) {
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 12dbc190e6595..c824ab30a758e 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -773,12 +773,35 @@ DEFINE_EVENT(nfsd_file_class, name, \
+ TP_PROTO(struct nfsd_file *nf), \
+ TP_ARGS(nf))
+
+-DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose);
+
++TRACE_EVENT(nfsd_file_alloc,
++ TP_PROTO(
++ const struct nfsd_file *nf
++ ),
++ TP_ARGS(nf),
++ TP_STRUCT__entry(
++ __field(const void *, nf_inode)
++ __field(unsigned long, nf_flags)
++ __field(unsigned long, nf_may)
++ __field(unsigned int, nf_ref)
++ ),
++ TP_fast_assign(
++ __entry->nf_inode = nf->nf_inode;
++ __entry->nf_flags = nf->nf_flags;
++ __entry->nf_ref = refcount_read(&nf->nf_ref);
++ __entry->nf_may = nf->nf_may;
++ ),
++ TP_printk("inode=%p ref=%u flags=%s may=%s",
++ __entry->nf_inode, __entry->nf_ref,
++ show_nf_flags(__entry->nf_flags),
++ show_nfsd_may_flags(__entry->nf_may)
++ )
++);
++
+ TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+--
+2.43.0
+
--- /dev/null
+From 4ea89f0e2fb932659df46ce86c85d76f83d421ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 07:47:55 -0400
+Subject: nfsd: move nfserrno() to vfs.c
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit cb12fae1c34b1fa7eaae92c5aadc72d86d7fae19 ]
+
+nfserrno() is common to all nfs versions, but nfsproc.c is specifically
+for NFSv2. Move it to vfs.c, and the prototype to vfs.h.
+
+While we're in here, remove the #ifdef EDQUOT check in this function.
+It's apparently a holdover from the initial merge of the nfsd code in
+1997. No other place in the kernel checks that that symbol is defined
+before using it, so I think we can dispense with it here.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/blocklayout.c | 1 +
+ fs/nfsd/blocklayoutxdr.c | 1 +
+ fs/nfsd/export.h | 1 -
+ fs/nfsd/flexfilelayout.c | 1 +
+ fs/nfsd/nfs4idmap.c | 1 +
+ fs/nfsd/nfsproc.c | 62 ---------------------------------------
+ fs/nfsd/vfs.c | 63 ++++++++++++++++++++++++++++++++++++++++
+ fs/nfsd/vfs.h | 1 +
+ 8 files changed, 68 insertions(+), 63 deletions(-)
+
+diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
+index c99dee99a3c15..0ddd20cb68064 100644
+--- a/fs/nfsd/blocklayout.c
++++ b/fs/nfsd/blocklayout.c
+@@ -16,6 +16,7 @@
+ #include "blocklayoutxdr.h"
+ #include "pnfs.h"
+ #include "filecache.h"
++#include "vfs.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
+index 2455dc8be18a8..1ed2f691ebb90 100644
+--- a/fs/nfsd/blocklayoutxdr.c
++++ b/fs/nfsd/blocklayoutxdr.c
+@@ -9,6 +9,7 @@
+
+ #include "nfsd.h"
+ #include "blocklayoutxdr.h"
++#include "vfs.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
+index ee0e3aba4a6e5..d03f7f6a8642d 100644
+--- a/fs/nfsd/export.h
++++ b/fs/nfsd/export.h
+@@ -115,7 +115,6 @@ struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *);
+ int exp_rootfh(struct net *, struct auth_domain *,
+ char *path, struct knfsd_fh *, int maxsize);
+ __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
+-__be32 nfserrno(int errno);
+
+ static inline void exp_put(struct svc_export *exp)
+ {
+diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
+index 2e2f1d5e9f623..fabc21ed68cea 100644
+--- a/fs/nfsd/flexfilelayout.c
++++ b/fs/nfsd/flexfilelayout.c
+@@ -15,6 +15,7 @@
+
+ #include "flexfilelayoutxdr.h"
+ #include "pnfs.h"
++#include "vfs.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PNFS
+
+diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
+index e70a1a2999b7b..5e9809aff37eb 100644
+--- a/fs/nfsd/nfs4idmap.c
++++ b/fs/nfsd/nfs4idmap.c
+@@ -41,6 +41,7 @@
+ #include "idmap.h"
+ #include "nfsd.h"
+ #include "netns.h"
++#include "vfs.h"
+
+ /*
+ * Turn off idmapping when using AUTH_SYS.
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 82b3ddeacc338..52fc222c34f26 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -848,65 +848,3 @@ const struct svc_version nfsd_version2 = {
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS2_SVC_XDRSIZE,
+ };
+-
+-/*
+- * Map errnos to NFS errnos.
+- */
+-__be32
+-nfserrno (int errno)
+-{
+- static struct {
+- __be32 nfserr;
+- int syserr;
+- } nfs_errtbl[] = {
+- { nfs_ok, 0 },
+- { nfserr_perm, -EPERM },
+- { nfserr_noent, -ENOENT },
+- { nfserr_io, -EIO },
+- { nfserr_nxio, -ENXIO },
+- { nfserr_fbig, -E2BIG },
+- { nfserr_stale, -EBADF },
+- { nfserr_acces, -EACCES },
+- { nfserr_exist, -EEXIST },
+- { nfserr_xdev, -EXDEV },
+- { nfserr_mlink, -EMLINK },
+- { nfserr_nodev, -ENODEV },
+- { nfserr_notdir, -ENOTDIR },
+- { nfserr_isdir, -EISDIR },
+- { nfserr_inval, -EINVAL },
+- { nfserr_fbig, -EFBIG },
+- { nfserr_nospc, -ENOSPC },
+- { nfserr_rofs, -EROFS },
+- { nfserr_mlink, -EMLINK },
+- { nfserr_nametoolong, -ENAMETOOLONG },
+- { nfserr_notempty, -ENOTEMPTY },
+-#ifdef EDQUOT
+- { nfserr_dquot, -EDQUOT },
+-#endif
+- { nfserr_stale, -ESTALE },
+- { nfserr_jukebox, -ETIMEDOUT },
+- { nfserr_jukebox, -ERESTARTSYS },
+- { nfserr_jukebox, -EAGAIN },
+- { nfserr_jukebox, -EWOULDBLOCK },
+- { nfserr_jukebox, -ENOMEM },
+- { nfserr_io, -ETXTBSY },
+- { nfserr_notsupp, -EOPNOTSUPP },
+- { nfserr_toosmall, -ETOOSMALL },
+- { nfserr_serverfault, -ESERVERFAULT },
+- { nfserr_serverfault, -ENFILE },
+- { nfserr_io, -EREMOTEIO },
+- { nfserr_stale, -EOPENSTALE },
+- { nfserr_io, -EUCLEAN },
+- { nfserr_perm, -ENOKEY },
+- { nfserr_no_grace, -ENOGRACE},
+- };
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
+- if (nfs_errtbl[i].syserr == errno)
+- return nfs_errtbl[i].nfserr;
+- }
+- WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
+- return nfserr_io;
+-}
+-
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 88a2ad962a055..70a967789a611 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -48,6 +48,69 @@
+
+ #define NFSDDBG_FACILITY NFSDDBG_FILEOP
+
++/**
++ * nfserrno - Map Linux errnos to NFS errnos
++ * @errno: POSIX(-ish) error code to be mapped
++ *
++ * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If
++ * it's an error we don't expect, log it once and return nfserr_io.
++ */
++__be32
++nfserrno (int errno)
++{
++ static struct {
++ __be32 nfserr;
++ int syserr;
++ } nfs_errtbl[] = {
++ { nfs_ok, 0 },
++ { nfserr_perm, -EPERM },
++ { nfserr_noent, -ENOENT },
++ { nfserr_io, -EIO },
++ { nfserr_nxio, -ENXIO },
++ { nfserr_fbig, -E2BIG },
++ { nfserr_stale, -EBADF },
++ { nfserr_acces, -EACCES },
++ { nfserr_exist, -EEXIST },
++ { nfserr_xdev, -EXDEV },
++ { nfserr_mlink, -EMLINK },
++ { nfserr_nodev, -ENODEV },
++ { nfserr_notdir, -ENOTDIR },
++ { nfserr_isdir, -EISDIR },
++ { nfserr_inval, -EINVAL },
++ { nfserr_fbig, -EFBIG },
++ { nfserr_nospc, -ENOSPC },
++ { nfserr_rofs, -EROFS },
++ { nfserr_mlink, -EMLINK },
++ { nfserr_nametoolong, -ENAMETOOLONG },
++ { nfserr_notempty, -ENOTEMPTY },
++ { nfserr_dquot, -EDQUOT },
++ { nfserr_stale, -ESTALE },
++ { nfserr_jukebox, -ETIMEDOUT },
++ { nfserr_jukebox, -ERESTARTSYS },
++ { nfserr_jukebox, -EAGAIN },
++ { nfserr_jukebox, -EWOULDBLOCK },
++ { nfserr_jukebox, -ENOMEM },
++ { nfserr_io, -ETXTBSY },
++ { nfserr_notsupp, -EOPNOTSUPP },
++ { nfserr_toosmall, -ETOOSMALL },
++ { nfserr_serverfault, -ESERVERFAULT },
++ { nfserr_serverfault, -ENFILE },
++ { nfserr_io, -EREMOTEIO },
++ { nfserr_stale, -EOPENSTALE },
++ { nfserr_io, -EUCLEAN },
++ { nfserr_perm, -ENOKEY },
++ { nfserr_no_grace, -ENOGRACE},
++ };
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
++ if (nfs_errtbl[i].syserr == errno)
++ return nfs_errtbl[i].nfserr;
++ }
++ WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
++ return nfserr_io;
++}
++
+ /*
+ * Called from nfsd_lookup and encode_dirent. Check if we have crossed
+ * a mount point.
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 120521bc7b247..8ddd687f83599 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -60,6 +60,7 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
+ posix_acl_release(attrs->na_dpacl);
+ }
+
++__be32 nfserrno (int errno);
+ int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
+ __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+--
+2.43.0
+
--- /dev/null
+From 026255b4acd23ff7758be0d33ba77f541a4e8ce6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Feb 2022 12:16:27 -0500
+Subject: NFSD: Move svc_serv_ops::svo_function into struct svc_serv
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 37902c6313090235c847af89c5515591261ee338 ]
+
+Hoist svo_function back into svc_serv and remove struct
+svc_serv_ops, since the struct is now devoid of fields.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 6 +-----
+ fs/nfs/callback.c | 43 ++++++++++----------------------------
+ fs/nfsd/nfssvc.c | 7 +------
+ include/linux/sunrpc/svc.h | 14 ++++---------
+ net/sunrpc/svc.c | 37 ++++++++++++++++++++++----------
+ 5 files changed, 43 insertions(+), 64 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index bfde31124f3af..59ef8a1f843f3 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -349,10 +349,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
+ };
+ #endif
+
+-static const struct svc_serv_ops lockd_sv_ops = {
+- .svo_function = lockd,
+-};
+-
+ static int lockd_get(void)
+ {
+ struct svc_serv *serv;
+@@ -376,7 +372,7 @@ static int lockd_get(void)
+ nlm_timeout = LOCKD_DFLT_TIMEO;
+ nlmsvc_timeout = nlm_timeout * HZ;
+
+- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops);
++ serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd);
+ if (!serv) {
+ printk(KERN_WARNING "lockd_up: create service failed\n");
+ return -ENOMEM;
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index a494f9e7bd0a0..456af7d230cf1 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -231,29 +231,10 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+ return ret;
+ }
+
+-static const struct svc_serv_ops nfs40_cb_sv_ops = {
+- .svo_function = nfs4_callback_svc,
+-};
+-#if defined(CONFIG_NFS_V4_1)
+-static const struct svc_serv_ops nfs41_cb_sv_ops = {
+- .svo_function = nfs41_callback_svc,
+-};
+-
+-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+- [0] = &nfs40_cb_sv_ops,
+- [1] = &nfs41_cb_sv_ops,
+-};
+-#else
+-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+- [0] = &nfs40_cb_sv_ops,
+- [1] = NULL,
+-};
+-#endif
+-
+ static struct svc_serv *nfs_callback_create_svc(int minorversion)
+ {
+ struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+- const struct svc_serv_ops *sv_ops;
++ int (*threadfn)(void *data);
+ struct svc_serv *serv;
+
+ /*
+@@ -262,17 +243,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
+ if (cb_info->serv)
+ return svc_get(cb_info->serv);
+
+- switch (minorversion) {
+- case 0:
+- sv_ops = nfs4_cb_sv_ops[0];
+- break;
+- default:
+- sv_ops = nfs4_cb_sv_ops[1];
+- }
+-
+- if (sv_ops == NULL)
+- return ERR_PTR(-ENOTSUPP);
+-
+ /*
+ * Sanity check: if there's no task,
+ * we should be the first user ...
+@@ -281,7 +251,16 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
+ printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
+ cb_info->users);
+
+- serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
++ threadfn = nfs4_callback_svc;
++#if defined(CONFIG_NFS_V4_1)
++ if (minorversion)
++ threadfn = nfs41_callback_svc;
++#else
++ if (minorversion)
++ return ERR_PTR(-ENOTSUPP);
++#endif
++ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
++ threadfn);
+ if (!serv) {
+ printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
+ return ERR_PTR(-ENOMEM);
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index d25d4c12a499a..2f74be98ff2d9 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -612,10 +612,6 @@ static int nfsd_get_default_max_blksize(void)
+ return ret;
+ }
+
+-static const struct svc_serv_ops nfsd_thread_sv_ops = {
+- .svo_function = nfsd,
+-};
+-
+ void nfsd_shutdown_threads(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+@@ -654,8 +650,7 @@ int nfsd_create_serv(struct net *net)
+ if (nfsd_max_blksize == 0)
+ nfsd_max_blksize = nfsd_get_default_max_blksize();
+ nfsd_reset_versions(nn);
+- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+- &nfsd_thread_sv_ops);
++ serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
+ if (serv == NULL)
+ return -ENOMEM;
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 61768495354a0..1d9a81bab3fa2 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -52,13 +52,6 @@ struct svc_pool {
+ unsigned long sp_flags;
+ } ____cacheline_aligned_in_smp;
+
+-struct svc_serv;
+-
+-struct svc_serv_ops {
+- /* function for service threads to run */
+- int (*svo_function)(void *);
+-};
+-
+ /*
+ * RPC service.
+ *
+@@ -91,7 +84,8 @@ struct svc_serv {
+
+ unsigned int sv_nrpools; /* number of thread pools */
+ struct svc_pool * sv_pools; /* array of thread pools */
+- const struct svc_serv_ops *sv_ops; /* server operations */
++ int (*sv_threadfn)(void *data);
++
+ #if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ struct list_head sv_cb_list; /* queue for callback requests
+ * that arrive over the same
+@@ -494,7 +488,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
+ void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
+ int svc_bind(struct svc_serv *serv, struct net *net);
+ struct svc_serv *svc_create(struct svc_program *, unsigned int,
+- const struct svc_serv_ops *);
++ int (*threadfn)(void *data));
+ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
+ struct svc_pool *pool, int node);
+ void svc_rqst_replace_page(struct svc_rqst *rqstp,
+@@ -502,7 +496,7 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp,
+ void svc_rqst_free(struct svc_rqst *);
+ void svc_exit_thread(struct svc_rqst *);
+ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+- const struct svc_serv_ops *);
++ int (*threadfn)(void *data));
+ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+ int svc_process(struct svc_rqst *);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 239d10018216a..87da3ff46ce9a 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -448,7 +448,7 @@ __svc_init_bc(struct svc_serv *serv)
+ */
+ static struct svc_serv *
+ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+- const struct svc_serv_ops *ops)
++ int (*threadfn)(void *data))
+ {
+ struct svc_serv *serv;
+ unsigned int vers;
+@@ -465,7 +465,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ bufsize = RPCSVC_MAXPAYLOAD;
+ serv->sv_max_payload = bufsize? bufsize : 4096;
+ serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
+- serv->sv_ops = ops;
++ serv->sv_threadfn = threadfn;
+ xdrsize = 0;
+ while (prog) {
+ prog->pg_lovers = prog->pg_nvers-1;
+@@ -511,22 +511,37 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ return serv;
+ }
+
+-struct svc_serv *
+-svc_create(struct svc_program *prog, unsigned int bufsize,
+- const struct svc_serv_ops *ops)
++/**
++ * svc_create - Create an RPC service
++ * @prog: the RPC program the new service will handle
++ * @bufsize: maximum message size for @prog
++ * @threadfn: a function to service RPC requests for @prog
++ *
++ * Returns an instantiated struct svc_serv object or NULL.
++ */
++struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
++ int (*threadfn)(void *data))
+ {
+- return __svc_create(prog, bufsize, /*npools*/1, ops);
++ return __svc_create(prog, bufsize, 1, threadfn);
+ }
+ EXPORT_SYMBOL_GPL(svc_create);
+
+-struct svc_serv *
+-svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
+- const struct svc_serv_ops *ops)
++/**
++ * svc_create_pooled - Create an RPC service with pooled threads
++ * @prog: the RPC program the new service will handle
++ * @bufsize: maximum message size for @prog
++ * @threadfn: a function to service RPC requests for @prog
++ *
++ * Returns an instantiated struct svc_serv object or NULL.
++ */
++struct svc_serv *svc_create_pooled(struct svc_program *prog,
++ unsigned int bufsize,
++ int (*threadfn)(void *data))
+ {
+ struct svc_serv *serv;
+ unsigned int npools = svc_pool_map_get();
+
+- serv = __svc_create(prog, bufsize, npools, ops);
++ serv = __svc_create(prog, bufsize, npools, threadfn);
+ if (!serv)
+ goto out_err;
+ return serv;
+@@ -736,7 +751,7 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ if (IS_ERR(rqstp))
+ return PTR_ERR(rqstp);
+
+- task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
++ task = kthread_create_on_node(serv->sv_threadfn, rqstp,
+ node, "%s", serv->sv_name);
+ if (IS_ERR(task)) {
+ svc_exit_thread(rqstp);
+--
+2.43.0
+
--- /dev/null
+From 8dfd94e04e2cd7144153134ecc061ffdc7d3ed27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: NFSD: narrow nfsd_mutex protection in nfsd thread
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 9d3792aefdcda71d20c2b1ecc589c17ae71eb523 ]
+
+Nothing at the start of nfsd() requires protection by nfsd_mutex, so
+don't take it there. Shutting down the thread does still require
+protection, but only around the call to nfsd_put().
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+[ cel: address merge conflict with fd2468fa1301 ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 16884a90e1ab0..eb8cc4d914fee 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -932,9 +932,6 @@ nfsd(void *vrqstp)
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int err;
+
+- /* Lock module and set up kernel thread */
+- mutex_lock(&nfsd_mutex);
+-
+ /* At this point, the thread shares current->fs
+ * with the init process. We need to create files with the
+ * umask as defined by the client instead of init's umask. */
+@@ -954,7 +951,6 @@ nfsd(void *vrqstp)
+ allow_signal(SIGINT);
+ allow_signal(SIGQUIT);
+
+- mutex_unlock(&nfsd_mutex);
+ atomic_inc(&nfsdstats.th_cnt);
+
+ set_freezable();
+@@ -983,7 +979,6 @@ nfsd(void *vrqstp)
+ flush_signals(current);
+
+ atomic_dec(&nfsdstats.th_cnt);
+- mutex_lock(&nfsd_mutex);
+
+ out:
+ /* Take an extra ref so that the svc_put in svc_exit_thread()
+@@ -995,10 +990,11 @@ nfsd(void *vrqstp)
+ svc_exit_thread(rqstp);
+
+ /* Now if needed we call svc_destroy in appropriate context */
++ mutex_lock(&nfsd_mutex);
+ nfsd_put(net);
++ mutex_unlock(&nfsd_mutex);
+
+ /* Release module */
+- mutex_unlock(&nfsd_mutex);
+ module_put_and_kthread_exit(0);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 9957ce298c887effcb955e834d4424b05a060b71 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:30 -0400
+Subject: NFSD: Never call nfsd_file_gc() in foreground paths
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 6df19411367a5fb4ef61854cbd1af269c077f917 ]
+
+The checks in nfsd_file_acquire() and nfsd_file_put() that directly
+invoke filecache garbage collection are intended to keep cache
+occupancy between a low- and high-watermark. The reason to limit the
+capacity of the filecache is to keep filecache lookups reasonably
+fast.
+
+However, invoking garbage collection at those points has some
+undesirable negative impacts. Files that are held open by NFSv4
+clients often push the occupancy of the filecache over these
+watermarks. At that point:
+
+- Every call to nfsd_file_acquire() and nfsd_file_put() results in
+ an LRU walk. This has the same effect on lookup latency as long
+ chains in the hash table.
+- Garbage collection will then run on every nfsd thread, causing a
+ lot of unnecessary lock contention.
+- Limiting cache capacity pushes out files used only by NFSv3
+ clients, which are the type of files the filecache is supposed to
+ help.
+
+To address those negative impacts, remove the direct calls to the
+garbage collector. Subsequent patches will address maintaining
+lookup efficiency as cache capacity increases.
+
+Suggested-by: Wang Yugui <wangyugui@e16-tech.com>
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 10 +---------
+ 1 file changed, 1 insertion(+), 9 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 849c010c6ef61..7a02ff11b9ec1 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -28,8 +28,6 @@
+ #define NFSD_LAUNDRETTE_DELAY (2 * HZ)
+
+ #define NFSD_FILE_SHUTDOWN (1)
+-#define NFSD_FILE_LRU_THRESHOLD (4096UL)
+-#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
+
+ /* We only care about NFSD_MAY_READ/WRITE for this cache */
+ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+@@ -65,8 +63,6 @@ static struct fsnotify_group *nfsd_file_fsnotify_group;
+ static atomic_long_t nfsd_filecache_count;
+ static struct delayed_work nfsd_filecache_laundrette;
+
+-static void nfsd_file_gc(void);
+-
+ static void
+ nfsd_file_schedule_laundrette(void)
+ {
+@@ -343,9 +339,6 @@ nfsd_file_put(struct nfsd_file *nf)
+ nfsd_file_schedule_laundrette();
+ } else
+ nfsd_file_put_noref(nf);
+-
+- if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+- nfsd_file_gc();
+ }
+
+ struct nfsd_file *
+@@ -1054,8 +1047,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+ nfsd_file_hashtbl[hashval].nfb_count);
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+- if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
+- nfsd_file_gc();
++ atomic_long_inc(&nfsd_filecache_count);
+
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark) {
+--
+2.43.0
+
--- /dev/null
+From d90620a488bf2daa9d58ae53f77911e6f590ee48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:03 -0400
+Subject: NFSD: nfsd_file_hash_remove can compute hashval
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit cb7ec76e73ff6640241c8f1f2f35c81d4005a2d6 ]
+
+Remove an unnecessary use of nf_hashval.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 3925df9124c39..dd59deec8b011 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -287,6 +287,18 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
+ atomic_long_dec(&nfsd_filecache_count);
+ }
+
++static void
++nfsd_file_hash_remove(struct nfsd_file *nf)
++{
++ struct inode *inode = nf->nf_inode;
++ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
++ NFSD_FILE_HASH_BITS);
++
++ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
++ nfsd_file_do_unhash(nf);
++ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
++}
++
+ static bool
+ nfsd_file_unhash(struct nfsd_file *nf)
+ {
+@@ -506,11 +518,8 @@ static void nfsd_file_gc_dispose_list(struct list_head *dispose)
+ {
+ struct nfsd_file *nf;
+
+- list_for_each_entry(nf, dispose, nf_lru) {
+- spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+- nfsd_file_do_unhash(nf);
+- spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+- }
++ list_for_each_entry(nf, dispose, nf_lru)
++ nfsd_file_hash_remove(nf);
+ nfsd_file_dispose_list_delayed(dispose);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 2746dca85b8d81bd0f0cb8531029dbb3b889e6b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:39:00 -0500
+Subject: nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 6c31e4c98853a4ba47355ea151b36a77c42b7734 ]
+
+Since v4 files are expected to be long-lived, there's little value in
+closing them out of the cache when there is conflicting access.
+
+Change the comparator to also match the gc value in the key. Change both
+of the current users of that key to set the gc value in the key to
+"true".
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 677a8d935ccc2..4ddc82b84f7c4 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -174,6 +174,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+
+ switch (key->type) {
+ case NFSD_FILE_KEY_INODE:
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++ return 1;
+ if (nf->nf_inode != key->inode)
+ return 1;
+ break;
+@@ -694,6 +696,7 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
++ .gc = true,
+ };
+ struct nfsd_file *nf;
+
+@@ -1048,6 +1051,7 @@ nfsd_file_is_cached(struct inode *inode)
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
++ .gc = true,
+ };
+ bool ret = false;
+
+--
+2.43.0
+
--- /dev/null
+From 62d98d73ffd8d62052ea82eb1a401efcc0f81da2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 May 2022 13:02:21 -0400
+Subject: NFSD: nfsd_file_put() can sleep
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 08af54b3e5729bc1d56ad3190af811301bdc37a1 ]
+
+Now that there are no more callers of nfsd_file_put() that might
+hold a spin lock, ensure the lockdep infrastructure can catch
+newly introduced calls to nfsd_file_put() made while a spinlock
+is held.
+
+Link: https://lore.kernel.org/linux-nfs/ece7fd1d-5fb3-5155-54ba-347cfc19bd9a@oracle.com/T/#mf1855552570cf9a9c80d1e49d91438cd9085aada
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 0863bf5050935..27952e2f3aa14 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -296,6 +296,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
+ void
+ nfsd_file_put(struct nfsd_file *nf)
+ {
++ might_sleep();
++
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
+ nfsd_file_flush(nf);
+--
+2.43.0
+
--- /dev/null
+From 0ecd2d589730dd7433691535686d837262ebc1b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:50 -0400
+Subject: NFSD: nfsd_file_unhash can compute hashval from nf->nf_inode
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8755326399f471ec3b31e2ab8c5074c0d28a0fb5 ]
+
+Remove an unnecessary usage of nf_hashval.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6a01de8677959..d7c74b51eabf3 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -272,13 +272,17 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf)
+ static void
+ nfsd_file_do_unhash(struct nfsd_file *nf)
+ {
+- lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
++ struct inode *inode = nf->nf_inode;
++ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
++ NFSD_FILE_HASH_BITS);
++
++ lockdep_assert_held(&nfsd_file_hashtbl[hashval].nfb_lock);
+
+ trace_nfsd_file_unhash(nf);
+
+ if (nfsd_file_check_write_error(nf))
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+- --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
++ --nfsd_file_hashtbl[hashval].nfb_count;
+ hlist_del_rcu(&nf->nf_node);
+ atomic_long_dec(&nfsd_filecache_count);
+ }
+--
+2.43.0
+
--- /dev/null
+From 52c15708669df28be6990109126b45d3d3cb7111 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:09 -0400
+Subject: NFSD: nfserrno(-ENOMEM) is nfserr_jukebox
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit bb4d842722b84a2731257054b6405f2d866fc5f3 ]
+
+Suggested-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index a5ab6ea475423..fb891249694c3 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -1810,7 +1810,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
+ for (i = 0; i < test_stateid->ts_num_ids; i++) {
+ stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
+ if (!stateid)
+- return nfserrno(-ENOMEM); /* XXX: not jukebox? */
++ return nfserr_jukebox;
+ INIT_LIST_HEAD(&stateid->ts_id_list);
+ list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
+ status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid);
+@@ -1933,7 +1933,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+
+ ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
+ if (ns_dummy == NULL)
+- return nfserrno(-ENOMEM); /* XXX: jukebox? */
++ return nfserr_jukebox;
+ for (i = 0; i < count - 1; i++) {
+ status = nfsd4_decode_nl4_server(argp, ns_dummy);
+ if (status) {
+--
+2.43.0
+
--- /dev/null
+From 742382db20f28b4598c2e2143ee77610e9f052fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:27:02 -0400
+Subject: NFSD: NFSv4 CLOSE should release an nfsd_file immediately
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5e138c4a750dc140d881dab4a8804b094bbc08d2 ]
+
+The last close of a file should enable other accessors to open and
+use that file immediately. Leaving the file open in the filecache
+prevents other users from accessing that file until the filecache
+garbage-collects the file -- sometimes that takes several seconds.
+
+Reported-by: Wang Yugui <wangyugui@e16-tech.com>
+Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=387
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 18 ++++++++++++++++++
+ fs/nfsd/filecache.h | 1 +
+ fs/nfsd/nfs4state.c | 4 ++--
+ 3 files changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 26cfae138b906..7ad27655db699 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -451,6 +451,24 @@ nfsd_file_put(struct nfsd_file *nf)
+ nfsd_file_put_noref(nf);
+ }
+
++/**
++ * nfsd_file_close - Close an nfsd_file
++ * @nf: nfsd_file to close
++ *
++ * If this is the final reference for @nf, free it immediately.
++ * This reflects an on-the-wire CLOSE or DELEGRETURN into the
++ * VFS and exported filesystem.
++ */
++void nfsd_file_close(struct nfsd_file *nf)
++{
++ nfsd_file_put(nf);
++ if (refcount_dec_if_one(&nf->nf_ref)) {
++ nfsd_file_unhash(nf);
++ nfsd_file_lru_remove(nf);
++ nfsd_file_free(nf);
++ }
++}
++
+ struct nfsd_file *
+ nfsd_file_get(struct nfsd_file *nf)
+ {
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index ee9ed99d8b8fa..28145f1628923 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -52,6 +52,7 @@ void nfsd_file_cache_shutdown(void);
+ int nfsd_file_cache_start_net(struct net *net);
+ void nfsd_file_cache_shutdown_net(struct net *net);
+ void nfsd_file_put(struct nfsd_file *nf);
++void nfsd_file_close(struct nfsd_file *nf);
+ struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+ void nfsd_file_close_inode_sync(struct inode *inode);
+ bool nfsd_file_is_cached(struct inode *inode);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index d349abf0821d6..923eec2716d75 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -831,9 +831,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
+ swap(f2, fp->fi_fds[O_RDWR]);
+ spin_unlock(&fp->fi_lock);
+ if (f1)
+- nfsd_file_put(f1);
++ nfsd_file_close(f1);
+ if (f2)
+- nfsd_file_put(f2);
++ nfsd_file_close(f2);
+ }
+ }
+
+--
+2.43.0
+
--- /dev/null
+From b7ec1986b4cf9f3273746fd47e1f9b36fadd9c98 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:37 -0400
+Subject: NFSD: No longer record nf_hashval in the trace log
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 54f7df7094b329ca35d9f9808692bb16c48b13e9 ]
+
+I'm about to replace nfsd_file_hashtbl with an rhashtable. The
+individual hash values will no longer be visible or relevant, so
+remove them from the tracepoints.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 15 ++++++++-------
+ fs/nfsd/trace.h | 45 +++++++++++++++++++++------------------------
+ 2 files changed, 29 insertions(+), 31 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 7a02ff11b9ec1..2d013a88e3565 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -588,7 +588,7 @@ nfsd_file_close_inode_sync(struct inode *inode)
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+- trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
++ trace_nfsd_file_close_inode_sync(inode, !list_empty(&dispose));
+ nfsd_file_dispose_list_sync(&dispose);
+ }
+
+@@ -608,7 +608,7 @@ nfsd_file_close_inode(struct inode *inode)
+ LIST_HEAD(dispose);
+
+ __nfsd_file_close_inode(inode, hashval, &dispose);
+- trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
++ trace_nfsd_file_close_inode(inode, !list_empty(&dispose));
+ nfsd_file_dispose_list_delayed(&dispose);
+ }
+
+@@ -962,7 +962,7 @@ nfsd_file_is_cached(struct inode *inode)
+ }
+ }
+ rcu_read_unlock();
+- trace_nfsd_file_is_cached(inode, hashval, (int)ret);
++ trace_nfsd_file_is_cached(inode, (int)ret);
+ return ret;
+ }
+
+@@ -994,9 +994,8 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ new = nfsd_file_alloc(inode, may_flags, hashval, net);
+ if (!new) {
+- trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
+- NULL, nfserr_jukebox);
+- return nfserr_jukebox;
++ status = nfserr_jukebox;
++ goto out_status;
+ }
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+@@ -1034,8 +1033,10 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ nf = NULL;
+ }
+
+- trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
++out_status:
++ trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
+ return status;
++
+ open_file:
+ nf = new;
+ /* Take reference for the hashtable */
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index cc55a2b32e8cd..655b56c87600b 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -747,7 +747,6 @@ DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+- __field(unsigned int, nf_hashval)
+ __field(void *, nf_inode)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+@@ -755,15 +754,13 @@ DECLARE_EVENT_CLASS(nfsd_file_class,
+ __field(struct file *, nf_file)
+ ),
+ TP_fast_assign(
+- __entry->nf_hashval = nf->nf_hashval;
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+- TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p",
+- __entry->nf_hashval,
++ TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p",
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+@@ -783,15 +780,18 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked);
+
+ TRACE_EVENT(nfsd_file_acquire,
+- TP_PROTO(struct svc_rqst *rqstp, unsigned int hash,
+- struct inode *inode, unsigned int may_flags,
+- struct nfsd_file *nf, __be32 status),
++ TP_PROTO(
++ struct svc_rqst *rqstp,
++ struct inode *inode,
++ unsigned int may_flags,
++ struct nfsd_file *nf,
++ __be32 status
++ ),
+
+- TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
++ TP_ARGS(rqstp, inode, may_flags, nf, status),
+
+ TP_STRUCT__entry(
+ __field(u32, xid)
+- __field(unsigned int, hash)
+ __field(void *, inode)
+ __field(unsigned long, may_flags)
+ __field(int, nf_ref)
+@@ -803,7 +803,6 @@ TRACE_EVENT(nfsd_file_acquire,
+
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+- __entry->hash = hash;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
+@@ -813,8 +812,8 @@ TRACE_EVENT(nfsd_file_acquire,
+ __entry->status = be32_to_cpu(status);
+ ),
+
+- TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u",
+- __entry->xid, __entry->hash, __entry->inode,
++ TP_printk("xid=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u",
++ __entry->xid, __entry->inode,
+ show_nfsd_may_flags(__entry->may_flags),
+ __entry->nf_ref, show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+@@ -825,7 +824,6 @@ TRACE_EVENT(nfsd_file_open,
+ TP_PROTO(struct nfsd_file *nf, __be32 status),
+ TP_ARGS(nf, status),
+ TP_STRUCT__entry(
+- __field(unsigned int, nf_hashval)
+ __field(void *, nf_inode) /* cannot be dereferenced */
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+@@ -833,15 +831,13 @@ TRACE_EVENT(nfsd_file_open,
+ __field(void *, nf_file) /* cannot be dereferenced */
+ ),
+ TP_fast_assign(
+- __entry->nf_hashval = nf->nf_hashval;
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+- TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p",
+- __entry->nf_hashval,
++ TP_printk("inode=%p ref=%d flags=%s may=%s file=%p",
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+@@ -850,26 +846,27 @@ TRACE_EVENT(nfsd_file_open,
+ )
+
+ DECLARE_EVENT_CLASS(nfsd_file_search_class,
+- TP_PROTO(struct inode *inode, unsigned int hash, int found),
+- TP_ARGS(inode, hash, found),
++ TP_PROTO(
++ struct inode *inode,
++ int found
++ ),
++ TP_ARGS(inode, found),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+- __field(unsigned int, hash)
+ __field(int, found)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+- __entry->hash = hash;
+ __entry->found = found;
+ ),
+- TP_printk("hash=0x%x inode=%p found=%d", __entry->hash,
+- __entry->inode, __entry->found)
++ TP_printk("inode=%p found=%d",
++ __entry->inode, __entry->found)
+ );
+
+ #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
+ DEFINE_EVENT(nfsd_file_search_class, name, \
+- TP_PROTO(struct inode *inode, unsigned int hash, int found), \
+- TP_ARGS(inode, hash, found))
++ TP_PROTO(struct inode *inode, int found), \
++ TP_ARGS(inode, found))
+
+ DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+ DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+--
+2.43.0
+
--- /dev/null
+From 325495c4b028a5326191f2ee000299fdec104e2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: only call fh_unlock() once in nfsd_link()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit e18bcb33bc5b69bccc2b532075aa00bb49cc01c5 ]
+
+On non-error paths, nfsd_link() calls fh_unlock() twice. This is safe
+because fh_unlock() records that the unlock has been done and doesn't
+repeat it.
+However it makes the code a little confusing and interferes with changes
+that are planned for directory locking.
+
+So rearrange the code to ensure fh_unlock() is called exactly once if
+fh_lock() was called.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 4b1304fe718fd..ac716ced1fd5f 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1542,9 +1542,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+ dirp = d_inode(ddir);
+
+ dnew = lookup_one_len(name, ddir, len);
+- host_err = PTR_ERR(dnew);
+- if (IS_ERR(dnew))
+- goto out_nfserr;
++ if (IS_ERR(dnew)) {
++ err = nfserrno(PTR_ERR(dnew));
++ goto out_unlock;
++ }
+
+ dold = tfhp->fh_dentry;
+
+@@ -1563,17 +1564,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+ else
+ err = nfserrno(host_err);
+ }
+-out_dput:
+ dput(dnew);
+-out_unlock:
+- fh_unlock(ffhp);
++out_drop_write:
+ fh_drop_write(tfhp);
+ out:
+ return err;
+
+-out_nfserr:
+- err = nfserrno(host_err);
+- goto out_unlock;
++out_dput:
++ dput(dnew);
++out_unlock:
++ fh_unlock(ffhp);
++ goto out_drop_write;
+ }
+
+ static void
+--
+2.43.0
+
--- /dev/null
+From 37536ffe8a76bc6fbbdb327b846da38afc81b1e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Sep 2022 12:38:44 -0400
+Subject: nfsd: only fill out return pointer on success in nfsd4_lookup_stateid
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 4d01416ab41540bb13ec4a39ac4e6c4aa5934bc9 ]
+
+In the case of a revoked delegation, we still fill out the pointer even
+when returning an error, which is bad form. Only overwrite the pointer
+on success.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index f427f95ab934e..1e9245303c0f2 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -6279,6 +6279,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
+ struct nfs4_stid **s, struct nfsd_net *nn)
+ {
+ __be32 status;
++ struct nfs4_stid *stid;
+ bool return_revoked = false;
+
+ /*
+@@ -6301,15 +6302,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
+ }
+ if (status)
+ return status;
+- *s = find_stateid_by_type(cstate->clp, stateid, typemask);
+- if (!*s)
++ stid = find_stateid_by_type(cstate->clp, stateid, typemask);
++ if (!stid)
+ return nfserr_bad_stateid;
+- if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+- nfs4_put_stid(*s);
++ if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
++ nfs4_put_stid(stid);
+ if (cstate->minorversion)
+ return nfserr_deleg_revoked;
+ return nfserr_bad_stateid;
+ }
++ *s = stid;
+ return nfs_ok;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From c0e6697e54c50f20cbdbc6ea0c9023ec6821ee1b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Sep 2021 15:25:21 -0400
+Subject: NFSD: Optimize DRC bucket pruning
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8847ecc9274a14114385d1cb4030326baa0766eb ]
+
+DRC bucket pruning is done by nfsd_cache_lookup(), which is part of
+every NFSv2 and NFSv3 dispatch (ie, it's done while the client is
+waiting).
+
+I added a trace_printk() in prune_bucket() to see just how long
+it takes to prune. Here are two ends of the spectrum:
+
+ prune_bucket: Scanned 1 and freed 0 in 90 ns, 62 entries remaining
+ prune_bucket: Scanned 2 and freed 1 in 716 ns, 63 entries remaining
+...
+ prune_bucket: Scanned 75 and freed 74 in 34149 ns, 1 entries remaining
+
+Pruning latency is noticeable on fast transports with fast storage.
+By noticeable, I mean that the latency measured here in the worst
+case is the same order of magnitude as the round trip time for
+cached server operations.
+
+We could do something like moving expired entries to an expired list
+and then free them later instead of freeing them right in
+prune_bucket(). But simply limiting the number of entries that can
+be pruned by a lookup is simple and retains more entries in the
+cache, making the DRC somewhat more effective.
+
+Comparison with a 70/30 fio 8KB 12 thread direct I/O test:
+
+Before:
+
+ write: IOPS=61.6k, BW=481MiB/s (505MB/s)(14.1GiB/30001msec); 0 zone resets
+
+WRITE:
+ 1848726 ops (30%)
+ avg bytes sent per op: 8340 avg bytes received per op: 136
+ backlog wait: 0.635158 RTT: 0.128525 total execute time: 0.827242 (milliseconds)
+
+After:
+
+ write: IOPS=63.0k, BW=492MiB/s (516MB/s)(14.4GiB/30001msec); 0 zone resets
+
+WRITE:
+ 1891144 ops (30%)
+ avg bytes sent per op: 8340 avg bytes received per op: 136
+ backlog wait: 0.616114 RTT: 0.126842 total execute time: 0.805348 (milliseconds)
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfscache.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 830bb8493c7fd..6b9ef15c9c03b 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -241,8 +241,8 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+ list_move_tail(&rp->c_lru, &b->lru_head);
+ }
+
+-static long
+-prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
++static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
++ unsigned int max)
+ {
+ struct svc_cacherep *rp, *tmp;
+ long freed = 0;
+@@ -258,11 +258,17 @@ prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
+ time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
+ break;
+ nfsd_reply_cache_free_locked(b, rp, nn);
+- freed++;
++ if (max && freed++ > max)
++ break;
+ }
+ return freed;
+ }
+
++static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
++{
++ return prune_bucket(b, nn, 3);
++}
++
+ /*
+ * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
+ * Also prune the oldest ones when the total exceeds the max number of entries.
+@@ -279,7 +285,7 @@ prune_cache_entries(struct nfsd_net *nn)
+ if (list_empty(&b->lru_head))
+ continue;
+ spin_lock(&b->cache_lock);
+- freed += prune_bucket(b, nn);
++ freed += prune_bucket(b, nn, 0);
+ spin_unlock(&b->cache_lock);
+ }
+ return freed;
+@@ -453,8 +459,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
+ atomic_inc(&nn->num_drc_entries);
+ nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
+
+- /* go ahead and prune the cache */
+- prune_bucket(b, nn);
++ nfsd_prune_bucket(b, nn);
+
+ out_unlock:
+ spin_unlock(&b->cache_lock);
+--
+2.43.0
+
--- /dev/null
+From 5f525d354b3f9e68e5ed1f963209d27c2ffc9c69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:08:45 -0400
+Subject: NFSD: Optimize nfsd4_encode_fattr()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit ab04de60ae1cc64ae16b77feae795311b97720c7 ]
+
+write_bytes_to_xdr_buf() is a generic way to place a variable-length
+data item in an already-reserved spot in the encoding buffer.
+
+However, it is costly. In nfsd4_encode_fattr(), it is unnecessary
+because the data item is fixed in size and the buffer destination
+address is always word-aligned.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 9eba9afe13794..d5a4aa0da32be 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2825,10 +2825,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ struct kstat stat;
+ struct svc_fh *tempfh = NULL;
+ struct kstatfs statfs;
+- __be32 *p;
++ __be32 *p, *attrlen_p;
+ int starting_len = xdr->buf->len;
+ int attrlen_offset;
+- __be32 attrlen;
+ u32 dummy;
+ u64 dummy64;
+ u32 rdattr_err = 0;
+@@ -2916,10 +2915,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ goto out;
+
+ attrlen_offset = xdr->buf->len;
+- p = xdr_reserve_space(xdr, 4);
+- if (!p)
++ attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
++ if (!attrlen_p)
+ goto out_resource;
+- p++; /* to be backfilled later */
+
+ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ u32 supp[3];
+@@ -3341,8 +3339,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ *p++ = cpu_to_be32(err == 0);
+ }
+
+- attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
+- write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
++ *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+ status = nfs_ok;
+
+ out:
+--
+2.43.0
+
--- /dev/null
+From cdf8c4c3c8977de301605432dfe74686ad47b864 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:08:38 -0400
+Subject: NFSD: Optimize nfsd4_encode_operation()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 095a764b7afb06c9499b798c04eaa3cbf70ebe2d ]
+
+write_bytes_to_xdr_buf() is a generic way to place a variable-length
+data item in an already-reserved spot in the encoding buffer.
+However, it is costly, and here, it is unnecessary because the
+data item is fixed in size, the buffer destination address is
+always word-aligned, and the destination location is already in
+@p.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 07f891d7fa0ae..9eba9afe13794 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5378,8 +5378,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+ so->so_replay.rp_buf, len);
+ }
+ status:
+- /* Note that op->status is already in network byte order: */
+- write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4);
++ *p = op->status;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From c9e8c69c71105f61994d917397509794ce1652ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:09:04 -0400
+Subject: NFSD: Optimize nfsd4_encode_readv()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 28d5bc468efe74b790e052f758ce083a5015c665 ]
+
+write_bytes_to_xdr_buf() is pretty expensive to use for inserting
+an XDR data item that is always 1 XDR_UNIT at an address that is
+always XDR word-aligned.
+
+Since both the readv and splice read paths encode EOF and maxcount
+values, move both to a common code path.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index b31103221fee9..b7a3c770d436b 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3888,7 +3888,6 @@ static __be32 nfsd4_encode_splice_read(
+ struct xdr_buf *buf = xdr->buf;
+ int status, space_left;
+ __be32 nfserr;
+- __be32 *p = xdr->p - 2;
+
+ /* Make sure there will be room for padding if needed */
+ if (xdr->end - xdr->p < 1)
+@@ -3907,9 +3906,6 @@ static __be32 nfsd4_encode_splice_read(
+ goto out_err;
+ }
+
+- *(p++) = htonl(read->rd_eof);
+- *(p++) = htonl(maxcount);
+-
+ buf->page_len = maxcount;
+ buf->len += maxcount;
+ xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
+@@ -3970,11 +3966,6 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ return nfserr_io;
+ xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount));
+
+- tmp = htonl(read->rd_eof);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
+- tmp = htonl(maxcount);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+-
+ tmp = xdr_zero;
+ pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
+@@ -4016,11 +4007,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ else
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+-
+- if (nfserr)
++ if (nfserr) {
+ xdr_truncate_encode(xdr, starting_len);
++ return nfserr;
++ }
+
+- return nfserr;
++ p = xdr_encode_bool(p, read->rd_eof);
++ *p = cpu_to_be32(read->rd_length);
++ return nfs_ok;
+ }
+
+ static __be32
+--
+2.43.0
+
--- /dev/null
+From 9619dd24d1adda57c1ba401d71f4b112c980f9c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:23:36 -0400
+Subject: NFSD: Pack struct nfsd4_compoundres
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 9f553e61bd36c1048543ac2f6945103dd2f742be ]
+
+Remove a couple of 4-byte holes on platforms with 64-bit pointers.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/xdr4.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index b2bc85421b507..0eb00105d845b 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -732,8 +732,8 @@ struct nfsd4_compoundres {
+ struct svc_rqst * rqstp;
+
+ __be32 *statusp;
+- u32 taglen;
+ char * tag;
++ u32 taglen;
+ u32 opcnt;
+
+ struct nfsd4_compound_state cstate;
+--
+2.43.0
+
--- /dev/null
+From 2169d32ffde12be8678ff900cd6586dbc608eb61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:46:38 -0400
+Subject: NFSD: Pass the target nfsd_file to nfsd_commit()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c252849082ff525af18b4f253b3c9ece94e951ed ]
+
+In a moment I'm going to introduce separate nfsd_file types, one of
+which is garbage-collected; the other, not. The garbage-collected
+variety is to be used by NFSv2 and v3, and the non-garbage-collected
+variety is to be used by NFSv4.
+
+nfsd_commit() is invoked by both NFSv3 and NFSv4 consumers. We want
+nfsd_commit() to find and use the correct variety of cached
+nfsd_file object for the NFS version that is in use.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 10 +++++++++-
+ fs/nfsd/nfs4proc.c | 11 ++++++++++-
+ fs/nfsd/vfs.c | 15 ++++-----------
+ fs/nfsd/vfs.h | 3 ++-
+ 4 files changed, 25 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 923d9a80df92c..ff29205463332 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -13,6 +13,7 @@
+ #include "cache.h"
+ #include "xdr3.h"
+ #include "vfs.h"
++#include "filecache.h"
+
+ #define NFSDDBG_FACILITY NFSDDBG_PROC
+
+@@ -763,6 +764,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_commitargs *argp = rqstp->rq_argp;
+ struct nfsd3_commitres *resp = rqstp->rq_resp;
++ struct nfsd_file *nf;
+
+ dprintk("nfsd: COMMIT(3) %s %u@%Lu\n",
+ SVCFH_fmt(&argp->fh),
+@@ -770,8 +772,14 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
+ (unsigned long long) argp->offset);
+
+ fh_copy(&resp->fh, &argp->fh);
+- resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset,
++ resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE |
++ NFSD_MAY_NOT_BREAK_LEASE, &nf);
++ if (resp->status)
++ goto out;
++ resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset,
+ argp->count, resp->verf);
++ nfsd_file_put(nf);
++out:
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 2e8f8b9fa3aeb..6ed0baa119433 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -731,10 +731,19 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+ {
+ struct nfsd4_commit *commit = &u->commit;
++ struct nfsd_file *nf;
++ __be32 status;
+
+- return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
++ status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE |
++ NFSD_MAY_NOT_BREAK_LEASE, &nf);
++ if (status != nfs_ok)
++ return status;
++
++ status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset,
+ commit->co_count,
+ (__be32 *)commit->co_verf.data);
++ nfsd_file_put(nf);
++ return status;
+ }
+
+ static __be32
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 70a967789a611..3c43a51e17865 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1197,6 +1197,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
++ * @nf: target file
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
+@@ -1213,19 +1214,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ * An nfsstat value in network byte order.
+ */
+ __be32
+-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+- u32 count, __be32 *verf)
++nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
++ u64 offset, u32 count, __be32 *verf)
+ {
++ __be32 err = nfs_ok;
+ u64 maxbytes;
+ loff_t start, end;
+ struct nfsd_net *nn;
+- struct nfsd_file *nf;
+- __be32 err;
+-
+- err = nfsd_file_acquire(rqstp, fhp,
+- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
+- if (err)
+- goto out;
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+@@ -1266,8 +1261,6 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ } else
+ nfsd_copy_write_verifier(verf, nn);
+
+- nfsd_file_put(nf);
+-out:
+ return err;
+ }
+
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 8ddd687f83599..dbdfef7ae85bb 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -89,7 +89,8 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+ __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd_attrs *iap);
+ __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+- u64 offset, u32 count, __be32 *verf);
++ struct nfsd_file *nf, u64 offset, u32 count,
++ __be32 *verf);
+ #ifdef CONFIG_NFSD_V4
+ __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *name, void **bufp, int *lenp);
+--
+2.43.0
+
--- /dev/null
+From c901bdaf105bc4e664ab903f32a9ef35e7418038 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 1 Sep 2022 07:27:19 +0200
+Subject: nfsd: Propagate some error code returned by memdup_user()
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+[ Upstream commit 30a30fcc3fc1ad4c5d017c9fcb75dc8f59e7bdad ]
+
+Propagate the error code returned by memdup_user() instead of a hard coded
+-EFAULT.
+
+Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4recover.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
+index 4edfc95806412..5d680045fa2c7 100644
+--- a/fs/nfsd/nfs4recover.c
++++ b/fs/nfsd/nfs4recover.c
+@@ -808,7 +808,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ return -EFAULT;
+ name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+ if (IS_ERR(name.data))
+- return -EFAULT;
++ return PTR_ERR(name.data);
+ name.len = namelen;
+ get_user(princhashlen, &ci->cc_princhash.cp_len);
+ if (princhashlen > 0) {
+@@ -817,7 +817,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ princhashlen);
+ if (IS_ERR(princhash.data)) {
+ kfree(name.data);
+- return -EFAULT;
++ return PTR_ERR(princhash.data);
+ }
+ princhash.len = princhashlen;
+ } else
+@@ -830,7 +830,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ return -EFAULT;
+ name.data = memdup_user(&cnm->cn_id, namelen);
+ if (IS_ERR(name.data))
+- return -EFAULT;
++ return PTR_ERR(name.data);
+ name.len = namelen;
+ }
+ if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
+--
+2.43.0
+
--- /dev/null
+From ebc74b5d9857500a63ed51b432ab6a179799291e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Mar 2023 10:43:47 -0500
+Subject: NFSD: Protect against filesystem freezing
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fd9a2e1d513823e840960cb3bc26d8b7749d4ac2 ]
+
+Flole observes this WARNING on occasion:
+
+[1210423.486503] WARNING: CPU: 8 PID: 1524732 at fs/ext4/ext4_jbd2.c:75 ext4_journal_check_start+0x68/0xb0
+
+Reported-by: <flole@flole.de>
+Suggested-by: Jan Kara <jack@suse.cz>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217123
+Fixes: 73da852e3831 ("nfsd: use vfs_iter_read/write")
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index db7f0119433cf..690191b3d997c 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1101,7 +1101,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+ nfsd_copy_write_verifier(verf, nn);
++ file_start_write(file);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
++ file_end_write(file);
+ if (host_err < 0) {
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+--
+2.43.0
+
--- /dev/null
+From d0f5a5edc9bdf2d5805b5fc8a46adc7310a49c20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Nov 2022 11:23:11 -0500
+Subject: nfsd: put the export reference in nfsd4_verify_deleg_dentry
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 50256e4793a5e5ab77703c82a47344ad2e774a59 ]
+
+nfsd_lookup_dentry returns an export reference in addition to the dentry
+ref. Ensure that we put it too.
+
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2138866
+Fixes: 876c553cb410 ("NFSD: verify the opened dentry after setting a delegation")
+Reported-by: Yongcheng Yang <yoyang@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index d19629de2af5d..7cfc92aa2a236 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5397,6 +5397,7 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
+ if (err)
+ return -EAGAIN;
+
++ exp_put(exp);
+ dput(child);
+ if (child != file_dentry(fp->fi_deleg_file->nf_file))
+ return -EAGAIN;
+--
+2.43.0
+
--- /dev/null
+From d0112fbb4c75220b6045e0688fd0d4c020663037 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:45 -0400
+Subject: NFSD: Record number of flush calls
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit df2aff524faceaf743b7c5ab0f4fb86cb511f782 ]
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index e5bd9f06492c8..b9941d4ef20d6 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -45,6 +45,7 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
++static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
+
+ struct nfsd_fcache_disposal {
+@@ -242,7 +243,12 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
+ static void
+ nfsd_file_flush(struct nfsd_file *nf)
+ {
+- if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
++ struct file *file = nf->nf_file;
++
++ if (!file || !(file->f_mode & FMODE_WRITE))
++ return;
++ this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
++ if (vfs_fsync(file, 1) != 0)
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ }
+
+@@ -1069,7 +1075,8 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+- unsigned long hits = 0, acquisitions = 0, releases = 0, evictions = 0;
++ unsigned long releases = 0, pages_flushed = 0, evictions = 0;
++ unsigned long hits = 0, acquisitions = 0;
+ unsigned int i, count = 0, longest = 0;
+ unsigned long lru = 0, total_age = 0;
+
+@@ -1094,6 +1101,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ releases += per_cpu(nfsd_file_releases, i);
+ total_age += per_cpu(nfsd_file_total_age, i);
+ evictions += per_cpu(nfsd_file_evictions, i);
++ pages_flushed += per_cpu(nfsd_file_pages_flushed, i);
+ }
+
+ seq_printf(m, "total entries: %u\n", count);
+@@ -1107,6 +1115,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
+ else
+ seq_printf(m, "mean age (ms): -\n");
++ seq_printf(m, "pages flushed: %lu\n", pages_flushed);
+ return 0;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From cf9d17de164859e4e41188d7b7cd3db2f5f37c16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:22:44 -0400
+Subject: NFSD: Reduce amount of struct nfsd4_compoundargs that needs clearing
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3fdc546462348b8a497c72bc894e0cde9f10fc40 ]
+
+Have SunRPC clear everything except for the iops array. Then have
+each NFSv4 XDR decoder clear its own argument before decoding.
+
+Now individual operations may have a large argument struct while not
+penalizing the vast majority of operations with a small struct.
+
+And, clearing the argument structure occurs as the argument fields
+are initialized, enabling the CPU to do write combining on that
+memory. In some cases, clearing is not even necessary because all
+of the fields in the argument structure are initialized by the
+decoder.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfs4xdr.c | 61 +++++++++++++++++++++++++++++++++++++---------
+ 2 files changed, 51 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 8aae0fb4846bc..7fdede420e65b 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -3590,7 +3590,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
+ .pc_decode = nfs4svc_decode_compoundargs,
+ .pc_encode = nfs4svc_encode_compoundres,
+ .pc_argsize = sizeof(struct nfsd4_compoundargs),
+- .pc_argzero = sizeof(struct nfsd4_compoundargs),
++ .pc_argzero = offsetof(struct nfsd4_compoundargs, iops),
+ .pc_ressize = sizeof(struct nfsd4_compoundres),
+ .pc_release = nfsd4_release_compoundargs,
+ .pc_cachetype = RC_NOCACHE,
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 3ad9b41c51730..6c43cf52a885f 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -793,6 +793,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
+ return nfserr_bad_xdr;
++ memset(&commit->co_verf, 0, sizeof(commit->co_verf));
+ return nfs_ok;
+ }
+
+@@ -801,6 +802,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
+ {
+ __be32 *p, status;
+
++ memset(create, 0, sizeof(*create));
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0)
+ return nfserr_bad_xdr;
+ switch (create->cr_type) {
+@@ -850,6 +852,7 @@ nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegretu
+ static inline __be32
+ nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
+ {
++ memset(getattr, 0, sizeof(*getattr));
+ return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
+ ARRAY_SIZE(getattr->ga_bmval));
+ }
+@@ -857,6 +860,7 @@ nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *geta
+ static __be32
+ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
+ {
++ memset(link, 0, sizeof(*link));
+ return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
+ }
+
+@@ -905,6 +909,7 @@ nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ static __be32
+ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ {
++ memset(lock, 0, sizeof(*lock));
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
+ return nfserr_bad_xdr;
+ if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
+@@ -921,6 +926,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+ static __be32
+ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+ {
++ memset(lockt, 0, sizeof(*lockt));
+ if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
+ return nfserr_bad_xdr;
+ if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+@@ -1142,11 +1148,8 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+ __be32 status;
+ u32 dummy;
+
+- memset(open->op_bmval, 0, sizeof(open->op_bmval));
+- open->op_iattr.ia_valid = 0;
+- open->op_openowner = NULL;
++ memset(open, 0, sizeof(*open));
+
+- open->op_xdr_error = 0;
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0)
+ return nfserr_bad_xdr;
+ /* deleg_want is ignored */
+@@ -1181,6 +1184,8 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
+ if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0)
+ return nfserr_bad_xdr;
+
++ memset(&open_conf->oc_resp_stateid, 0,
++ sizeof(open_conf->oc_resp_stateid));
+ return nfs_ok;
+ }
+
+@@ -1189,6 +1194,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
+ {
+ __be32 status;
+
++ memset(open_down, 0, sizeof(*open_down));
+ status = nfsd4_decode_stateid4(argp, &open_down->od_stateid);
+ if (status)
+ return status;
+@@ -1218,6 +1224,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+ if (!putfh->pf_fhval)
+ return nfserr_jukebox;
+
++ putfh->no_verify = false;
+ return nfs_ok;
+ }
+
+@@ -1234,6 +1241,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+ {
+ __be32 status;
+
++ memset(read, 0, sizeof(*read));
+ status = nfsd4_decode_stateid4(argp, &read->rd_stateid);
+ if (status)
+ return status;
+@@ -1250,6 +1258,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
+ {
+ __be32 status;
+
++ memset(readdir, 0, sizeof(*readdir));
+ if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_verifier4(argp, &readdir->rd_verf);
+@@ -1269,6 +1278,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
+ static __be32
+ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+ {
++ memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
+ return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
+ }
+
+@@ -1277,6 +1287,7 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
+ {
+ __be32 status;
+
++ memset(rename, 0, sizeof(*rename));
+ status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen);
+ if (status)
+ return status;
+@@ -1293,6 +1304,7 @@ static __be32
+ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
+ struct nfsd4_secinfo *secinfo)
+ {
++ secinfo->si_exp = NULL;
+ return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
+ }
+
+@@ -1301,6 +1313,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
+ {
+ __be32 status;
+
++ memset(setattr, 0, sizeof(*setattr));
+ status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid);
+ if (status)
+ return status;
+@@ -1315,6 +1328,8 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
+ {
+ __be32 *p, status;
+
++ memset(setclientid, 0, sizeof(*setclientid));
++
+ if (argp->minorversion >= 1)
+ return nfserr_notsupp;
+
+@@ -1371,6 +1386,8 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
+ {
+ __be32 *p, status;
+
++ memset(verify, 0, sizeof(*verify));
++
+ status = nfsd4_decode_bitmap4(argp, verify->ve_bmval,
+ ARRAY_SIZE(verify->ve_bmval));
+ if (status)
+@@ -1410,6 +1427,9 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+ if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen))
+ return nfserr_bad_xdr;
+
++ write->wr_bytes_written = 0;
++ write->wr_how_written = 0;
++ memset(&write->wr_verifier, 0, sizeof(write->wr_verifier));
+ return nfs_ok;
+ }
+
+@@ -1434,6 +1454,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
+
+ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
+ {
++ memset(bc, 0, sizeof(*bc));
+ if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+@@ -1444,6 +1465,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
+ u32 use_conn_in_rdma_mode;
+ __be32 status;
+
++ memset(bcts, 0, sizeof(*bcts));
+ status = nfsd4_decode_sessionid4(argp, &bcts->sessionid);
+ if (status)
+ return status;
+@@ -1585,6 +1607,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
+ {
+ __be32 status;
+
++ memset(exid, 0, sizeof(*exid));
+ status = nfsd4_decode_verifier4(argp, &exid->verifier);
+ if (status)
+ return status;
+@@ -1637,6 +1660,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+ {
+ __be32 status;
+
++ memset(sess, 0, sizeof(*sess));
+ status = nfsd4_decode_clientid4(argp, &sess->clientid);
+ if (status)
+ return status;
+@@ -1652,11 +1676,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0)
+ return nfserr_bad_xdr;
+- status = nfsd4_decode_cb_sec(argp, &sess->cb_sec);
+- if (status)
+- return status;
+-
+- return nfs_ok;
++ return nfsd4_decode_cb_sec(argp, &sess->cb_sec);
+ }
+
+ static __be32
+@@ -1680,6 +1700,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+ {
+ __be32 status;
+
++ memset(gdev, 0, sizeof(*gdev));
+ status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid);
+ if (status)
+ return status;
+@@ -1700,6 +1721,7 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+ {
+ __be32 *p, status;
+
++ memset(lcp, 0, sizeof(*lcp));
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0)
+@@ -1735,6 +1757,7 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+ {
+ __be32 status;
+
++ memset(lgp, 0, sizeof(*lgp));
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0)
+@@ -1760,6 +1783,7 @@ static __be32
+ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+ struct nfsd4_layoutreturn *lrp)
+ {
++ memset(lrp, 0, sizeof(*lrp));
+ if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0)
+@@ -1775,6 +1799,8 @@ static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+ {
+ if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
+ return nfserr_bad_xdr;
++
++ sin->sin_exp = NULL;
+ return nfs_ok;
+ }
+
+@@ -1795,6 +1821,7 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+ seq->maxslots = be32_to_cpup(p++);
+ seq->cachethis = be32_to_cpup(p);
+
++ seq->status_flags = 0;
+ return nfs_ok;
+ }
+
+@@ -1805,6 +1832,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
+ __be32 status;
+ u32 i;
+
++ memset(test_stateid, 0, sizeof(*test_stateid));
+ if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0)
+ return nfserr_bad_xdr;
+
+@@ -1902,6 +1930,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+ struct nl4_server *ns_dummy;
+ __be32 status;
+
++ memset(copy, 0, sizeof(*copy));
+ status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid);
+ if (status)
+ return status;
+@@ -1957,6 +1986,7 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+ {
+ __be32 status;
+
++ memset(cn, 0, sizeof(*cn));
+ cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src));
+ if (cn->cpn_src == NULL)
+ return nfserr_jukebox;
+@@ -1974,6 +2004,8 @@ static __be32
+ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
+ struct nfsd4_offload_status *os)
+ {
++ os->count = 0;
++ os->status = 0;
+ return nfsd4_decode_stateid4(argp, &os->stateid);
+ }
+
+@@ -1990,6 +2022,8 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+ if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0)
+ return nfserr_bad_xdr;
+
++ seek->seek_eof = 0;
++ seek->seek_pos = 0;
+ return nfs_ok;
+ }
+
+@@ -2125,6 +2159,7 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+ __be32 status;
+ u32 maxcount;
+
++ memset(getxattr, 0, sizeof(*getxattr));
+ status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name);
+ if (status)
+ return status;
+@@ -2133,8 +2168,7 @@ nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+ maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
+
+ getxattr->getxa_len = maxcount;
+-
+- return status;
++ return nfs_ok;
+ }
+
+ static __be32
+@@ -2144,6 +2178,8 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+ u32 flags, maxcount, size;
+ __be32 status;
+
++ memset(setxattr, 0, sizeof(*setxattr));
++
+ if (xdr_stream_decode_u32(argp->xdr, &flags) < 0)
+ return nfserr_bad_xdr;
+
+@@ -2182,6 +2218,8 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
+ {
+ u32 maxcount;
+
++ memset(listxattrs, 0, sizeof(*listxattrs));
++
+ if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0)
+ return nfserr_bad_xdr;
+
+@@ -2209,6 +2247,7 @@ static __be32
+ nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
+ struct nfsd4_removexattr *removexattr)
+ {
++ memset(removexattr, 0, sizeof(*removexattr));
+ return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 6a30c0db05ad9b16d1c7be887176e1b955b5fac2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: reduce locking in nfsd_lookup()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 19d008b46941b8c668402170522e0f7a9258409c ]
+
+nfsd_lookup() takes an exclusive lock on the parent inode, but no
+callers want the lock and it may not be needed at all if the
+result is in the dcache.
+
+Change nfsd_lookup_dentry() to not take the lock, and call
+lookup_one_len_unlocked() which takes the lock only if needed.
+
+nfsd4_open() currently expects the lock to still be held, but that isn't
+necessary as nfsd4_verify_deleg_dentry() provides required
+guarantees without the lock.
+
+NOTE: NFSv4 requires directory changeinfo for OPEN even when a create
+ wasn't requested and no change happened. Now that nfsd_lookup()
+ doesn't use fh_lock(), we need to explicitly fill the attributes
+ when no create happens. A new fh_fill_both_attrs() is provided
+ for that task.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 20 ++++++++++++--------
+ fs/nfsd/nfs4state.c | 3 ---
+ fs/nfsd/nfsfh.c | 19 +++++++++++++++++++
+ fs/nfsd/nfsfh.h | 2 +-
+ fs/nfsd/vfs.c | 34 ++++++++++++++--------------------
+ 5 files changed, 46 insertions(+), 32 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 26cd2479e30cf..b6df56fb6755d 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -302,6 +302,11 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (d_really_is_positive(child)) {
+ status = nfs_ok;
+
++ /* NFSv4 protocol requires change attributes even though
++ * no change happened.
++ */
++ fh_fill_both_attrs(fhp);
++
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ if (!d_is_reg(child))
+@@ -417,15 +422,15 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
+ if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
+ open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_MODIFY);
+- } else
+- /*
+- * Note this may exit with the parent still locked.
+- * We will hold the lock until nfsd4_open's final
+- * lookup, to prevent renames or unlinks until we've had
+- * a chance to an acquire a delegation if appropriate.
+- */
++ } else {
+ status = nfsd_lookup(rqstp, current_fh,
+ open->op_fname, open->op_fnamelen, *resfh);
++ if (!status)
++ /* NFSv4 protocol requires change attributes even though
++ * no change happened.
++ */
++ fh_fill_both_attrs(current_fh);
++ }
+ if (status)
+ goto out;
+ status = nfsd_check_obj_isreg(*resfh);
+@@ -1043,7 +1048,6 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ &exp, &dentry);
+ if (err)
+ return err;
+- fh_unlock(&cstate->current_fh);
+ if (d_really_is_negative(dentry)) {
+ exp_put(exp);
+ err = nfserr_noent;
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index a299aeaa0de07..f66fb39714893 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5321,9 +5321,6 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
+ struct dentry *child;
+ __be32 err;
+
+- /* parent may already be locked, and it may get unlocked by
+- * this call, but that is safe.
+- */
+ err = nfsd_lookup_dentry(open->op_rqstp, parent,
+ open->op_fname, open->op_fnamelen,
+ &exp, &child);
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 5e2ed4b2a925c..cd2946a88d727 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -672,6 +672,25 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ }
+
++/**
++ * fh_fill_both_attrs - Fill pre-op and post-op attributes
++ * @fhp: file handle to be updated
++ *
++ * This is used when the directory wasn't changed, but wcc attributes
++ * are needed anyway.
++ */
++void fh_fill_both_attrs(struct svc_fh *fhp)
++{
++ fh_fill_post_attrs(fhp);
++ if (!fhp->fh_post_saved)
++ return;
++ fhp->fh_pre_change = fhp->fh_post_change;
++ fhp->fh_pre_mtime = fhp->fh_post_attr.mtime;
++ fhp->fh_pre_ctime = fhp->fh_post_attr.ctime;
++ fhp->fh_pre_size = fhp->fh_post_attr.size;
++ fhp->fh_pre_saved = true;
++}
++
+ /*
+ * Release a file handle.
+ */
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index fb9d358a267e5..28a4f9a94e2c8 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -322,7 +322,7 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
+
+ extern void fh_fill_pre_attrs(struct svc_fh *fhp);
+ extern void fh_fill_post_attrs(struct svc_fh *fhp);
+-
++extern void fh_fill_both_attrs(struct svc_fh *fhp);
+
+ /*
+ * Lock a file handle/inode
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index ac716ced1fd5f..c07fe50d6bdfb 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -198,27 +198,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto out_nfserr;
+ }
+ } else {
+- /*
+- * In the nfsd4_open() case, this may be held across
+- * subsequent open and delegation acquisition which may
+- * need to take the child's i_mutex:
+- */
+- fh_lock_nested(fhp, I_MUTEX_PARENT);
+- dentry = lookup_one_len(name, dparent, len);
++ dentry = lookup_one_len_unlocked(name, dparent, len);
+ host_err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ goto out_nfserr;
+ if (nfsd_mountpoint(dentry, exp)) {
+- /*
+- * We don't need the i_mutex after all. It's
+- * still possible we could open this (regular
+- * files can be mountpoints too), but the
+- * i_mutex is just there to prevent renames of
+- * something that we might be about to delegate,
+- * and a mountpoint won't be renamed:
+- */
+- fh_unlock(fhp);
+- if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
++ host_err = nfsd_cross_mnt(rqstp, &dentry, &exp);
++ if (host_err) {
+ dput(dentry);
+ goto out_nfserr;
+ }
+@@ -233,7 +219,15 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ return nfserrno(host_err);
+ }
+
+-/*
++/**
++ * nfsd_lookup - look up a single path component for nfsd
++ *
++ * @rqstp: the request context
++ * @fhp: the file handle of the directory
++ * @name: the component name, or %NULL to look up parent
++ * @len: length of name to examine
++ * @resfh: pointer to pre-initialised filehandle to hold result.
++ *
+ * Look up one component of a pathname.
+ * N.B. After this call _both_ fhp and resfh need an fh_put
+ *
+@@ -243,11 +237,11 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ * clients and is explicitly disallowed for NFSv3
+- * NeilBrown <neilb@cse.unsw.edu.au>
++ *
+ */
+ __be32
+ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+- unsigned int len, struct svc_fh *resfh)
++ unsigned int len, struct svc_fh *resfh)
+ {
+ struct svc_export *exp;
+ struct dentry *dentry;
+--
+2.43.0
+
--- /dev/null
+From c11ec07d48fce23e94058de4f917ccadcc3565aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:57 -0400
+Subject: NFSD: Refactor __nfsd_file_close_inode()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit a845511007a63467fee575353c706806c21218b1 ]
+
+The code that computes the hashval is the same in both callers.
+
+To prevent them from going stale, reframe the documenting comments
+to remove descriptions of the underlying hash table structure, which
+is about to be replaced.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 40 +++++++++++++++++++++-------------------
+ fs/nfsd/trace.h | 44 +++++++++++++++++++++++++++++++++-----------
+ 2 files changed, 54 insertions(+), 30 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index d7c74b51eabf3..3925df9124c39 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -558,39 +558,44 @@ static struct shrinker nfsd_file_shrinker = {
+ .seeks = 1,
+ };
+
+-static void
+-__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
+- struct list_head *dispose)
++/*
++ * Find all cache items across all net namespaces that match @inode and
++ * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
++ */
++static unsigned int
++__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
+ {
++ unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
++ NFSD_FILE_HASH_BITS);
++ unsigned int count = 0;
+ struct nfsd_file *nf;
+ struct hlist_node *tmp;
+
+ spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+ hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
+- if (inode == nf->nf_inode)
++ if (inode == nf->nf_inode) {
+ nfsd_file_unhash_and_release_locked(nf, dispose);
++ count++;
++ }
+ }
+ spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
++ return count;
+ }
+
+ /**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+- * Walk the whole hash bucket, looking for any files that correspond to "inode".
+- * If any do, then unhash them and put the hashtable reference to them and
+- * destroy any that had their last reference put. Also ensure that any of the
+- * fputs also have their final __fput done as well.
++ * Unhash and put, then flush and fput all cache items associated with @inode.
+ */
+ void
+ nfsd_file_close_inode_sync(struct inode *inode)
+ {
+- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+- NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
++ unsigned int count;
+
+- __nfsd_file_close_inode(inode, hashval, &dispose);
+- trace_nfsd_file_close_inode_sync(inode, !list_empty(&dispose));
++ count = __nfsd_file_close_inode(inode, &dispose);
++ trace_nfsd_file_close_inode_sync(inode, count);
+ nfsd_file_dispose_list_sync(&dispose);
+ }
+
+@@ -598,19 +603,16 @@ nfsd_file_close_inode_sync(struct inode *inode)
+ * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+- * Walk the whole hash bucket, looking for any files that correspond to "inode".
+- * If any do, then unhash them and put the hashtable reference to them and
+- * destroy any that had their last reference put.
++ * Unhash and put all cache item associated with @inode.
+ */
+ static void
+ nfsd_file_close_inode(struct inode *inode)
+ {
+- unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
+- NFSD_FILE_HASH_BITS);
+ LIST_HEAD(dispose);
++ unsigned int count;
+
+- __nfsd_file_close_inode(inode, hashval, &dispose);
+- trace_nfsd_file_close_inode(inode, !list_empty(&dispose));
++ count = __nfsd_file_close_inode(inode, &dispose);
++ trace_nfsd_file_close_inode(inode, count);
+ nfsd_file_dispose_list_delayed(&dispose);
+ }
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 655b56c87600b..e82ea1abfbd46 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -847,30 +847,52 @@ TRACE_EVENT(nfsd_file_open,
+
+ DECLARE_EVENT_CLASS(nfsd_file_search_class,
+ TP_PROTO(
+- struct inode *inode,
+- int found
++ const struct inode *inode,
++ unsigned int count
+ ),
+- TP_ARGS(inode, found),
++ TP_ARGS(inode, count),
+ TP_STRUCT__entry(
+- __field(struct inode *, inode)
+- __field(int, found)
++ __field(const struct inode *, inode)
++ __field(unsigned int, count)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+- __entry->found = found;
++ __entry->count = count;
+ ),
+- TP_printk("inode=%p found=%d",
+- __entry->inode, __entry->found)
++ TP_printk("inode=%p count=%u",
++ __entry->inode, __entry->count)
+ );
+
+ #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
+ DEFINE_EVENT(nfsd_file_search_class, name, \
+- TP_PROTO(struct inode *inode, int found), \
+- TP_ARGS(inode, found))
++ TP_PROTO( \
++ const struct inode *inode, \
++ unsigned int count \
++ ), \
++ TP_ARGS(inode, count))
+
+ DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+ DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+-DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached);
++
++TRACE_EVENT(nfsd_file_is_cached,
++ TP_PROTO(
++ const struct inode *inode,
++ int found
++ ),
++ TP_ARGS(inode, found),
++ TP_STRUCT__entry(
++ __field(const struct inode *, inode)
++ __field(int, found)
++ ),
++ TP_fast_assign(
++ __entry->inode = inode;
++ __entry->found = found;
++ ),
++ TP_printk("inode=%p is %scached",
++ __entry->inode,
++ __entry->found ? "" : "not "
++ )
++);
+
+ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ TP_PROTO(struct inode *inode, u32 mask),
+--
+2.43.0
+
--- /dev/null
+From e689093a85007cc0339585f20ab1141264a5439e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:22:56 -0400
+Subject: NFSD: Refactor common code out of dirlist helpers
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 98124f5bd6c76699d514fbe491dd95265369cc99 ]
+
+The dust has settled a bit and it's become obvious what code is
+totally common between nfsd_init_dirlist_pages() and
+nfsd3_init_dirlist_pages(). Move that common code to SUNRPC.
+
+The new helper brackets the existing xdr_init_decode_pages() API.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 10 +---------
+ fs/nfsd/nfsproc.c | 10 +---------
+ include/linux/sunrpc/xdr.h | 2 ++
+ net/sunrpc/xdr.c | 22 ++++++++++++++++++++++
+ 4 files changed, 26 insertions(+), 18 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 58695e4e18b46..923d9a80df92c 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -574,15 +574,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
+ buf->pages = rqstp->rq_next_page;
+ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+- /* This is xdr_init_encode(), but it assumes that
+- * the head kvec has already been consumed. */
+- xdr_set_scratch_buffer(xdr, NULL, 0);
+- xdr->buf = buf;
+- xdr->page_ptr = buf->pages;
+- xdr->iov = NULL;
+- xdr->p = page_address(*buf->pages);
+- xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
+- xdr->rqst = NULL;
++ xdr_init_encode_pages(xdr, buf, buf->pages, NULL);
+ }
+
+ /*
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 49778ff410e32..82b3ddeacc338 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -575,15 +575,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
+ buf->pages = rqstp->rq_next_page;
+ rqstp->rq_next_page++;
+
+- /* This is xdr_init_encode(), but it assumes that
+- * the head kvec has already been consumed. */
+- xdr_set_scratch_buffer(xdr, NULL, 0);
+- xdr->buf = buf;
+- xdr->page_ptr = buf->pages;
+- xdr->iov = NULL;
+- xdr->p = page_address(*buf->pages);
+- xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
+- xdr->rqst = NULL;
++ xdr_init_encode_pages(xdr, buf, buf->pages, NULL);
+ }
+
+ /*
+diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
+index 3a2c714d6b629..98e197376a0d9 100644
+--- a/include/linux/sunrpc/xdr.h
++++ b/include/linux/sunrpc/xdr.h
+@@ -240,6 +240,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+
+ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
+ __be32 *p, struct rpc_rqst *rqst);
++extern void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
++ struct page **pages, struct rpc_rqst *rqst);
+ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+ extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
+ size_t nbytes);
+diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
+index f0a0a4ad6d525..b227d0c8471ff 100644
+--- a/net/sunrpc/xdr.c
++++ b/net/sunrpc/xdr.c
+@@ -918,6 +918,28 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ }
+ EXPORT_SYMBOL_GPL(xdr_init_encode);
+
++/**
++ * xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
++ * @xdr: pointer to xdr_stream struct
++ * @buf: pointer to XDR buffer into which to encode data
++ * @pages: list of pages to decode into
++ * @rqst: pointer to controlling rpc_rqst, for debugging
++ *
++ */
++void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
++ struct page **pages, struct rpc_rqst *rqst)
++{
++ xdr_reset_scratch_buffer(xdr);
++
++ xdr->buf = buf;
++ xdr->page_ptr = pages;
++ xdr->iov = NULL;
++ xdr->p = page_address(*pages);
++ xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
++ xdr->rqst = rqst;
++}
++EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
++
+ /**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+--
+2.43.0
+
--- /dev/null
+From 4aa96232f5ed065bb824ab0a508846738ebeb09f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:47 -0400
+Subject: NFSD: Refactor find_file()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 15424748001a9b5ea62b3e6ad45f0a8b27f01df9 ]
+
+find_file() is now the only caller of find_file_locked(), so just
+fold these two together.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 36 +++++++++++++++---------------------
+ 1 file changed, 15 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 192e721525665..4f2ad5bf1f1b8 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4682,31 +4682,24 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
+ nfs4_put_stid(&last->st_stid);
+ }
+
+-/* search file_hashtbl[] for file */
+-static struct nfs4_file *
+-find_file_locked(const struct svc_fh *fh, unsigned int hashval)
++static noinline_for_stack struct nfs4_file *
++nfsd4_file_hash_lookup(const struct svc_fh *fhp)
+ {
+- struct nfs4_file *fp;
++ unsigned int hashval = file_hashval(fhp);
++ struct nfs4_file *fi;
+
+- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
+- lockdep_is_held(&state_lock)) {
+- if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
+- if (refcount_inc_not_zero(&fp->fi_ref))
+- return fp;
++ rcu_read_lock();
++ hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash,
++ lockdep_is_held(&state_lock)) {
++ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
++ if (refcount_inc_not_zero(&fi->fi_ref)) {
++ rcu_read_unlock();
++ return fi;
++ }
+ }
+ }
+- return NULL;
+-}
+-
+-static struct nfs4_file * find_file(struct svc_fh *fh)
+-{
+- struct nfs4_file *fp;
+- unsigned int hashval = file_hashval(fh);
+-
+- rcu_read_lock();
+- fp = find_file_locked(fh, hashval);
+ rcu_read_unlock();
+- return fp;
++ return NULL;
+ }
+
+ /*
+@@ -4757,9 +4750,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+ struct nfs4_file *fp;
+ __be32 ret = nfs_ok;
+
+- fp = find_file(current_fh);
++ fp = nfsd4_file_hash_lookup(current_fh);
+ if (!fp)
+ return ret;
++
+ /* Check for conflicting share reservations */
+ spin_lock(&fp->fi_lock);
+ if (fp->fi_share_deny & deny_type)
+--
+2.43.0
+
--- /dev/null
+From fae6793b0898c798f8ebde482ecd9d2d43d90579 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:47 -0400
+Subject: NFSD: Refactor nfsd4_cleanup_inter_ssc() (1/2)
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 24d796ea383b8a4c8234e06d1b14bbcd371192ea ]
+
+The @src parameter is sometimes a pointer to a struct nfsd_file and
+sometimes a pointer to struct file hiding in a phony struct
+nfsd_file. Refactor nfsd4_cleanup_inter_ssc() so the @src parameter
+is always an explicit struct file.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 4fd6611d29ce4..238df435b395d 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1550,7 +1550,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ }
+
+ static void
+-nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
++nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
+ struct nfsd_file *dst)
+ {
+ bool found = false;
+@@ -1559,9 +1559,9 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
+
+- nfs42_ssc_close(src->nf_file);
++ nfs42_ssc_close(filp);
+ nfsd_file_put(dst);
+- fput(src->nf_file);
++ fput(filp);
+
+ if (!nn) {
+ mntput(ss_mnt);
+@@ -1604,7 +1604,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ }
+
+ static void
+-nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
++nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
+ struct nfsd_file *dst)
+ {
+ }
+@@ -1718,7 +1718,7 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
+ }
+
+ if (nfsd4_ssc_is_inter(copy))
+- nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src,
++ nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file,
+ copy->nf_dst);
+ else
+ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+--
+2.43.0
+
--- /dev/null
+From 9650431d81488ba63ad3c618468e6a8384dd2144 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:53 -0400
+Subject: NFSD: Refactor nfsd4_cleanup_inter_ssc() (2/2)
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 478ed7b10d875da2743d1a22822b9f8a82df8f12 ]
+
+Move the nfsd4_cleanup_*() call sites out of nfsd4_do_copy(). A
+subsequent patch will modify one of the new call sites to avoid
+the need to manufacture the phony struct nfsd_file.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 238df435b395d..10130f0b088ef 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1716,13 +1716,6 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
+ nfsd4_init_copy_res(copy, sync);
+ status = nfs_ok;
+ }
+-
+- if (nfsd4_ssc_is_inter(copy))
+- nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file,
+- copy->nf_dst);
+- else
+- nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+-
+ return status;
+ }
+
+@@ -1778,9 +1771,14 @@ static int nfsd4_do_async_copy(void *data)
+ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
++ copy->nfserr = nfsd4_do_copy(copy, 0);
++ nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file,
++ copy->nf_dst);
++ } else {
++ copy->nfserr = nfsd4_do_copy(copy, 0);
++ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+
+- copy->nfserr = nfsd4_do_copy(copy, 0);
+ do_callback:
+ cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+ if (!cb_copy)
+@@ -1856,6 +1854,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ status = nfs_ok;
+ } else {
+ status = nfsd4_do_copy(copy, 1);
++ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+ out:
+ return status;
+--
+2.43.0
+
--- /dev/null
+From d1dd9b32997e5cd7bcd03fa961a346a5ff5131cf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:59 -0400
+Subject: NFSD: Refactor nfsd4_do_copy()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3b7bf5933cada732783554edf0dc61283551c6cf ]
+
+Refactor: Now that nfsd4_do_copy() no longer calls the cleanup
+helpers, plumb the use of struct file pointers all the way down to
+_nfsd_copy_file_range().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 22 ++++++++++++++--------
+ 1 file changed, 14 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 10130f0b088ef..24c7d5e6c8c33 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1661,10 +1661,10 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
+ gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
+ }
+
+-static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
++static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
++ struct file *dst,
++ struct file *src)
+ {
+- struct file *dst = copy->nf_dst->nf_file;
+- struct file *src = copy->nf_src->nf_file;
+ errseq_t since;
+ ssize_t bytes_copied = 0;
+ u64 bytes_total = copy->cp_count;
+@@ -1701,12 +1701,15 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+ return bytes_copied;
+ }
+
+-static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
++static __be32 nfsd4_do_copy(struct nfsd4_copy *copy,
++ struct file *src, struct file *dst,
++ bool sync)
+ {
+ __be32 status;
+ ssize_t bytes;
+
+- bytes = _nfsd_copy_file_range(copy);
++ bytes = _nfsd_copy_file_range(copy, dst, src);
++
+ /* for async copy, we ignore the error, client can always retry
+ * to get the error
+ */
+@@ -1771,11 +1774,13 @@ static int nfsd4_do_async_copy(void *data)
+ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+- copy->nfserr = nfsd4_do_copy(copy, 0);
++ copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
++ copy->nf_dst->nf_file, false);
+ nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file,
+ copy->nf_dst);
+ } else {
+- copy->nfserr = nfsd4_do_copy(copy, 0);
++ copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
++ copy->nf_dst->nf_file, false);
+ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+
+@@ -1853,7 +1858,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ wake_up_process(async_copy->copy_task);
+ status = nfs_ok;
+ } else {
+- status = nfsd4_do_copy(copy, 1);
++ status = nfsd4_do_copy(copy, copy->nf_src->nf_file,
++ copy->nf_dst->nf_file, true);
+ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+ }
+ out:
+--
+2.43.0
+
--- /dev/null
+From 1a6043728c1914411bd9cfaddb9ee40714f636e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Mar 2022 16:10:17 -0400
+Subject: NFSD: Refactor nfsd_create_setattr()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5f46e950c395b9c14c282b53ba78c5fd46d6c256 ]
+
+I'd like to move do_nfsd_create() out of vfs.c. Therefore
+nfsd_create_setattr() needs to be made publicly visible.
+
+Note that both call sites in vfs.c commit both the new object and
+its parent directory, so just combine those common metadata commits
+into nfsd_create_setattr().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 79 +++++++++++++++++++++++++++------------------------
+ fs/nfsd/vfs.h | 2 ++
+ 2 files changed, 44 insertions(+), 37 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index a46ab32216dee..e4f100a43ce52 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1189,14 +1189,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ return err;
+ }
+
+-static __be32
+-nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
+- struct iattr *iap)
++/**
++ * nfsd_create_setattr - Set a created file's attributes
++ * @rqstp: RPC transaction being executed
++ * @fhp: NFS filehandle of parent directory
++ * @resfhp: NFS filehandle of new object
++ * @iap: requested attributes of new object
++ *
++ * Returns nfs_ok on success, or an nfsstat in network byte order.
++ */
++__be32
++nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ struct svc_fh *resfhp, struct iattr *iap)
+ {
++ __be32 status;
++
+ /*
+- * Mode has already been set earlier in create:
++ * Mode has already been set by file creation.
+ */
+ iap->ia_valid &= ~ATTR_MODE;
++
+ /*
+ * Setting uid/gid works only for root. Irix appears to
+ * send along the gid on create when it tries to implement
+@@ -1204,10 +1216,31 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
+ */
+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
+ iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
++
++ /*
++ * Callers expect new file metadata to be committed even
++ * if the attributes have not changed.
++ */
+ if (iap->ia_valid)
+- return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
+- /* Callers expect file metadata to be committed here */
+- return nfserrno(commit_metadata(resfhp));
++ status = nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
++ else
++ status = nfserrno(commit_metadata(resfhp));
++
++ /*
++ * Transactional filesystems had a chance to commit changes
++ * for both parent and child simultaneously making the
++ * following commit_metadata a noop in many cases.
++ */
++ if (!status)
++ status = nfserrno(commit_metadata(fhp));
++
++ /*
++ * Update the new filehandle to pick up the new attributes.
++ */
++ if (!status)
++ status = fh_update(resfhp);
++
++ return status;
+ }
+
+ /* HPUX client sometimes creates a file in mode 000, and sets size to 0.
+@@ -1234,7 +1267,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct dentry *dentry, *dchild;
+ struct inode *dirp;
+ __be32 err;
+- __be32 err2;
+ int host_err;
+
+ dentry = fhp->fh_dentry;
+@@ -1307,22 +1339,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (host_err < 0)
+ goto out_nfserr;
+
+- err = nfsd_create_setattr(rqstp, resfhp, iap);
++ err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
+
+- /*
+- * nfsd_create_setattr already committed the child. Transactional
+- * filesystems had a chance to commit changes for both parent and
+- * child simultaneously making the following commit_metadata a
+- * noop.
+- */
+- err2 = nfserrno(commit_metadata(fhp));
+- if (err2)
+- err = err2;
+- /*
+- * Update the file handle to get the new inode info.
+- */
+- if (!err)
+- err = fh_update(resfhp);
+ out:
+ dput(dchild);
+ return err;
+@@ -1513,20 +1531,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ set_attr:
+- err = nfsd_create_setattr(rqstp, resfhp, iap);
+-
+- /*
+- * nfsd_create_setattr already committed the child
+- * (and possibly also the parent).
+- */
+- if (!err)
+- err = nfserrno(commit_metadata(fhp));
+-
+- /*
+- * Update the filehandle to get the new inode info.
+- */
+- if (!err)
+- err = fh_update(resfhp);
++ err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
+
+ out:
+ fh_unlock(fhp);
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index ccb87b2864f64..1f32a83456b03 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -69,6 +69,8 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
++__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ struct svc_fh *resfhp, struct iattr *iap);
+ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ struct svc_fh *res, int createmode,
+--
+2.43.0
+
--- /dev/null
+From 38ee92925af9a1275f749caf287d3b3d47ab2109 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:25 -0400
+Subject: NFSD: Refactor nfsd_file_gc()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3bc6d3470fe412f818f9bff6b71d1be3a76af8f3 ]
+
+Refactor nfsd_file_gc() to use the new list_lru helper.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index ffe46f3f33495..656c94c779417 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -491,7 +491,11 @@ nfsd_file_lru_walk_list(struct shrink_control *sc)
+ static void
+ nfsd_file_gc(void)
+ {
+- nfsd_file_lru_walk_list(NULL);
++ LIST_HEAD(dispose);
++
++ list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
++ &dispose, LONG_MAX);
++ nfsd_file_gc_dispose_list(&dispose);
+ }
+
+ static void
+--
+2.43.0
+
--- /dev/null
+From 04540c8fa772396fc71e5823def76c02fb10c341 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:31 -0400
+Subject: NFSD: Refactor nfsd_file_lru_scan()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 39f1d1ff8148902c5692ffb0e1c4479416ab44a7 ]
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 25 +++++++------------------
+ 1 file changed, 7 insertions(+), 18 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 656c94c779417..1d94491e5ddad 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -471,23 +471,6 @@ static void nfsd_file_gc_dispose_list(struct list_head *dispose)
+ nfsd_file_dispose_list_delayed(dispose);
+ }
+
+-static unsigned long
+-nfsd_file_lru_walk_list(struct shrink_control *sc)
+-{
+- LIST_HEAD(head);
+- unsigned long ret;
+-
+- if (sc)
+- ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+- nfsd_file_lru_cb, &head);
+- else
+- ret = list_lru_walk(&nfsd_file_lru,
+- nfsd_file_lru_cb,
+- &head, LONG_MAX);
+- nfsd_file_gc_dispose_list(&head);
+- return ret;
+-}
+-
+ static void
+ nfsd_file_gc(void)
+ {
+@@ -514,7 +497,13 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+ static unsigned long
+ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+ {
+- return nfsd_file_lru_walk_list(sc);
++ LIST_HEAD(dispose);
++ unsigned long ret;
++
++ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
++ nfsd_file_lru_cb, &dispose);
++ nfsd_file_gc_dispose_list(&dispose);
++ return ret;
+ }
+
+ static struct shrinker nfsd_file_shrinker = {
+--
+2.43.0
+
--- /dev/null
+From 0026da51b50cccd61065cbda4750dac067179bda Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:14:07 -0400
+Subject: NFSD: Refactor nfsd_setattr()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c0aa1913db57219e91a0a8832363cbafb3a9cf8f ]
+
+Move code that will be retried (in a subsequent patch) into a helper
+function.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 97 ++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 57 insertions(+), 40 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 77f8ab3826d75..392df2353556e 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -344,8 +344,61 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ return nfserrno(get_write_access(inode));
+ }
+
+-/*
+- * Set various file attributes. After this call fhp needs an fh_put.
++static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
++{
++ int host_err;
++
++ if (iap->ia_valid & ATTR_SIZE) {
++ /*
++ * RFC5661, Section 18.30.4:
++ * Changing the size of a file with SETATTR indirectly
++ * changes the time_modify and change attributes.
++ *
++ * (and similar for the older RFCs)
++ */
++ struct iattr size_attr = {
++ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
++ .ia_size = iap->ia_size,
++ };
++
++ if (iap->ia_size < 0)
++ return -EFBIG;
++
++ host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
++ if (host_err)
++ return host_err;
++ iap->ia_valid &= ~ATTR_SIZE;
++
++ /*
++ * Avoid the additional setattr call below if the only other
++ * attribute that the client sends is the mtime, as we update
++ * it as part of the size change above.
++ */
++ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
++ return 0;
++ }
++
++ if (!iap->ia_valid)
++ return 0;
++
++ iap->ia_valid |= ATTR_CTIME;
++ return notify_change(&init_user_ns, dentry, iap, NULL);
++}
++
++/**
++ * nfsd_setattr - Set various file attributes.
++ * @rqstp: controlling RPC transaction
++ * @fhp: filehandle of target
++ * @attr: attributes to set
++ * @check_guard: set to 1 if guardtime is a valid timestamp
++ * @guardtime: do not act if ctime.tv_sec does not match this timestamp
++ *
++ * This call may adjust the contents of @attr (in particular, this
++ * call may change the bits in the na_iattr.ia_valid field).
++ *
++ * Returns nfs_ok on success, otherwise an NFS status code is
++ * returned. Caller must release @fhp by calling fh_put in either
++ * case.
+ */
+ __be32
+ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+@@ -358,7 +411,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+- int host_err = 0;
++ int host_err;
+ bool get_write_count;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
+
+@@ -415,43 +468,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+
+ inode_lock(inode);
+- if (size_change) {
+- /*
+- * RFC5661, Section 18.30.4:
+- * Changing the size of a file with SETATTR indirectly
+- * changes the time_modify and change attributes.
+- *
+- * (and similar for the older RFCs)
+- */
+- struct iattr size_attr = {
+- .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+- .ia_size = iap->ia_size,
+- };
+-
+- host_err = -EFBIG;
+- if (iap->ia_size < 0)
+- goto out_unlock;
+-
+- host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
+- if (host_err)
+- goto out_unlock;
+- iap->ia_valid &= ~ATTR_SIZE;
+-
+- /*
+- * Avoid the additional setattr call below if the only other
+- * attribute that the client sends is the mtime, as we update
+- * it as part of the size change above.
+- */
+- if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+- goto out_unlock;
+- }
+-
+- if (iap->ia_valid) {
+- iap->ia_valid |= ATTR_CTIME;
+- host_err = notify_change(&init_user_ns, dentry, iap, NULL);
+- }
+-
+-out_unlock:
++ host_err = __nfsd_setattr(dentry, iap);
+ if (attr->na_seclabel && attr->na_seclabel->len)
+ attr->na_labelerr = security_inode_setsecctx(dentry,
+ attr->na_seclabel->data, attr->na_seclabel->len);
+--
+2.43.0
+
--- /dev/null
+From b94a908be6f8da15cf1ec917c94cb3d820fcb332 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Mar 2022 13:29:23 -0400
+Subject: NFSD: Refactor NFSv3 CREATE
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit df9606abddfb01090d5ece7dcc2441d848f690f0 ]
+
+The NFSv3 CREATE and NFSv4 OPEN(CREATE) use cases are about to
+diverge such that it makes sense to split do_nfsd_create() into one
+version for NFSv3 and one for NFSv4.
+
+As a first step, copy do_nfsd_create() to nfs3proc.c and remove
+NFSv4-specific logic.
+
+One immediate legibility benefit is that the logic for handling
+NFSv3 createhow is now quite straightforward. NFSv4 createhow
+has some subtleties that IMO do not belong in generic code.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 127 ++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 121 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 86163ecbb015d..57854ca022d18 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -8,6 +8,7 @@
+ #include <linux/fs.h>
+ #include <linux/ext2_fs.h>
+ #include <linux/magic.h>
++#include <linux/namei.h>
+
+ #include "cache.h"
+ #include "xdr3.h"
+@@ -220,10 +221,126 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
+ }
+
+ /*
+- * With NFSv3, CREATE processing is a lot easier than with NFSv2.
+- * At least in theory; we'll see how it fares in practice when the
+- * first reports about SunOS compatibility problems start to pour in...
++ * Implement NFSv3's unchecked, guarded, and exclusive CREATE
++ * semantics for regular files. Except for the created file,
++ * this operation is stateless on the server.
++ *
++ * Upon return, caller must release @fhp and @resfhp.
+ */
++static __be32
++nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ struct svc_fh *resfhp, struct nfsd3_createargs *argp)
++{
++ struct iattr *iap = &argp->attrs;
++ struct dentry *parent, *child;
++ __u32 v_mtime, v_atime;
++ struct inode *inode;
++ __be32 status;
++ int host_err;
++
++ if (isdotent(argp->name, argp->len))
++ return nfserr_exist;
++ if (!(iap->ia_valid & ATTR_MODE))
++ iap->ia_mode = 0;
++
++ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
++ if (status != nfs_ok)
++ return status;
++
++ parent = fhp->fh_dentry;
++ inode = d_inode(parent);
++
++ host_err = fh_want_write(fhp);
++ if (host_err)
++ return nfserrno(host_err);
++
++ fh_lock_nested(fhp, I_MUTEX_PARENT);
++
++ child = lookup_one_len(argp->name, parent, argp->len);
++ if (IS_ERR(child)) {
++ status = nfserrno(PTR_ERR(child));
++ goto out;
++ }
++
++ if (d_really_is_negative(child)) {
++ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
++ if (status != nfs_ok)
++ goto out;
++ }
++
++ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
++ if (status != nfs_ok)
++ goto out;
++
++ v_mtime = 0;
++ v_atime = 0;
++ if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
++ u32 *verifier = (u32 *)argp->verf;
++
++ /*
++ * Solaris 7 gets confused (bugid 4218508) if these have
++ * the high bit set, as do xfs filesystems without the
++ * "bigtime" feature. So just clear the high bits.
++ */
++ v_mtime = verifier[0] & 0x7fffffff;
++ v_atime = verifier[1] & 0x7fffffff;
++ }
++
++ if (d_really_is_positive(child)) {
++ status = nfs_ok;
++
++ switch (argp->createmode) {
++ case NFS3_CREATE_UNCHECKED:
++ if (!d_is_reg(child))
++ break;
++ iap->ia_valid &= ATTR_SIZE;
++ goto set_attr;
++ case NFS3_CREATE_GUARDED:
++ status = nfserr_exist;
++ break;
++ case NFS3_CREATE_EXCLUSIVE:
++ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
++ d_inode(child)->i_atime.tv_sec == v_atime &&
++ d_inode(child)->i_size == 0) {
++ break;
++ }
++ status = nfserr_exist;
++ }
++ goto out;
++ }
++
++ if (!IS_POSIXACL(inode))
++ iap->ia_mode &= ~current_umask();
++
++ host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
++ if (host_err < 0) {
++ status = nfserrno(host_err);
++ goto out;
++ }
++
++ /* A newly created file already has a file size of zero. */
++ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
++ iap->ia_valid &= ~ATTR_SIZE;
++ if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
++ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
++ ATTR_MTIME_SET | ATTR_ATIME_SET;
++ iap->ia_mtime.tv_sec = v_mtime;
++ iap->ia_atime.tv_sec = v_atime;
++ iap->ia_mtime.tv_nsec = 0;
++ iap->ia_atime.tv_nsec = 0;
++ }
++
++set_attr:
++ status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
++
++out:
++ fh_unlock(fhp);
++ if (child && !IS_ERR(child))
++ dput(child);
++ fh_drop_write(fhp);
++ return status;
++}
++
+ static __be32
+ nfsd3_proc_create(struct svc_rqst *rqstp)
+ {
+@@ -239,9 +356,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
+ dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+ newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+
+- resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+- &argp->attrs, newfhp, argp->createmode,
+- (u32 *)argp->verf, NULL, NULL);
++ resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
+ return rpc_success;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 9f3b6b4a98f7f777c623106f11554e9cfe30f559 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Mar 2022 14:47:34 -0400
+Subject: NFSD: Refactor NFSv4 OPEN(CREATE)
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 254454a5aa4a9f696d6bae080c08d5863e650f49 ]
+
+Copy do_nfsd_create() to nfs4proc.c and remove NFSv3-specific logic.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 162 ++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 152 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 73c62561580a1..489cdcd8f8c9a 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -37,6 +37,8 @@
+ #include <linux/falloc.h>
+ #include <linux/slab.h>
+ #include <linux/kthread.h>
++#include <linux/namei.h>
++
+ #include <linux/sunrpc/addr.h>
+ #include <linux/nfs_ssc.h>
+
+@@ -235,6 +237,154 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
+ &resfh->fh_handle);
+ }
+
++static inline bool nfsd4_create_is_exclusive(int createmode)
++{
++ return createmode == NFS4_CREATE_EXCLUSIVE ||
++ createmode == NFS4_CREATE_EXCLUSIVE4_1;
++}
++
++/*
++ * Implement NFSv4's unchecked, guarded, and exclusive create
++ * semantics for regular files. Open state for this new file is
++ * subsequently fabricated in nfsd4_process_open2().
++ *
++ * Upon return, caller must release @fhp and @resfhp.
++ */
++static __be32
++nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ struct svc_fh *resfhp, struct nfsd4_open *open)
++{
++ struct iattr *iap = &open->op_iattr;
++ struct dentry *parent, *child;
++ __u32 v_mtime, v_atime;
++ struct inode *inode;
++ __be32 status;
++ int host_err;
++
++ if (isdotent(open->op_fname, open->op_fnamelen))
++ return nfserr_exist;
++ if (!(iap->ia_valid & ATTR_MODE))
++ iap->ia_mode = 0;
++
++ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
++ if (status != nfs_ok)
++ return status;
++ parent = fhp->fh_dentry;
++ inode = d_inode(parent);
++
++ host_err = fh_want_write(fhp);
++ if (host_err)
++ return nfserrno(host_err);
++
++ fh_lock_nested(fhp, I_MUTEX_PARENT);
++
++ child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
++ if (IS_ERR(child)) {
++ status = nfserrno(PTR_ERR(child));
++ goto out;
++ }
++
++ if (d_really_is_negative(child)) {
++ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
++ if (status != nfs_ok)
++ goto out;
++ }
++
++ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
++ if (status != nfs_ok)
++ goto out;
++
++ v_mtime = 0;
++ v_atime = 0;
++ if (nfsd4_create_is_exclusive(open->op_createmode)) {
++ u32 *verifier = (u32 *)open->op_verf.data;
++
++ /*
++ * Solaris 7 gets confused (bugid 4218508) if these have
++ * the high bit set, as do xfs filesystems without the
++ * "bigtime" feature. So just clear the high bits. If this
++ * is ever changed to use different attrs for storing the
++ * verifier, then do_open_lookup() will also need to be
++ * fixed accordingly.
++ */
++ v_mtime = verifier[0] & 0x7fffffff;
++ v_atime = verifier[1] & 0x7fffffff;
++ }
++
++ if (d_really_is_positive(child)) {
++ status = nfs_ok;
++
++ switch (open->op_createmode) {
++ case NFS4_CREATE_UNCHECKED:
++ if (!d_is_reg(child))
++ break;
++
++ /*
++ * In NFSv4, we don't want to truncate the file
++ * now. This would be wrong if the OPEN fails for
++ * some other reason. Furthermore, if the size is
++ * nonzero, we should ignore it according to spec!
++ */
++ open->op_truncate = (iap->ia_valid & ATTR_SIZE) &&
++ !iap->ia_size;
++ break;
++ case NFS4_CREATE_GUARDED:
++ status = nfserr_exist;
++ break;
++ case NFS4_CREATE_EXCLUSIVE:
++ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
++ d_inode(child)->i_atime.tv_sec == v_atime &&
++ d_inode(child)->i_size == 0) {
++ open->op_created = true;
++ break; /* subtle */
++ }
++ status = nfserr_exist;
++ break;
++ case NFS4_CREATE_EXCLUSIVE4_1:
++ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
++ d_inode(child)->i_atime.tv_sec == v_atime &&
++ d_inode(child)->i_size == 0) {
++ open->op_created = true;
++ goto set_attr; /* subtle */
++ }
++ status = nfserr_exist;
++ }
++ goto out;
++ }
++
++ if (!IS_POSIXACL(inode))
++ iap->ia_mode &= ~current_umask();
++
++ host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
++ if (host_err < 0) {
++ status = nfserrno(host_err);
++ goto out;
++ }
++ open->op_created = true;
++
++ /* A newly created file already has a file size of zero. */
++ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
++ iap->ia_valid &= ~ATTR_SIZE;
++ if (nfsd4_create_is_exclusive(open->op_createmode)) {
++ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
++ ATTR_MTIME_SET|ATTR_ATIME_SET;
++ iap->ia_mtime.tv_sec = v_mtime;
++ iap->ia_atime.tv_sec = v_atime;
++ iap->ia_mtime.tv_nsec = 0;
++ iap->ia_atime.tv_nsec = 0;
++ }
++
++set_attr:
++ status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
++
++out:
++ fh_unlock(fhp);
++ if (child && !IS_ERR(child))
++ dput(child);
++ fh_drop_write(fhp);
++ return status;
++}
++
+ static __be32
+ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
+ {
+@@ -264,16 +414,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
+ * yes | yes | GUARDED4 | GUARDED4
+ */
+
+- /*
+- * Note: create modes (UNCHECKED,GUARDED...) are the same
+- * in NFSv4 as in v3 except EXCLUSIVE4_1.
+- */
+ current->fs->umask = open->op_umask;
+- status = do_nfsd_create(rqstp, current_fh, open->op_fname,
+- open->op_fnamelen, &open->op_iattr,
+- *resfh, open->op_createmode,
+- (u32 *)open->op_verf.data,
+- &open->op_truncate, &open->op_created);
++ status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
+ current->fs->umask = 0;
+
+ if (!status && open->op_label.len)
+@@ -284,7 +426,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
+ * use the returned bitmask to indicate which attributes
+ * we used to store the verifier:
+ */
+- if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
++ if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
+ open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_MODIFY);
+ } else
+--
+2.43.0
+
--- /dev/null
+From 33bca88a8733a8120869136d5959b7e4263a9643 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 19:44:45 -0800
+Subject: NFSD: refactoring courtesy_client_reaper to a generic low memory
+ shrinker
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit a1049eb47f20b9eabf9afb218578fff16b4baca6 ]
+
+Refactoring courtesy_client_reaper to generic low memory
+shrinker so it can be used for other purposes.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 524865c7211ef..34ae4a3d86f3e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4361,7 +4361,7 @@ nfsd4_init_slabs(void)
+ }
+
+ static unsigned long
+-nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
++nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+ {
+ int cnt;
+ struct nfsd_net *nn = container_of(shrink,
+@@ -4374,7 +4374,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
+ }
+
+ static unsigned long
+-nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
++nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+ {
+ return SHRINK_STOP;
+ }
+@@ -4401,8 +4401,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
+ nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
+
+ atomic_set(&nn->nfsd_courtesy_clients, 0);
+- nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
+- nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
++ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
++ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+ return register_shrinker(&nn->nfsd_client_shrinker);
+ }
+@@ -6151,17 +6151,24 @@ laundromat_main(struct work_struct *laundry)
+ }
+
+ static void
+-courtesy_client_reaper(struct work_struct *reaper)
++courtesy_client_reaper(struct nfsd_net *nn)
+ {
+ struct list_head reaplist;
+- struct delayed_work *dwork = to_delayed_work(reaper);
+- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+- nfsd_shrinker_work);
+
+ nfs4_get_courtesy_client_reaplist(nn, &reaplist);
+ nfs4_process_client_reaplist(&reaplist);
+ }
+
++static void
++nfsd4_state_shrinker_worker(struct work_struct *work)
++{
++ struct delayed_work *dwork = to_delayed_work(work);
++ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
++ nfsd_shrinker_work);
++
++ courtesy_client_reaper(nn);
++}
++
+ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+ {
+ if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
+@@ -7983,7 +7990,7 @@ static int nfs4_state_create_net(struct net *net)
+ INIT_LIST_HEAD(&nn->blocked_locks_lru);
+
+ INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
++ INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ get_net(net);
+
+ return 0;
+--
+2.43.0
+
--- /dev/null
+From 106abeb91bc5342eca22efa8c02308cc3de0ff9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 16:54:51 -0700
+Subject: NFSD: refactoring v4 specific code to a helper in nfs4state.c
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 6867137ebcf4155fe25f2ecf7c29b9fb90a76d1d ]
+
+This patch moves the v4-specific code from nfsd_init_net() to the
+nfsd4_init_leases_net() helper in nfs4state.c.
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 12 ++++++++++++
+ fs/nfsd/nfsctl.c | 9 +--------
+ fs/nfsd/nfsd.h | 4 ++++
+ 3 files changed, 17 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 9d344164f814f..a75f3f7c94d50 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4347,6 +4347,18 @@ nfsd4_init_slabs(void)
+ return -ENOMEM;
+ }
+
++void nfsd4_init_leases_net(struct nfsd_net *nn)
++{
++ nn->nfsd4_lease = 90; /* default lease time */
++ nn->nfsd4_grace = 90;
++ nn->somebody_reclaimed = false;
++ nn->track_reclaim_completes = false;
++ nn->clverifier_counter = prandom_u32();
++ nn->clientid_base = prandom_u32();
++ nn->clientid_counter = nn->clientid_base + 1;
++ nn->s2s_cp_cl_id = nn->clientid_counter++;
++}
++
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+ {
+ rp->rp_status = nfserr_serverfault;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 7002edbf26870..164c822ae3ae9 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1484,14 +1484,7 @@ static __net_init int nfsd_init_net(struct net *net)
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_drc_error;
+- nn->nfsd4_lease = 90; /* default lease time */
+- nn->nfsd4_grace = 90;
+- nn->somebody_reclaimed = false;
+- nn->track_reclaim_completes = false;
+- nn->clverifier_counter = prandom_u32();
+- nn->clientid_base = prandom_u32();
+- nn->clientid_counter = nn->clientid_base + 1;
+- nn->s2s_cp_cl_id = nn->clientid_counter++;
++ nfsd4_init_leases_net(nn);
+
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 9a8b09afc1733..ef8087691138a 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -496,12 +496,16 @@ extern void unregister_cld_notifier(void);
+ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+ #endif
+
++extern void nfsd4_init_leases_net(struct nfsd_net *nn);
++
+ #else /* CONFIG_NFSD_V4 */
+ static inline int nfsd4_is_junction(struct dentry *dentry)
+ {
+ return 0;
+ }
+
++static inline void nfsd4_init_leases_net(struct nfsd_net *nn) {};
++
+ #define register_cld_notifier() 0
+ #define unregister_cld_notifier() do { } while(0)
+
+--
+2.43.0
+
--- /dev/null
+From 73cf1e7da1fc17ff11947097707bb87954164524 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Jan 2023 12:17:09 -0800
+Subject: NFSD: register/unregister of nfsd-client shrinker at nfsd
+ startup/shutdown time
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit f385f7d244134246f984975ed34cd75f77de479f ]
+
+Currently the nfsd-client shrinker is registered and unregistered at
+the time the nfsd module is loaded and unloaded. The problem with this
+is that the shrinker is registered before all of the relevant fields
+in nfsd_net are initialized when nfsd is started. This can lead to an
+oops when memory is low and the shrinker is called while nfsd is not
+running.
+
+This patch moves the register/unregister of nfsd-client shrinker from
+module load/unload time to nfsd startup/shutdown time.
+
+Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
+Reported-by: Mike Galbraith <efault@gmx.de>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 19 +++++++++++--------
+ fs/nfsd/nfsctl.c | 7 +------
+ fs/nfsd/nfsd.h | 6 ++----
+ 3 files changed, 14 insertions(+), 18 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 485e7055e52ec..ca0a1816500c3 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4421,7 +4421,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+ return SHRINK_STOP;
+ }
+
+-int
++void
+ nfsd4_init_leases_net(struct nfsd_net *nn)
+ {
+ struct sysinfo si;
+@@ -4446,13 +4446,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
+ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
+- return register_shrinker(&nn->nfsd_client_shrinker);
+-}
+-
+-void
+-nfsd4_leases_net_shutdown(struct nfsd_net *nn)
+-{
+- unregister_shrinker(&nn->nfsd_client_shrinker);
+ }
+
+ static void init_nfs4_replay(struct nfs4_replay *rp)
+@@ -8067,8 +8060,17 @@ static int nfs4_state_create_net(struct net *net)
+ INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ get_net(net);
+
++ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
++ nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count;
++ nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
++
++ if (register_shrinker(&nn->nfsd_client_shrinker))
++ goto err_shrinker;
+ return 0;
+
++err_shrinker:
++ put_net(net);
++ kfree(nn->sessionid_hashtbl);
+ err_sessionid:
+ kfree(nn->unconf_id_hashtbl);
+ err_unconf_id:
+@@ -8161,6 +8163,7 @@ nfs4_state_shutdown_net(struct net *net)
+ struct list_head *pos, *next, reaplist;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
++ unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_delayed_work_sync(&nn->laundromat_work);
+ locks_end_grace(&nn->nfsd4_manager);
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index d1e581a60480c..c2577ee7ffb22 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1457,9 +1457,7 @@ static __net_init int nfsd_init_net(struct net *net)
+ goto out_idmap_error;
+ nn->nfsd_versions = NULL;
+ nn->nfsd4_minorversions = NULL;
+- retval = nfsd4_init_leases_net(nn);
+- if (retval)
+- goto out_drc_error;
++ nfsd4_init_leases_net(nn);
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+ goto out_cache_error;
+@@ -1469,8 +1467,6 @@ static __net_init int nfsd_init_net(struct net *net)
+ return 0;
+
+ out_cache_error:
+- nfsd4_leases_net_shutdown(nn);
+-out_drc_error:
+ nfsd_idmap_shutdown(net);
+ out_idmap_error:
+ nfsd_export_shutdown(net);
+@@ -1486,7 +1482,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
+ nfsd_idmap_shutdown(net);
+ nfsd_export_shutdown(net);
+ nfsd_netns_free_versions(net_generic(net, nfsd_net_id));
+- nfsd4_leases_net_shutdown(nn);
+ }
+
+ static struct pernet_operations nfsd_net_ops = {
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 93b42ef9ed91b..fa0144a742678 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void);
+ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+ #endif
+
+-extern int nfsd4_init_leases_net(struct nfsd_net *nn);
+-extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn);
++extern void nfsd4_init_leases_net(struct nfsd_net *nn);
+
+ #else /* CONFIG_NFSD_V4 */
+ static inline int nfsd4_is_junction(struct dentry *dentry)
+@@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry)
+ return 0;
+ }
+
+-static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; };
+-static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {};
++static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
+
+ #define register_cld_notifier() 0
+ #define unregister_cld_notifier() do { } while(0)
+--
+2.43.0
+
--- /dev/null
+From 3c3f81f26940ef95d4259575c0d2a1e6033ac8ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Sep 2021 19:10:03 -0400
+Subject: NFSD: Remove be32_to_cpu() from DRC hash function
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 7578b2f628db27281d3165af0aa862311883a858 ]
+
+Commit 7142b98d9fd7 ("nfsd: Clean up drc cache in preparation for
+global spinlock elimination"), billed as a clean-up, added
+be32_to_cpu() to the DRC hash function without explanation. That
+commit removed two comments that state that byte-swapping in the
+hash function is unnecessary without explaining whether there was
+a need for that change.
+
+On some Intel CPUs, the swab32 instruction is known to cause a CPU
+pipeline stall. be32_to_cpu() does not add extra randomness, since
+the hash multiplication is done /before/ shifting to the high-order
+bits of the result.
+
+As a micro-optimization, remove the unnecessary transform from the
+DRC hash function.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfscache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 6b9ef15c9c03b..a838909502907 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -87,7 +87,7 @@ nfsd_hashsize(unsigned int limit)
+ static u32
+ nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
+ {
+- return hash_32(be32_to_cpu(xid), nn->maskbits);
++ return hash_32((__force u32)xid, nn->maskbits);
+ }
+
+ static struct svc_cacherep *
+--
+2.43.0
+
--- /dev/null
+From 5a2ee73331c0e4b3d8fbd1fb24aff86e2eb4bebb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Feb 2022 12:25:47 -0500
+Subject: NFSD: Remove CONFIG_NFSD_V3
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5f9a62ff7d2808c7b56c0ec90f3b7eae5872afe6 ]
+
+Eventually support for NFSv2 in the Linux NFS server is to be
+deprecated and then removed.
+
+However, NFSv2 is the "always supported" version that is available
+as soon as CONFIG_NFSD is set. Before NFSv2 support can be removed,
+we need to choose a different "always supported" version.
+
+This patch removes CONFIG_NFSD_V3 so that NFSv3 is always supported,
+as NFSv2 is today. When NFSv2 support is removed, NFSv3 will become
+the only "always supported" NFS version.
+
+The defconfigs still need to be updated to remove CONFIG_NFSD_V3=y.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/Kconfig | 2 +-
+ fs/nfsd/Kconfig | 12 +-----------
+ fs/nfsd/Makefile | 3 +--
+ fs/nfsd/nfsfh.c | 4 ----
+ fs/nfsd/nfsfh.h | 20 --------------------
+ fs/nfsd/nfssvc.c | 2 --
+ fs/nfsd/vfs.c | 9 ---------
+ fs/nfsd/vfs.h | 2 --
+ 8 files changed, 3 insertions(+), 51 deletions(-)
+
+diff --git a/fs/Kconfig b/fs/Kconfig
+index 971339ecc1a2b..9ea9614107a48 100644
+--- a/fs/Kconfig
++++ b/fs/Kconfig
+@@ -344,7 +344,7 @@ config LOCKD
+
+ config LOCKD_V4
+ bool
+- depends on NFSD_V3 || NFS_V3
++ depends on NFSD || NFS_V3
+ depends on FILE_LOCKING
+ default y
+
+diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
+index 6e9ea4ee0f737..b83a6e3bf8080 100644
+--- a/fs/nfsd/Kconfig
++++ b/fs/nfsd/Kconfig
+@@ -35,18 +35,9 @@ config NFSD_V2_ACL
+ bool
+ depends on NFSD
+
+-config NFSD_V3
+- bool "NFS server support for NFS version 3"
+- depends on NFSD
+- help
+- This option enables support in your system's NFS server for
+- version 3 of the NFS protocol (RFC 1813).
+-
+- If unsure, say Y.
+-
+ config NFSD_V3_ACL
+ bool "NFS server support for the NFSv3 ACL protocol extension"
+- depends on NFSD_V3
++ depends on NFSD
+ select NFSD_V2_ACL
+ help
+ Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
+@@ -70,7 +61,6 @@ config NFSD_V3_ACL
+ config NFSD_V4
+ bool "NFS server support for NFS version 4"
+ depends on NFSD && PROC_FS
+- select NFSD_V3
+ select FS_POSIX_ACL
+ select SUNRPC_GSS
+ select CRYPTO
+diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
+index 3f0983e93a998..805c06d5f1b4b 100644
+--- a/fs/nfsd/Makefile
++++ b/fs/nfsd/Makefile
+@@ -12,9 +12,8 @@ nfsd-y += trace.o
+
+ nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+ export.o auth.o lockd.o nfscache.o nfsxdr.o \
+- stats.o filecache.o
++ stats.o filecache.o nfs3proc.o nfs3xdr.o
+ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
+-nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
+ nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
+ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ nfs4acl.o nfs4callback.o nfs4recover.o
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 145208bcb9bd4..c29baa03dfafd 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -611,8 +611,6 @@ fh_update(struct svc_fh *fhp)
+ return nfserr_serverfault;
+ }
+
+-#ifdef CONFIG_NFSD_V3
+-
+ /**
+ * fh_fill_pre_attrs - Fill in pre-op attributes
+ * @fhp: file handle to be updated
+@@ -673,8 +671,6 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ }
+
+-#endif /* CONFIG_NFSD_V3 */
+-
+ /*
+ * Release a file handle.
+ */
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index 434930d8a946e..fb9d358a267e5 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -90,7 +90,6 @@ typedef struct svc_fh {
+ * operation
+ */
+ int fh_flags; /* FH flags */
+-#ifdef CONFIG_NFSD_V3
+ bool fh_post_saved; /* post-op attrs saved */
+ bool fh_pre_saved; /* pre-op attrs saved */
+
+@@ -107,7 +106,6 @@ typedef struct svc_fh {
+ /* Post-op attributes saved in fh_unlock */
+ struct kstat fh_post_attr; /* full attrs after operation */
+ u64 fh_post_change; /* nfsv4 change; see above */
+-#endif /* CONFIG_NFSD_V3 */
+ } svc_fh;
+ #define NFSD4_FH_FOREIGN (1<<0)
+ #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
+@@ -283,8 +281,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
+ }
+ #endif
+
+-#ifdef CONFIG_NFSD_V3
+-
+ /**
+ * fh_clear_pre_post_attrs - Reset pre/post attributes
+ * @fhp: file handle to be updated
+@@ -327,22 +323,6 @@ static inline u64 nfsd4_change_attribute(struct kstat *stat,
+ extern void fh_fill_pre_attrs(struct svc_fh *fhp);
+ extern void fh_fill_post_attrs(struct svc_fh *fhp);
+
+-#else /* !CONFIG_NFSD_V3 */
+-
+-static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
+-{
+-}
+-
+-static inline void fh_fill_pre_attrs(struct svc_fh *fhp)
+-{
+-}
+-
+-static inline void fh_fill_post_attrs(struct svc_fh *fhp)
+-{
+-}
+-
+-#endif /* !CONFIG_NFSD_V3 */
+-
+
+ /*
+ * Lock a file handle/inode
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 2f74be98ff2d9..011c556caa1e7 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -117,9 +117,7 @@ static struct svc_stat nfsd_acl_svcstats = {
+
+ static const struct svc_version *nfsd_version[] = {
+ [2] = &nfsd_version2,
+-#if defined(CONFIG_NFSD_V3)
+ [3] = &nfsd_version3,
+-#endif
+ #if defined(CONFIG_NFSD_V4)
+ [4] = &nfsd_version4,
+ #endif
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 4d07a506164b0..00e956bdefaae 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -32,9 +32,7 @@
+ #include <linux/writeback.h>
+ #include <linux/security.h>
+
+-#ifdef CONFIG_NFSD_V3
+ #include "xdr3.h"
+-#endif /* CONFIG_NFSD_V3 */
+
+ #ifdef CONFIG_NFSD_V4
+ #include "../internal.h"
+@@ -616,7 +614,6 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ }
+ #endif /* defined(CONFIG_NFSD_V4) */
+
+-#ifdef CONFIG_NFSD_V3
+ /*
+ * Check server access rights to a file system object
+ */
+@@ -728,7 +725,6 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
+ out:
+ return error;
+ }
+-#endif /* CONFIG_NFSD_V3 */
+
+ int nfsd_open_break_lease(struct inode *inode, int access)
+ {
+@@ -1121,7 +1117,6 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ return err;
+ }
+
+-#ifdef CONFIG_NFSD_V3
+ /**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+@@ -1199,7 +1194,6 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ out:
+ return err;
+ }
+-#endif /* CONFIG_NFSD_V3 */
+
+ static __be32
+ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
+@@ -1389,8 +1383,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ rdev, resfhp);
+ }
+
+-#ifdef CONFIG_NFSD_V3
+-
+ /*
+ * NFSv3 and NFSv4 version of nfsd_create
+ */
+@@ -1556,7 +1548,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ err = nfserrno(host_err);
+ goto out;
+ }
+-#endif /* CONFIG_NFSD_V3 */
+
+ /*
+ * Read a symlink. On entry, *lenp must contain the maximum path length that
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 2c43d10e3cab4..ccb87b2864f64 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -68,7 +68,6 @@ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
+ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+-#ifdef CONFIG_NFSD_V3
+ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+@@ -76,7 +75,6 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
+ u32 *verifier, bool *truncp, bool *created);
+ __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
+-#endif /* CONFIG_NFSD_V3 */
+ #ifdef CONFIG_NFSD_V4
+ __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *name, void **bufp, int *lenp);
+--
+2.43.0
+
--- /dev/null
+From fcd4e3c5a6f4b315cc4bc05083031e5aba7a11ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Mar 2022 15:36:58 -0400
+Subject: NFSD: Remove do_nfsd_create()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 1c388f27759c5d9271d4fca081f7ee138986eb7d ]
+
+Now that its two callers have their own version-specific instance of
+this function, do_nfsd_create() is no longer used.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 150 --------------------------------------------------
+ fs/nfsd/vfs.h | 10 ----
+ 2 files changed, 160 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index e4f100a43ce52..9dd14c0eaebd1 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1395,156 +1395,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ rdev, resfhp);
+ }
+
+-/*
+- * NFSv3 and NFSv4 version of nfsd_create
+- */
+-__be32
+-do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- char *fname, int flen, struct iattr *iap,
+- struct svc_fh *resfhp, int createmode, u32 *verifier,
+- bool *truncp, bool *created)
+-{
+- struct dentry *dentry, *dchild = NULL;
+- struct inode *dirp;
+- __be32 err;
+- int host_err;
+- __u32 v_mtime=0, v_atime=0;
+-
+- err = nfserr_perm;
+- if (!flen)
+- goto out;
+- err = nfserr_exist;
+- if (isdotent(fname, flen))
+- goto out;
+- if (!(iap->ia_valid & ATTR_MODE))
+- iap->ia_mode = 0;
+- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+- if (err)
+- goto out;
+-
+- dentry = fhp->fh_dentry;
+- dirp = d_inode(dentry);
+-
+- host_err = fh_want_write(fhp);
+- if (host_err)
+- goto out_nfserr;
+-
+- fh_lock_nested(fhp, I_MUTEX_PARENT);
+-
+- /*
+- * Compose the response file handle.
+- */
+- dchild = lookup_one_len(fname, dentry, flen);
+- host_err = PTR_ERR(dchild);
+- if (IS_ERR(dchild))
+- goto out_nfserr;
+-
+- /* If file doesn't exist, check for permissions to create one */
+- if (d_really_is_negative(dchild)) {
+- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+- if (err)
+- goto out;
+- }
+-
+- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+- if (err)
+- goto out;
+-
+- if (nfsd_create_is_exclusive(createmode)) {
+- /* solaris7 gets confused (bugid 4218508) if these have
+- * the high bit set, as do xfs filesystems without the
+- * "bigtime" feature. So just clear the high bits. If this is
+- * ever changed to use different attrs for storing the
+- * verifier, then do_open_lookup() will also need to be fixed
+- * accordingly.
+- */
+- v_mtime = verifier[0]&0x7fffffff;
+- v_atime = verifier[1]&0x7fffffff;
+- }
+-
+- if (d_really_is_positive(dchild)) {
+- err = 0;
+-
+- switch (createmode) {
+- case NFS3_CREATE_UNCHECKED:
+- if (! d_is_reg(dchild))
+- goto out;
+- else if (truncp) {
+- /* in nfsv4, we need to treat this case a little
+- * differently. we don't want to truncate the
+- * file now; this would be wrong if the OPEN
+- * fails for some other reason. furthermore,
+- * if the size is nonzero, we should ignore it
+- * according to spec!
+- */
+- *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
+- }
+- else {
+- iap->ia_valid &= ATTR_SIZE;
+- goto set_attr;
+- }
+- break;
+- case NFS3_CREATE_EXCLUSIVE:
+- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
+- && d_inode(dchild)->i_atime.tv_sec == v_atime
+- && d_inode(dchild)->i_size == 0 ) {
+- if (created)
+- *created = true;
+- break;
+- }
+- fallthrough;
+- case NFS4_CREATE_EXCLUSIVE4_1:
+- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
+- && d_inode(dchild)->i_atime.tv_sec == v_atime
+- && d_inode(dchild)->i_size == 0 ) {
+- if (created)
+- *created = true;
+- goto set_attr;
+- }
+- fallthrough;
+- case NFS3_CREATE_GUARDED:
+- err = nfserr_exist;
+- }
+- goto out;
+- }
+-
+- if (!IS_POSIXACL(dirp))
+- iap->ia_mode &= ~current_umask();
+-
+- host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
+- if (host_err < 0)
+- goto out_nfserr;
+- if (created)
+- *created = true;
+-
+- nfsd_check_ignore_resizing(iap);
+-
+- if (nfsd_create_is_exclusive(createmode)) {
+- /* Cram the verifier into atime/mtime */
+- iap->ia_valid = ATTR_MTIME|ATTR_ATIME
+- | ATTR_MTIME_SET|ATTR_ATIME_SET;
+- /* XXX someone who knows this better please fix it for nsec */
+- iap->ia_mtime.tv_sec = v_mtime;
+- iap->ia_atime.tv_sec = v_atime;
+- iap->ia_mtime.tv_nsec = 0;
+- iap->ia_atime.tv_nsec = 0;
+- }
+-
+- set_attr:
+- err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
+-
+- out:
+- fh_unlock(fhp);
+- if (dchild && !IS_ERR(dchild))
+- dput(dchild);
+- fh_drop_write(fhp);
+- return err;
+-
+- out_nfserr:
+- err = nfserrno(host_err);
+- goto out;
+-}
+-
+ /*
+ * Read a symlink. On entry, *lenp must contain the maximum path length that
+ * fits into the buffer. On return, it contains the true length.
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index 1f32a83456b03..f99794b033a55 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -71,10 +71,6 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
+ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+ __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct iattr *iap);
+-__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
+- char *name, int len, struct iattr *attrs,
+- struct svc_fh *res, int createmode,
+- u32 *verifier, bool *truncp, bool *created);
+ __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ u64 offset, u32 count, __be32 *verf);
+ #ifdef CONFIG_NFSD_V4
+@@ -161,10 +157,4 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
+ AT_STATX_SYNC_AS_STAT));
+ }
+
+-static inline int nfsd_create_is_exclusive(int createmode)
+-{
+- return createmode == NFS3_CREATE_EXCLUSIVE
+- || createmode == NFS4_CREATE_EXCLUSIVE4_1;
+-}
+-
+ #endif /* LINUX_NFSD_VFS_H */
+--
+2.43.0
+
--- /dev/null
+From 039694e3a774b9c3f8185998c2d3a437ec1ad436 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Mar 2022 14:28:51 -0400
+Subject: NFSD: Remove dprintk call sites from tail of nfsd4_open()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit f67a16b147045815b6aaafeef8663e5faeb6d569 ]
+
+Clean up: These relics are not likely to benefit server
+administrators.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index f0cb92466da84..611aedeab406b 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -622,13 +622,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ break;
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+- dprintk("NFSD: unsupported OPEN claim type %d\n",
+- open->op_claim_type);
+ status = nfserr_notsupp;
+ goto out;
+ default:
+- dprintk("NFSD: Invalid OPEN claim type %d\n",
+- open->op_claim_type);
+ status = nfserr_inval;
+ goto out;
+ }
+--
+2.43.0
+
--- /dev/null
+From 837fc86be272e63ee7a01011f8879c643f4db682 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:23:25 -0400
+Subject: NFSD: Remove "inline" directives on op_rsize_bop helpers
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 6604148cf961b57fc735e4204f8996536da9253c ]
+
+These helpers are always invoked indirectly, so the compiler can't
+inline these anyway. While we're updating the synopses of these
+helpers, defensively convert their parameters to const pointers.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 121 ++++++++++++++++++++++++++++-----------------
+ fs/nfsd/xdr4.h | 3 +-
+ 2 files changed, 77 insertions(+), 47 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 7fdede420e65b..c8d299bc9e55a 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2765,28 +2765,33 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+
+ #define op_encode_channel_attrs_maxsz (6 + 1 + 1)
+
+-static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ /* ac_supported, ac_resp_access */
+ return (op_encode_hdr_size + 2)* sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_change_info_maxsz
+ + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
+@@ -2797,10 +2802,10 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
+ * the op prematurely if the estimate is too large. We may turn off splice
+ * reads unnecessarily.
+ */
+-static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+- u32 *bmap = op->u.getattr.ga_bmval;
++ const u32 *bmap = op->u.getattr.ga_bmval;
+ u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
+ u32 ret = 0;
+
+@@ -2835,24 +2840,28 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
+ return ret;
+ }
+
+-static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+ }
+
+-static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
+ * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_stateid_maxsz
+ + op_encode_change_info_maxsz + 1
+@@ -2860,7 +2869,8 @@ static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+ + op_encode_delegation_maxsz) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ u32 maxcount = 0, rlen = 0;
+
+@@ -2870,7 +2880,8 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+ return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ u32 maxcount = svc_max_payload(rqstp);
+ u32 rlen = min(op->u.read.rd_length, maxcount);
+@@ -2884,7 +2895,8 @@ static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op
+ return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ u32 maxcount = 0, rlen = 0;
+
+@@ -2895,59 +2907,68 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
+ XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+ }
+
+-static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_change_info_maxsz
+ + op_encode_change_info_maxsz) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size
+ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+ * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+ (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
+ sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
+ 1 + 1 + /* eir_flags, spr_how */\
+@@ -2961,14 +2982,16 @@ static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_o
+ 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
+ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
+@@ -2977,7 +3000,8 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
+ op_encode_channel_attrs_maxsz) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size +
+ 1 /* wr_callback */ +
+@@ -2989,16 +3013,16 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+ 1 /* cr_synchronous */) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size +
+ 2 /* osr_count */ +
+ 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size +
+ 3 /* cnr_lease_time */ +
+@@ -3013,7 +3037,8 @@ static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+ }
+
+ #ifdef CONFIG_NFSD_PNFS
+-static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ u32 maxcount = 0, rlen = 0;
+
+@@ -3031,7 +3056,8 @@ static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4
+ * so we need to define an arbitrary upper bound here.
+ */
+ #define MAX_LAYOUT_SIZE 128
+-static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size +
+ 1 /* logr_return_on_close */ +
+@@ -3040,14 +3066,16 @@ static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op
+ MAX_LAYOUT_SIZE) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size +
+ 1 /* locr_newsize */ +
+ 2 /* ns_size */) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size +
+ 1 /* lrs_stateid */ +
+@@ -3056,13 +3084,14 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
+ #endif /* CONFIG_NFSD_PNFS */
+
+
+-static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
++static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + 3) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ u32 maxcount, rlen;
+
+@@ -3072,14 +3101,14 @@ static inline u32 nfsd4_getxattr_rsize(struct svc_rqst *rqstp,
+ return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_setxattr_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+ }
+-static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ u32 maxcount, rlen;
+
+@@ -3089,8 +3118,8 @@ static inline u32 nfsd4_listxattrs_rsize(struct svc_rqst *rqstp,
+ return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+ }
+
+-static inline u32 nfsd4_removexattr_rsize(struct svc_rqst *rqstp,
+- struct nfsd4_op *op)
++static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op)
+ {
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 466e2786fc976..7fcbc7a46c157 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -889,7 +889,8 @@ struct nfsd4_operation {
+ u32 op_flags;
+ char *op_name;
+ /* Try to get response size before operation */
+- u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
++ u32 (*op_rsize_bop)(const struct svc_rqst *rqstp,
++ const struct nfsd4_op *op);
+ void (*op_get_currentstateid)(struct nfsd4_compound_state *,
+ union nfsd4_op_u *);
+ void (*op_set_currentstateid)(struct nfsd4_compound_state *,
+--
+2.43.0
+
--- /dev/null
+From 38f98a651105745e5098a0815e9427c9ea4284f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:41:06 -0400
+Subject: NFSD: Remove kmalloc from nfsd4_do_async_copy()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit ad1e46c9b07b13659635ee5405f83ad0df143116 ]
+
+Instead of manufacturing a phony struct nfsd_file, pass the
+struct file returned by nfs42_ssc_open() directly to
+nfsd4_do_copy().
+
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 28 ++++++++++++++--------------
+ 1 file changed, 14 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 24c7d5e6c8c33..f63c3c4c10ca7 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1755,29 +1755,31 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ nfs4_put_copy(copy);
+ }
+
++/**
++ * nfsd4_do_async_copy - kthread function for background server-side COPY
++ * @data: arguments for COPY operation
++ *
++ * Return values:
++ * %0: Copy operation is done.
++ */
+ static int nfsd4_do_async_copy(void *data)
+ {
+ struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
+ struct nfsd4_copy *cb_copy;
+
+ if (nfsd4_ssc_is_inter(copy)) {
+- copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+- if (!copy->nf_src) {
+- copy->nfserr = nfserr_serverfault;
+- /* ss_mnt will be unmounted by the laundromat */
+- goto do_callback;
+- }
+- copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+- &copy->stateid);
+- if (IS_ERR(copy->nf_src->nf_file)) {
++ struct file *filp;
++
++ filp = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
++ &copy->stateid);
++ if (IS_ERR(filp)) {
+ copy->nfserr = nfserr_offload_denied;
+ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+- copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
++ copy->nfserr = nfsd4_do_copy(copy, filp,
+ copy->nf_dst->nf_file, false);
+- nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src->nf_file,
+- copy->nf_dst);
++ nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst);
+ } else {
+ copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, false);
+@@ -1799,8 +1801,6 @@ static int nfsd4_do_async_copy(void *data)
+ &copy->fh, copy->cp_count, copy->nfserr);
+ nfsd4_run_cb(&cb_copy->cp_cb);
+ out:
+- if (nfsd4_ssc_is_inter(copy))
+- kfree(copy->nf_src);
+ cleanup_async_copy(copy);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 22ee0a72dc525d69fb6632e73417bbbba34ac703 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:44 -0400
+Subject: NFSD: Remove lockdep assertion from unhash_and_release_locked()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit f53cef15dddec7203df702cdc62e554190385450 ]
+
+IIUC, holding the hash bucket lock is needed only in
+nfsd_file_unhash, and there is already a lockdep assertion there.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 2d013a88e3565..6a01de8677959 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -299,8 +299,6 @@ nfsd_file_unhash(struct nfsd_file *nf)
+ static bool
+ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+ {
+- lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+-
+ trace_nfsd_file_unhash_and_release_locked(nf);
+ if (!nfsd_file_unhash(nf))
+ return false;
+--
+2.43.0
+
--- /dev/null
+From 209fab054e0089e583582124f8d18bf9abe718b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Sep 2022 14:59:10 +0800
+Subject: nfsd: remove nfsd4_prepare_cb_recall() declaration
+
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+
+[ Upstream commit 18224dc58d960c65446971930d0487fc72d00598 ]
+
+nfsd4_prepare_cb_recall() has been removed since
+commit 0162ac2b978e ("nfsd: introduce nfsd4_callback_ops"),
+so remove its leftover declaration.
+
+Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/state.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index 5d28beb290fef..4155be65d8069 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -697,7 +697,6 @@ extern int nfsd4_create_callback_queue(void);
+ extern void nfsd4_destroy_callback_queue(void);
+ extern void nfsd4_shutdown_callback(struct nfs4_client *);
+ extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
+-extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
+ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
+ struct xdr_netobj princhash, struct nfsd_net *nn);
+ extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
+--
+2.43.0
+
--- /dev/null
+From 70e075134cc4140c43200ec194168cfc62719902 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:10 -0400
+Subject: NFSD: Remove nfsd_file::nf_hashval
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit f0743c2b25c65debd4f599a7c861428cd9de5906 ]
+
+The value in this field can always be computed from nf_inode, thus
+it is no longer used.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 6 ++----
+ fs/nfsd/filecache.h | 1 -
+ 2 files changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index dd59deec8b011..29b1f57692a60 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -167,8 +167,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+ }
+
+ static struct nfsd_file *
+-nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+- struct net *net)
++nfsd_file_alloc(struct inode *inode, unsigned int may, struct net *net)
+ {
+ struct nfsd_file *nf;
+
+@@ -182,7 +181,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+ nf->nf_net = net;
+ nf->nf_flags = 0;
+ nf->nf_inode = inode;
+- nf->nf_hashval = hashval;
+ refcount_set(&nf->nf_ref, 1);
+ nf->nf_may = may & NFSD_FILE_MAY_MASK;
+ nf->nf_mark = NULL;
+@@ -1005,7 +1003,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (nf)
+ goto wait_for_construction;
+
+- new = nfsd_file_alloc(inode, may_flags, hashval, net);
++ new = nfsd_file_alloc(inode, may_flags, net);
+ if (!new) {
+ status = nfserr_jukebox;
+ goto out_status;
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index c6ad5fe47f12f..82051e1b8420d 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -40,7 +40,6 @@ struct nfsd_file {
+ #define NFSD_FILE_REFERENCED (2)
+ unsigned long nf_flags;
+ struct inode *nf_inode;
+- unsigned int nf_hashval;
+ refcount_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
+--
+2.43.0
+
--- /dev/null
+From 0777fb32af79836f83640589e505e2f128a12ba7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Oct 2021 14:53:30 -0400
+Subject: NFSD: Remove NFSD_PROC_ARGS_* macros
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c1a3f2ce66c80cd9f2a4376fa35a5c8d05441c73 ]
+
+Clean up.
+
+The PROC_ARGS macros were added when I thought that NFSD tracepoints
+would be reporting endpoint information. However, tracepoints in the
+RPC server now report transport endpoint information, so in general
+there's no need for the upper layers to do that any more, and these
+macros can be retired.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/trace.h | 28 +++++++++-------------------
+ 1 file changed, 9 insertions(+), 19 deletions(-)
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index c55fd77d43605..7f3f40f6c0ff3 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -13,22 +13,6 @@
+ #include "export.h"
+ #include "nfsfh.h"
+
+-#define NFSD_TRACE_PROC_ARG_FIELDS \
+- __field(unsigned int, netns_ino) \
+- __field(u32, xid) \
+- __array(unsigned char, server, sizeof(struct sockaddr_in6)) \
+- __array(unsigned char, client, sizeof(struct sockaddr_in6))
+-
+-#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \
+- do { \
+- __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
+- __entry->xid = be32_to_cpu(rqstp->rq_xid); \
+- memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
+- rqstp->rq_xprt->xpt_locallen); \
+- memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
+- rqstp->rq_xprt->xpt_remotelen); \
+- } while (0);
+-
+ #define NFSD_TRACE_PROC_RES_FIELDS \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+@@ -53,16 +37,22 @@ DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
+ ),
+ TP_ARGS(rqstp),
+ TP_STRUCT__entry(
+- NFSD_TRACE_PROC_ARG_FIELDS
+-
++ __field(unsigned int, netns_ino)
++ __field(u32, xid)
+ __field(u32, vers)
+ __field(u32, proc)
++ __sockaddr(server, rqstp->rq_xprt->xpt_locallen)
++ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+ ),
+ TP_fast_assign(
+- NFSD_TRACE_PROC_ARG_ASSIGNMENTS
++ const struct svc_xprt *xprt = rqstp->rq_xprt;
+
++ __entry->netns_ino = xprt->xpt_net->ns.inum;
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->vers = rqstp->rq_vers;
+ __entry->proc = rqstp->rq_proc;
++ __assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen);
++ __assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen);
+ ),
+ TP_printk("xid=0x%08x vers=%u proc=%u",
+ __entry->xid, __entry->vers, __entry->proc
+--
+2.43.0
+
--- /dev/null
+From 4412810b7aa53cc0fbab519e73639aacfdad91b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Oct 2022 21:24:23 +0100
+Subject: NFSD: Remove redundant assignment to variable host_err
+
+From: Colin Ian King <colin.i.king@gmail.com>
+
+[ Upstream commit 69eed23baf877bbb1f14d7f4df54f89807c9ee2a ]
+
+Variable host_err is assigned a value that is never read; it is
+re-assigned a value in every execution path of the following
+switch statement. The assignment is redundant and can be removed.
+
+Cleans up clang-scan warning:
+warning: Value stored to 'host_err' is never read [deadcode.DeadStores]
+
+Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 9215350ad095c..88a2ad962a055 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1306,7 +1306,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ iap->ia_mode &= ~current_umask();
+
+ err = 0;
+- host_err = 0;
+ switch (type) {
+ case S_IFREG:
+ host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
+--
+2.43.0
+
--- /dev/null
+From b6d587ea2b1c6324fb5a745d9b719523f5101fbf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 22:25:25 +0100
+Subject: nfsd: remove redundant assignment to variable len
+
+From: Colin Ian King <colin.i.king@gmail.com>
+
+[ Upstream commit 842e00ac3aa3b4a4f7f750c8ab54f8578fc875d3 ]
+
+Variable len is assigned the value zero, but that value is never
+read because len is re-assigned later. The assignment is redundant
+and can be removed.
+
+Cleans up clang scan-build warning:
+fs/nfsd/nfsctl.c:636:2: warning: Value stored to 'len' is never read
+
+Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 0621c2faf2424..66c352bf61b1d 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -633,7 +633,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
+ }
+
+ /* Now write current state into reply buffer */
+- len = 0;
+ sep = "";
+ remaining = SIMPLE_TRANSACTION_LIMIT;
+ for (num=2 ; num <= 4 ; num++) {
+--
+2.43.0
+
--- /dev/null
+From 8b08517b0ef2ff3960a2c6b42c79b479cfc99063 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 31 Aug 2022 14:20:02 +0000
+Subject: NFSD: remove redundant variable status
+
+From: Jinpeng Cui <cui.jinpeng2@zte.com.cn>
+
+[ Upstream commit 4ab3442ca384a02abf8b1f2b3449a6c547851873 ]
+
+Return the value directly from fh_verify(), do_open_permission(),
+and exp_pseudoroot() instead of storing it in the redundant
+variable status.
+
+Reported-by: Zeal Robot <zealci@zte.com.cn>
+Signed-off-by: Jinpeng Cui <cui.jinpeng2@zte.com.cn>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 16 ++++------------
+ 1 file changed, 4 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index b2bfe540c1cb0..69d3013fb1b26 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -141,7 +141,6 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
+ static __be32
+ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
+ {
+- __be32 status;
+
+ if (open->op_truncate &&
+ !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+@@ -156,9 +155,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
+ if (open->op_share_deny & NFS4_SHARE_DENY_READ)
+ accmode |= NFSD_MAY_WRITE;
+
+- status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
+-
+- return status;
++ return fh_verify(rqstp, current_fh, S_IFREG, accmode);
+ }
+
+ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
+@@ -454,7 +451,6 @@ static __be32
+ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+ {
+ struct svc_fh *current_fh = &cstate->current_fh;
+- __be32 status;
+ int accmode = 0;
+
+ /* We don't know the target directory, and therefore can not
+@@ -479,9 +475,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str
+ if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH)
+ accmode = NFSD_MAY_OWNER_OVERRIDE;
+
+- status = do_open_permission(rqstp, current_fh, open, accmode);
+-
+- return status;
++ return do_open_permission(rqstp, current_fh, open, accmode);
+ }
+
+ static void
+@@ -668,11 +662,9 @@ static __be32
+ nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+ {
+- __be32 status;
+-
+ fh_put(&cstate->current_fh);
+- status = exp_pseudoroot(rqstp, &cstate->current_fh);
+- return status;
++
++ return exp_pseudoroot(rqstp, &cstate->current_fh);
+ }
+
+ static __be32
+--
+2.43.0
+
--- /dev/null
+From 49525db20bd915652e3ba17ba91ff09cae2ed7ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Feb 2022 12:31:09 -0500
+Subject: NFSD: Remove svc_serv_ops::svo_module
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit f49169c97fceb21ad6a0aaf671c50b0f520f15a5 ]
+
+struct svc_serv_ops is about to be removed.
+
+Neil Brown says:
+> I suspect svo_module can go as well - I don't think the thread is
+> ever the thing that primarily keeps a module active.
+
+A random sample of kthread_create() callers shows sunrpc is the only
+one that manages module reference count in this way.
+
+Suggested-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 4 +---
+ fs/nfs/callback.c | 7 ++-----
+ fs/nfs/nfs4state.c | 1 -
+ fs/nfsd/nfssvc.c | 3 ---
+ include/linux/sunrpc/svc.h | 5 -----
+ kernel/module.c | 2 +-
+ net/sunrpc/svc.c | 2 --
+ 7 files changed, 4 insertions(+), 20 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index c83ec4a375bc1..bfde31124f3af 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -184,8 +184,7 @@ lockd(void *vrqstp)
+ dprintk("lockd_down: service stopped\n");
+
+ svc_exit_thread(rqstp);
+-
+- module_put_and_kthread_exit(0);
++ return 0;
+ }
+
+ static int create_lockd_listener(struct svc_serv *serv, const char *name,
+@@ -352,7 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
+
+ static const struct svc_serv_ops lockd_sv_ops = {
+ .svo_function = lockd,
+- .svo_module = THIS_MODULE,
+ };
+
+ static int lockd_get(void)
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index c98c68513590f..a494f9e7bd0a0 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -17,7 +17,6 @@
+ #include <linux/errno.h>
+ #include <linux/mutex.h>
+ #include <linux/freezer.h>
+-#include <linux/kthread.h>
+ #include <linux/sunrpc/svcauth_gss.h>
+ #include <linux/sunrpc/bc_xprt.h>
+
+@@ -92,8 +91,8 @@ nfs4_callback_svc(void *vrqstp)
+ continue;
+ svc_process(rqstp);
+ }
++
+ svc_exit_thread(rqstp);
+- module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+@@ -136,8 +135,8 @@ nfs41_callback_svc(void *vrqstp)
+ finish_wait(&serv->sv_cb_waitq, &wq);
+ }
+ }
++
+ svc_exit_thread(rqstp);
+- module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+@@ -234,12 +233,10 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+
+ static const struct svc_serv_ops nfs40_cb_sv_ops = {
+ .svo_function = nfs4_callback_svc,
+- .svo_module = THIS_MODULE,
+ };
+ #if defined(CONFIG_NFS_V4_1)
+ static const struct svc_serv_ops nfs41_cb_sv_ops = {
+ .svo_function = nfs41_callback_svc,
+- .svo_module = THIS_MODULE,
+ };
+
+ static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
+index d7868cc527805..61050ffac93ef 100644
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -2766,6 +2766,5 @@ static int nfs4_run_state_manager(void *ptr)
+ goto again;
+
+ nfs_put_client(clp);
+- module_put_and_kthread_exit(0);
+ return 0;
+ }
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 38895372ec393..d25d4c12a499a 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -614,7 +614,6 @@ static int nfsd_get_default_max_blksize(void)
+
+ static const struct svc_serv_ops nfsd_thread_sv_ops = {
+ .svo_function = nfsd,
+- .svo_module = THIS_MODULE,
+ };
+
+ void nfsd_shutdown_threads(struct net *net)
+@@ -1018,8 +1017,6 @@ nfsd(void *vrqstp)
+ msleep(20);
+ }
+
+- /* Release module */
+- module_put_and_kthread_exit(0);
+ return 0;
+ }
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index fd7ccba415f51..61768495354a0 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -57,11 +57,6 @@ struct svc_serv;
+ struct svc_serv_ops {
+ /* function for service threads to run */
+ int (*svo_function)(void *);
+-
+- /* optional module to count when adding threads.
+- * Thread function must call module_put_and_kthread_exit() to exit.
+- */
+- struct module *svo_module;
+ };
+
+ /*
+diff --git a/kernel/module.c b/kernel/module.c
+index f2b8314546f17..2226b591b52e0 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -335,7 +335,7 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
+
+ /*
+ * A thread that wants to hold a reference to a module only while it
+- * is running can call this to safely exit. nfsd and lockd use this.
++ * is running can call this to safely exit.
+ */
+ void __noreturn __module_put_and_kthread_exit(struct module *mod, long code)
+ {
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 6f45f3f45514c..239d10018216a 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -736,11 +736,9 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ if (IS_ERR(rqstp))
+ return PTR_ERR(rqstp);
+
+- __module_get(serv->sv_ops->svo_module);
+ task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
+ node, "%s", serv->sv_name);
+ if (IS_ERR(task)) {
+- module_put(serv->sv_ops->svo_module);
+ svc_exit_thread(rqstp);
+ return PTR_ERR(task);
+ }
+--
+2.43.0
+
--- /dev/null
+From ded34dee9c3456a0c80b0dd6acfc32b6e00fe18f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 14:44:47 -0400
+Subject: nfsd: remove the pages_flushed statistic from filecache
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 1f696e230ea5198e393368b319eb55651828d687 ]
+
+We're counting mapping->nrpages, but not all of those are necessarily
+dirty. We don't really have a simple way to count just the dirty pages,
+so just remove this stat since it's not accurate.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 7 +------
+ 1 file changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index b43d2d7ac5957..b95b1be5b2e43 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -57,7 +57,6 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
+-static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
+
+ struct nfsd_fcache_disposal {
+@@ -395,7 +394,6 @@ nfsd_file_flush(struct nfsd_file *nf)
+
+ if (!file || !(file->f_mode & FMODE_WRITE))
+ return;
+- this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
+ if (vfs_fsync(file, 1) != 0)
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ }
+@@ -1022,7 +1020,6 @@ nfsd_file_cache_shutdown(void)
+ per_cpu(nfsd_file_acquisitions, i) = 0;
+ per_cpu(nfsd_file_releases, i) = 0;
+ per_cpu(nfsd_file_total_age, i) = 0;
+- per_cpu(nfsd_file_pages_flushed, i) = 0;
+ per_cpu(nfsd_file_evictions, i) = 0;
+ }
+ }
+@@ -1237,7 +1234,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+- unsigned long releases = 0, pages_flushed = 0, evictions = 0;
++ unsigned long releases = 0, evictions = 0;
+ unsigned long hits = 0, acquisitions = 0;
+ unsigned int i, count = 0, buckets = 0;
+ unsigned long lru = 0, total_age = 0;
+@@ -1265,7 +1262,6 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ releases += per_cpu(nfsd_file_releases, i);
+ total_age += per_cpu(nfsd_file_total_age, i);
+ evictions += per_cpu(nfsd_file_evictions, i);
+- pages_flushed += per_cpu(nfsd_file_pages_flushed, i);
+ }
+
+ seq_printf(m, "total entries: %u\n", count);
+@@ -1279,6 +1275,5 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
+ else
+ seq_printf(m, "mean age (ms): -\n");
+- seq_printf(m, "pages flushed: %lu\n", pages_flushed);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From f7edcb57f7a9e32d73fd51c71eb3e9e94bb72c91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:23:30 -0400
+Subject: NFSD: Remove unused nfsd4_compoundargs::cachetype field
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 77e378cf2a595d8e39cddf28a31efe6afd9394a0 ]
+
+This field was added by commit 1091006c5eb1 ("nfsd: turn on reply
+cache for NFSv4") but was never put to use.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/xdr4.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 7fcbc7a46c157..b2bc85421b507 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -724,7 +724,6 @@ struct nfsd4_compoundargs {
+ u32 opcnt;
+ struct nfsd4_op *ops;
+ struct nfsd4_op iops[8];
+- int cachetype;
+ };
+
+ struct nfsd4_compoundres {
+--
+2.43.0
+
--- /dev/null
+From d63f1bf94be759ffca0cf85069f6f5c054243762 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Dec 2021 10:22:05 -0500
+Subject: NFSD: Rename boot verifier functions
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3988a57885eeac05ef89f0ab4d7e47b52fbcf630 ]
+
+Clean up: These functions handle what the specs call a write
+verifier, which in the Linux NFS server implementation is now
+divorced from the server's boot instance.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 2 +-
+ fs/nfsd/netns.h | 4 ++--
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfssvc.c | 16 ++++++++--------
+ fs/nfsd/vfs.c | 16 ++++++++--------
+ 5 files changed, 20 insertions(+), 20 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index b99852b30308a..94157b82b60e1 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -237,7 +237,7 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
+ trace_nfsd_file_unhash(nf);
+
+ if (nfsd_file_check_write_error(nf))
+- nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
++ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
+ hlist_del_rcu(&nf->nf_node);
+ atomic_long_dec(&nfsd_filecache_count);
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index a6ed300259849..1b1a962a18041 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -198,6 +198,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn);
+
+ extern unsigned int nfsd_net_id;
+
+-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn);
+-void nfsd_reset_boot_verifier(struct nfsd_net *nn);
++void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
++void nfsd_reset_write_verifier(struct nfsd_net *nn);
+ #endif /* __NFSD_NETNS_H__ */
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 861af46ebc6cf..a8ad7e6ace927 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -598,7 +598,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
+
+ BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
+
+- nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id));
++ nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
+ }
+
+ static __be32
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 5a60664695352..2efe9d33a2827 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -346,14 +346,14 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn)
+ }
+
+ /**
+- * nfsd_copy_boot_verifier - Atomically copy a write verifier
++ * nfsd_copy_write_verifier - Atomically copy a write verifier
+ * @verf: buffer in which to receive the verifier cookie
+ * @nn: NFS net namespace
+ *
+ * This function provides a wait-free mechanism for copying the
+- * namespace's boot verifier without tearing it.
++ * namespace's write verifier without tearing it.
+ */
+-void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
++void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
+ {
+ int seq = 0;
+
+@@ -364,7 +364,7 @@ void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+ done_seqretry(&nn->writeverf_lock, seq);
+ }
+
+-static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
++static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
+ {
+ struct timespec64 now;
+ u64 verf;
+@@ -379,7 +379,7 @@ static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+ }
+
+ /**
+- * nfsd_reset_boot_verifier - Generate a new boot verifier
++ * nfsd_reset_write_verifier - Generate a new write verifier
+ * @nn: NFS net namespace
+ *
+ * This function updates the ->writeverf field of @nn. This field
+@@ -391,10 +391,10 @@ static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+ * server and MUST be unique between instances of the NFSv4.1
+ * server."
+ */
+-void nfsd_reset_boot_verifier(struct nfsd_net *nn)
++void nfsd_reset_write_verifier(struct nfsd_net *nn)
+ {
+ write_seqlock(&nn->writeverf_lock);
+- nfsd_reset_boot_verifier_locked(nn);
++ nfsd_reset_write_verifier_locked(nn);
+ write_sequnlock(&nn->writeverf_lock);
+ }
+
+@@ -683,7 +683,7 @@ int nfsd_create_serv(struct net *net)
+ register_inet6addr_notifier(&nfsd_inet6addr_notifier);
+ #endif
+ }
+- nfsd_reset_boot_verifier(nn);
++ nfsd_reset_write_verifier(nn);
+ return 0;
+ }
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 721cf315551ad..d7035e3d1a229 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -565,8 +565,8 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
+- nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+- nfsd_net_id));
++ nfsd_reset_write_verifier(net_generic(nf_dst->nf_net,
++ nfsd_net_id));
+ ret = nfserrno(status);
+ }
+ }
+@@ -1025,10 +1025,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+- nfsd_copy_boot_verifier(verf, nn);
++ nfsd_copy_write_verifier(verf, nn);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0) {
+- nfsd_reset_boot_verifier(nn);
++ nfsd_reset_write_verifier(nn);
+ goto out_nfserr;
+ }
+ *cnt = host_err;
+@@ -1041,7 +1041,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ if (stable && use_wgather) {
+ host_err = wait_for_concurrent_writes(file);
+ if (host_err < 0)
+- nfsd_reset_boot_verifier(nn);
++ nfsd_reset_write_verifier(nn);
+ }
+
+ out_nfserr:
+@@ -1173,7 +1173,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
+ switch (err2) {
+ case 0:
+- nfsd_copy_boot_verifier(verf, nn);
++ nfsd_copy_write_verifier(verf, nn);
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
+ err = nfserrno(err2);
+@@ -1182,11 +1182,11 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ err = nfserr_notsupp;
+ break;
+ default:
+- nfsd_reset_boot_verifier(nn);
++ nfsd_reset_write_verifier(nn);
+ err = nfserrno(err2);
+ }
+ } else
+- nfsd_copy_boot_verifier(verf, nn);
++ nfsd_copy_write_verifier(verf, nn);
+
+ nfsd_file_put(nf);
+ out:
+--
+2.43.0
+
--- /dev/null
+From 65caa21833c6cf71c80fe271bac8785c66aa038a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Sep 2022 13:10:35 -0400
+Subject: NFSD: Rename the fields in copy_stateid_t
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 781fde1a2ba2391f31142f46f964cf1148ca1791 ]
+
+Code maintenance: The name of the copy_stateid_t::sc_count field
+collides with the sc_count field in struct nfs4_stid, making the
+latter difficult to grep for when auditing stateid reference
+counting.
+
+No behavior change expected.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 6 +++---
+ fs/nfsd/nfs4state.c | 30 +++++++++++++++---------------
+ fs/nfsd/state.h | 6 +++---
+ 3 files changed, 21 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index c8d299bc9e55a..59f675f194ebb 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1820,7 +1820,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (!nfs4_init_copy_state(nn, copy))
+ goto out_err;
+ refcount_set(&async_copy->refcount, 1);
+- memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.stid,
++ memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
+ sizeof(copy->cp_res.cb_stateid));
+ dup_copy_fields(copy, async_copy);
+ async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
+@@ -1858,7 +1858,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
+
+ spin_lock(&clp->async_lock);
+ list_for_each_entry(copy, &clp->async_copies, copies) {
+- if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE))
++ if (memcmp(&copy->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE))
+ continue;
+ refcount_inc(&copy->refcount);
+ spin_unlock(&clp->async_lock);
+@@ -1912,7 +1912,7 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ cps = nfs4_alloc_init_cpntf_state(nn, stid);
+ if (!cps)
+ goto out;
+- memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
++ memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t));
+ memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
+ memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index cc258f2988c73..f427f95ab934e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -985,19 +985,19 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
+ * Create a unique stateid_t to represent each COPY.
+ */
+ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+- unsigned char sc_type)
++ unsigned char cs_type)
+ {
+ int new_id;
+
+- stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+- stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+- stid->sc_type = sc_type;
++ stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
++ stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
++ stid->cs_type = cs_type;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&nn->s2s_cp_lock);
+ new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
+- stid->stid.si_opaque.so_id = new_id;
+- stid->stid.si_generation = 1;
++ stid->cs_stid.si_opaque.so_id = new_id;
++ stid->cs_stid.si_generation = 1;
+ spin_unlock(&nn->s2s_cp_lock);
+ idr_preload_end();
+ if (new_id < 0)
+@@ -1019,7 +1019,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ if (!cps)
+ return NULL;
+ cps->cpntf_time = ktime_get_boottime_seconds();
+- refcount_set(&cps->cp_stateid.sc_count, 1);
++ refcount_set(&cps->cp_stateid.cs_count, 1);
+ if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
+ goto out_free;
+ spin_lock(&nn->s2s_cp_lock);
+@@ -1035,11 +1035,11 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy)
+ {
+ struct nfsd_net *nn;
+
+- WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
++ WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID);
+ nn = net_generic(copy->cp_clp->net, nfsd_net_id);
+ spin_lock(&nn->s2s_cp_lock);
+ idr_remove(&nn->s2s_cp_stateids,
+- copy->cp_stateid.stid.si_opaque.so_id);
++ copy->cp_stateid.cs_stid.si_opaque.so_id);
+ spin_unlock(&nn->s2s_cp_lock);
+ }
+
+@@ -6044,7 +6044,7 @@ nfs4_laundromat(struct nfsd_net *nn)
+ spin_lock(&nn->s2s_cp_lock);
+ idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+ cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
+- if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
++ if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
+ state_expired(&lt, cps->cpntf_time))
+ _free_cpntf_state_locked(nn, cps);
+ }
+@@ -6374,12 +6374,12 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
+ static void
+ _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+ {
+- WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
+- if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
++ WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID);
++ if (!refcount_dec_and_test(&cps->cp_stateid.cs_count))
+ return;
+ list_del(&cps->cp_list);
+ idr_remove(&nn->s2s_cp_stateids,
+- cps->cp_stateid.stid.si_opaque.so_id);
++ cps->cp_stateid.cs_stid.si_opaque.so_id);
+ kfree(cps);
+ }
+ /*
+@@ -6401,12 +6401,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ if (cps_t) {
+ state = container_of(cps_t, struct nfs4_cpntf_state,
+ cp_stateid);
+- if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) {
++ if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) {
+ state = NULL;
+ goto unlock;
+ }
+ if (!clp)
+- refcount_inc(&state->cp_stateid.sc_count);
++ refcount_inc(&state->cp_stateid.cs_count);
+ else
+ _free_cpntf_state_locked(nn, state);
+ }
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index 4155be65d8069..b3477087a9fc3 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -57,11 +57,11 @@ typedef struct {
+ } stateid_t;
+
+ typedef struct {
+- stateid_t stid;
++ stateid_t cs_stid;
+ #define NFS4_COPY_STID 1
+ #define NFS4_COPYNOTIFY_STID 2
+- unsigned char sc_type;
+- refcount_t sc_count;
++ unsigned char cs_type;
++ refcount_t cs_count;
+ } copy_stateid_t;
+
+ struct nfsd4_callback {
+--
+2.43.0
+
--- /dev/null
+From a2b9897860326fdf6a7860f2cbfea3c1de81ceea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:28 -0400
+Subject: NFSD: Reorder the fields in struct nfsd4_op
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit d314309425ad5dc1b6facdb2d456580fb5fa5e3a ]
+
+Pack the fields to reduce the size of struct nfsd4_op, which is used
+as an array in struct nfsd4_compoundargs.
+
+sizeof(struct nfsd4_op):
+Before: /* size: 672, cachelines: 11, members: 5 */
+After: /* size: 640, cachelines: 10, members: 5 */
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/xdr4.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index ed90843a55293..87b4270af59ef 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -606,8 +606,9 @@ struct nfsd4_copy_notify {
+
+ struct nfsd4_op {
+ u32 opnum;
+- const struct nfsd4_operation * opdesc;
+ __be32 status;
++ const struct nfsd4_operation *opdesc;
++ struct nfs4_replay *replay;
+ union nfsd4_op_u {
+ struct nfsd4_access access;
+ struct nfsd4_close close;
+@@ -671,7 +672,6 @@ struct nfsd4_op {
+ struct nfsd4_listxattrs listxattrs;
+ struct nfsd4_removexattr removexattr;
+ } u;
+- struct nfs4_replay * replay;
+ };
+
+ bool nfsd4_cache_this_op(struct nfsd4_op *);
+--
+2.43.0
+
--- /dev/null
+From 4cc99ade736851a3fd9c2466bff9b599bf97c96e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 14:44:48 -0400
+Subject: nfsd: reorganize filecache.c
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 8214118589881b2d390284410c5ff275e7a5e03c ]
+
+In a coming patch, we're going to rework how the filecache refcounting
+works. Move some code around in the file to reduce the churn in the
+later patches, and rename some of the functions with (hopefully) clearer
+names: nfsd_file_flush becomes nfsd_file_fsync, and
+nfsd_file_unhash_and_dispose is renamed to nfsd_file_unhash_and_queue.
+
+Also, the nfsd_file_put_final tracepoint is renamed to nfsd_file_free,
+to better match the name of the function from which it's called.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 111 ++++++++++++++++++++++----------------------
+ fs/nfsd/trace.h | 4 +-
+ 2 files changed, 58 insertions(+), 57 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index b95b1be5b2e43..fb7ada3f7410e 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -334,16 +334,59 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+ return nf;
+ }
+
++static void
++nfsd_file_fsync(struct nfsd_file *nf)
++{
++ struct file *file = nf->nf_file;
++
++ if (!file || !(file->f_mode & FMODE_WRITE))
++ return;
++ if (vfs_fsync(file, 1) != 0)
++ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
++}
++
++static int
++nfsd_file_check_write_error(struct nfsd_file *nf)
++{
++ struct file *file = nf->nf_file;
++
++ if (!file || !(file->f_mode & FMODE_WRITE))
++ return 0;
++ return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
++}
++
++static void
++nfsd_file_hash_remove(struct nfsd_file *nf)
++{
++ trace_nfsd_file_unhash(nf);
++
++ if (nfsd_file_check_write_error(nf))
++ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
++ rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
++ nfsd_file_rhash_params);
++}
++
++static bool
++nfsd_file_unhash(struct nfsd_file *nf)
++{
++ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
++ nfsd_file_hash_remove(nf);
++ return true;
++ }
++ return false;
++}
++
+ static bool
+ nfsd_file_free(struct nfsd_file *nf)
+ {
+ s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
+ bool flush = false;
+
++ trace_nfsd_file_free(nf);
++
+ this_cpu_inc(nfsd_file_releases);
+ this_cpu_add(nfsd_file_total_age, age);
+
+- trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+@@ -377,27 +420,6 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+ }
+
+-static int
+-nfsd_file_check_write_error(struct nfsd_file *nf)
+-{
+- struct file *file = nf->nf_file;
+-
+- if (!file || !(file->f_mode & FMODE_WRITE))
+- return 0;
+- return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
+-}
+-
+-static void
+-nfsd_file_flush(struct nfsd_file *nf)
+-{
+- struct file *file = nf->nf_file;
+-
+- if (!file || !(file->f_mode & FMODE_WRITE))
+- return;
+- if (vfs_fsync(file, 1) != 0)
+- nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+-}
+-
+ static void nfsd_file_lru_add(struct nfsd_file *nf)
+ {
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+@@ -411,31 +433,18 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf)
+ trace_nfsd_file_lru_del(nf);
+ }
+
+-static void
+-nfsd_file_hash_remove(struct nfsd_file *nf)
+-{
+- trace_nfsd_file_unhash(nf);
+-
+- if (nfsd_file_check_write_error(nf))
+- nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+- rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
+- nfsd_file_rhash_params);
+-}
+-
+-static bool
+-nfsd_file_unhash(struct nfsd_file *nf)
++struct nfsd_file *
++nfsd_file_get(struct nfsd_file *nf)
+ {
+- if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- nfsd_file_hash_remove(nf);
+- return true;
+- }
+- return false;
++ if (likely(refcount_inc_not_zero(&nf->nf_ref)))
++ return nf;
++ return NULL;
+ }
+
+ static void
+-nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
++nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose)
+ {
+- trace_nfsd_file_unhash_and_dispose(nf);
++ trace_nfsd_file_unhash_and_queue(nf);
+ if (nfsd_file_unhash(nf)) {
+ /* caller must call nfsd_file_dispose_list() later */
+ nfsd_file_lru_remove(nf);
+@@ -473,7 +482,7 @@ nfsd_file_put(struct nfsd_file *nf)
+ nfsd_file_unhash_and_put(nf);
+
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- nfsd_file_flush(nf);
++ nfsd_file_fsync(nf);
+ nfsd_file_put_noref(nf);
+ } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
+ nfsd_file_put_noref(nf);
+@@ -482,14 +491,6 @@ nfsd_file_put(struct nfsd_file *nf)
+ nfsd_file_put_noref(nf);
+ }
+
+-struct nfsd_file *
+-nfsd_file_get(struct nfsd_file *nf)
+-{
+- if (likely(refcount_inc_not_zero(&nf->nf_ref)))
+- return nf;
+- return NULL;
+-}
+-
+ static void
+ nfsd_file_dispose_list(struct list_head *dispose)
+ {
+@@ -498,7 +499,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del_init(&nf->nf_lru);
+- nfsd_file_flush(nf);
++ nfsd_file_fsync(nf);
+ nfsd_file_put_noref(nf);
+ }
+ }
+@@ -512,7 +513,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del_init(&nf->nf_lru);
+- nfsd_file_flush(nf);
++ nfsd_file_fsync(nf);
+ if (!refcount_dec_and_test(&nf->nf_ref))
+ continue;
+ if (nfsd_file_free(nf))
+@@ -712,7 +713,7 @@ __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
+ nfsd_file_rhash_params);
+ if (!nf)
+ break;
+- nfsd_file_unhash_and_dispose(nf, dispose);
++ nfsd_file_unhash_and_queue(nf, dispose);
+ count++;
+ } while (1);
+ rcu_read_unlock();
+@@ -914,7 +915,7 @@ __nfsd_file_cache_purge(struct net *net)
+ nf = rhashtable_walk_next(&iter);
+ while (!IS_ERR_OR_NULL(nf)) {
+ if (!net || nf->nf_net == net)
+- nfsd_file_unhash_and_dispose(nf, &dispose);
++ nfsd_file_unhash_and_queue(nf, &dispose);
+ nf = rhashtable_walk_next(&iter);
+ }
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index d50d4d6e822df..2c72a666aa9c2 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -905,10 +905,10 @@ DEFINE_EVENT(nfsd_file_class, name, \
+ TP_PROTO(struct nfsd_file *nf), \
+ TP_ARGS(nf))
+
+-DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
++DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+-DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose);
++DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue);
+
+ TRACE_EVENT(nfsd_file_alloc,
+ TP_PROTO(
+--
+2.43.0
+
--- /dev/null
+From ed5117bf95451e57c5b4d0078cca48b0d547e667 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:41 -0400
+Subject: NFSD: Replace boolean fields in struct nfsd4_copy
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 1913cdf56cb5bfbc8170873728d13598cbecda23 ]
+
+Clean up: saves 8 bytes, and we can replace check_and_set_stop_copy()
+with an atomic bitop.
+
+[ cel: adjusted to apply to v5.15.y ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 49 +++++++++++++++++-----------------------------
+ fs/nfsd/nfs4xdr.c | 12 ++++++------
+ fs/nfsd/xdr4.h | 33 ++++++++++++++++++++++++++-----
+ 3 files changed, 52 insertions(+), 42 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 3e4b0fb44c7b7..4fd6611d29ce4 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1295,23 +1295,9 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
+ kfree(copy);
+ }
+
+-static bool
+-check_and_set_stop_copy(struct nfsd4_copy *copy)
+-{
+- bool value;
+-
+- spin_lock(&copy->cp_clp->async_lock);
+- value = copy->stopped;
+- if (!copy->stopped)
+- copy->stopped = true;
+- spin_unlock(&copy->cp_clp->async_lock);
+- return value;
+-}
+-
+ static void nfsd4_stop_copy(struct nfsd4_copy *copy)
+ {
+- /* only 1 thread should stop the copy */
+- if (!check_and_set_stop_copy(copy))
++ if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags))
+ kthread_stop(copy->copy_task);
+ nfs4_put_copy(copy);
+ }
+@@ -1669,8 +1655,9 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = {
+ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
+ {
+ copy->cp_res.wr_stable_how =
+- copy->committed ? NFS_FILE_SYNC : NFS_UNSTABLE;
+- copy->cp_synchronous = sync;
++ test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ?
++ NFS_FILE_SYNC : NFS_UNSTABLE;
++ nfsd4_copy_set_sync(copy, sync);
+ gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
+ }
+
+@@ -1700,16 +1687,16 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+ copy->cp_res.wr_bytes_written += bytes_copied;
+ src_pos += bytes_copied;
+ dst_pos += bytes_copied;
+- } while (bytes_total > 0 && !copy->cp_synchronous);
++ } while (bytes_total > 0 && nfsd4_copy_is_async(copy));
+ /* for a non-zero asynchronous copy do a commit of data */
+- if (!copy->cp_synchronous && copy->cp_res.wr_bytes_written > 0) {
++ if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) {
+ since = READ_ONCE(dst->f_wb_err);
+ end = copy->cp_dst_pos + copy->cp_res.wr_bytes_written - 1;
+ status = vfs_fsync_range(dst, copy->cp_dst_pos, end, 0);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
+- copy->committed = true;
++ set_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags);
+ }
+ return bytes_copied;
+ }
+@@ -1730,7 +1717,7 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
+ status = nfs_ok;
+ }
+
+- if (!copy->cp_intra) /* Inter server SSC */
++ if (nfsd4_ssc_is_inter(copy))
+ nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src,
+ copy->nf_dst);
+ else
+@@ -1744,13 +1731,13 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+ dst->cp_src_pos = src->cp_src_pos;
+ dst->cp_dst_pos = src->cp_dst_pos;
+ dst->cp_count = src->cp_count;
+- dst->cp_synchronous = src->cp_synchronous;
++ dst->cp_flags = src->cp_flags;
+ memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
+ memcpy(&dst->fh, &src->fh, sizeof(src->fh));
+ dst->cp_clp = src->cp_clp;
+ dst->nf_dst = nfsd_file_get(src->nf_dst);
+- dst->cp_intra = src->cp_intra;
+- if (src->cp_intra) /* for inter, file_src doesn't exist yet */
++ /* for inter, nf_src doesn't exist yet */
++ if (!nfsd4_ssc_is_inter(src))
+ dst->nf_src = nfsd_file_get(src->nf_src);
+
+ memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+@@ -1764,7 +1751,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
+ {
+ nfs4_free_copy_state(copy);
+ nfsd_file_put(copy->nf_dst);
+- if (copy->cp_intra)
++ if (!nfsd4_ssc_is_inter(copy))
+ nfsd_file_put(copy->nf_src);
+ spin_lock(&copy->cp_clp->async_lock);
+ list_del(&copy->copies);
+@@ -1777,7 +1764,7 @@ static int nfsd4_do_async_copy(void *data)
+ struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
+ struct nfsd4_copy *cb_copy;
+
+- if (!copy->cp_intra) { /* Inter server SSC */
++ if (nfsd4_ssc_is_inter(copy)) {
+ copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+ if (!copy->nf_src) {
+ copy->nfserr = nfserr_serverfault;
+@@ -1809,7 +1796,7 @@ static int nfsd4_do_async_copy(void *data)
+ &copy->fh, copy->cp_count, copy->nfserr);
+ nfsd4_run_cb(&cb_copy->cp_cb);
+ out:
+- if (!copy->cp_intra)
++ if (nfsd4_ssc_is_inter(copy))
+ kfree(copy->nf_src);
+ cleanup_async_copy(copy);
+ return 0;
+@@ -1823,8 +1810,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ __be32 status;
+ struct nfsd4_copy *async_copy = NULL;
+
+- if (!copy->cp_intra) { /* Inter server SSC */
+- if (!inter_copy_offload_enable || copy->cp_synchronous) {
++ if (nfsd4_ssc_is_inter(copy)) {
++ if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) {
+ status = nfserr_notsupp;
+ goto out;
+ }
+@@ -1841,7 +1828,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ copy->cp_clp = cstate->clp;
+ memcpy(&copy->fh, &cstate->current_fh.fh_handle,
+ sizeof(struct knfsd_fh));
+- if (!copy->cp_synchronous) {
++ if (nfsd4_copy_is_async(copy)) {
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ status = nfserrno(-ENOMEM);
+@@ -2607,7 +2594,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+ return;
+ }
+ putfh = (struct nfsd4_putfh *)&saved_op->u;
+- if (!copy->cp_intra)
++ if (nfsd4_ssc_is_inter(copy))
+ putfh->no_verify = true;
+ }
+ }
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index dad744d44b0a2..5aafbd0f7ae30 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -1896,8 +1896,8 @@ static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+ static __be32
+ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+ {
++ u32 consecutive, i, count, sync;
+ struct nl4_server *ns_dummy;
+- u32 consecutive, i, count;
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &copy->cp_src_stateid);
+@@ -1915,17 +1915,17 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+ /* ca_consecutive: we always do consecutive copies */
+ if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0)
+ return nfserr_bad_xdr;
+- if (xdr_stream_decode_u32(argp->xdr, &copy->cp_synchronous) < 0)
++ if (xdr_stream_decode_bool(argp->xdr, &sync) < 0)
+ return nfserr_bad_xdr;
++ nfsd4_copy_set_sync(copy, sync);
+
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src));
+ if (copy->cp_src == NULL)
+ return nfserr_jukebox;
+- copy->cp_intra = false;
+ if (count == 0) { /* intra-server copy */
+- copy->cp_intra = true;
++ __set_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+ return nfs_ok;
+ }
+
+@@ -4709,13 +4709,13 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+ __be32 *p;
+
+ nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
+- !!copy->cp_synchronous);
++ nfsd4_copy_is_sync(copy));
+ if (nfserr)
+ return nfserr;
+
+ p = xdr_reserve_space(resp->xdr, 4 + 4);
+ *p++ = xdr_one; /* cr_consecutive */
+- *p++ = cpu_to_be32(copy->cp_synchronous);
++ *p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero;
+ return 0;
+ }
+
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 87b4270af59ef..37d1b6d0486b3 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -541,10 +541,12 @@ struct nfsd4_copy {
+ u64 cp_dst_pos;
+ u64 cp_count;
+ struct nl4_server *cp_src;
+- bool cp_intra;
+
+- /* both */
+- u32 cp_synchronous;
++ unsigned long cp_flags;
++#define NFSD4_COPY_F_STOPPED (0)
++#define NFSD4_COPY_F_INTRA (1)
++#define NFSD4_COPY_F_SYNCHRONOUS (2)
++#define NFSD4_COPY_F_COMMITTED (3)
+
+ /* response */
+ struct nfsd42_write_res cp_res;
+@@ -564,14 +566,35 @@ struct nfsd4_copy {
+ struct list_head copies;
+ struct task_struct *copy_task;
+ refcount_t refcount;
+- bool stopped;
+
+ struct vfsmount *ss_mnt;
+ struct nfs_fh c_fh;
+ nfs4_stateid stateid;
+- bool committed;
+ };
+
++static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync)
++{
++ if (sync)
++ set_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
++ else
++ clear_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
++}
++
++static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy)
++{
++ return test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
++}
++
++static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy)
++{
++ return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
++}
++
++static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy)
++{
++ return !test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
++}
++
+ struct nfsd4_seek {
+ /* request */
+ stateid_t seek_stateid;
+--
+2.43.0
+
--- /dev/null
+From c33009c9079558cc73e2e555c99c1eabf9b31bb9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 Jan 2023 16:06:51 -0800
+Subject: NFSD: replace delayed_work with work_struct for nfsd_client_shrinker
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit 7c24fa225081f31bc6da6a355c1ba801889ab29a ]
+
+Since nfsd4_state_shrinker_count always calls mod_delayed_work with
+0 delay, we can replace delayed_work with work_struct to save some
+space and overhead.
+
+Also add a call to cancel_work() after unregistering the shrinker
+in nfs4_state_shutdown_net().
+
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 2 +-
+ fs/nfsd/nfs4state.c | 8 ++++----
+ 2 files changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 8c854ba3285bb..51a4b7885cae2 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -195,7 +195,7 @@ struct nfsd_net {
+
+ atomic_t nfsd_courtesy_clients;
+ struct shrinker nfsd_client_shrinker;
+- struct delayed_work nfsd_shrinker_work;
++ struct work_struct nfsd_shrinker_work;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index ca0a1816500c3..22799f5ce686e 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4411,7 +4411,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+ if (!count)
+ count = atomic_long_read(&num_delegations);
+ if (count)
+- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
++ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
+ return (unsigned long)count;
+ }
+
+@@ -6228,8 +6228,7 @@ deleg_reaper(struct nfsd_net *nn)
+ static void
+ nfsd4_state_shrinker_worker(struct work_struct *work)
+ {
+- struct delayed_work *dwork = to_delayed_work(work);
+- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
++ struct nfsd_net *nn = container_of(work, struct nfsd_net,
+ nfsd_shrinker_work);
+
+ courtesy_client_reaper(nn);
+@@ -8057,7 +8056,7 @@ static int nfs4_state_create_net(struct net *net)
+ INIT_LIST_HEAD(&nn->blocked_locks_lru);
+
+ INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
++ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ get_net(net);
+
+ nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
+@@ -8164,6 +8163,7 @@ nfs4_state_shutdown_net(struct net *net)
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ unregister_shrinker(&nn->nfsd_client_shrinker);
++ cancel_work(&nn->nfsd_shrinker_work);
+ cancel_delayed_work_sync(&nn->laundromat_work);
+ locks_end_grace(&nn->nfsd4_manager);
+
+--
+2.43.0
+
--- /dev/null
+From b7d1493e5f127b0f92b38fbf653fec8b58549f8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:13:42 -0400
+Subject: NFSD: Replace dprintk() call site in fh_verify()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 948755efc951de75c87d4fa916d9d36b58299295 ]
+
+Record permission errors in the trace log. Note that the new trace
+event is conditional, so it will only record non-zero return values
+from nfsd_permission().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.c | 8 +-------
+ fs/nfsd/trace.h | 48 +++++++++++++++++++++++++++++++++++++++++++++---
+ 2 files changed, 46 insertions(+), 10 deletions(-)
+
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index a5b71526cee0f..d73434200df98 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -392,13 +392,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+ skip_pseudoflavor_check:
+ /* Finally, check access permissions. */
+ error = nfsd_permission(rqstp, exp, dentry, access);
+-
+- if (error) {
+- dprintk("fh_verify: %pd2 permission failure, "
+- "acc=%x, error=%d\n",
+- dentry,
+- access, ntohl(error));
+- }
++ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
+ out:
+ if (error == nfserr_stale)
+ nfsd_stats_fh_stale_inc(exp);
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index c824ab30a758e..297bf9ddc5090 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -195,7 +195,7 @@ TRACE_EVENT(nfsd_fh_verify,
+ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+ __field(u32, xid)
+ __field(u32, fh_hash)
+- __field(void *, inode)
++ __field(const void *, inode)
+ __field(unsigned long, type)
+ __field(unsigned long, access)
+ ),
+@@ -211,13 +211,55 @@ TRACE_EVENT(nfsd_fh_verify,
+ __entry->type = type;
+ __entry->access = access;
+ ),
+- TP_printk("xid=0x%08x fh_hash=0x%08x inode=%p type=%s access=%s",
+- __entry->xid, __entry->fh_hash, __entry->inode,
++ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s",
++ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type),
+ show_nfsd_may_flags(__entry->access)
+ )
+ );
+
++TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
++ TP_PROTO(
++ const struct svc_rqst *rqstp,
++ const struct svc_fh *fhp,
++ umode_t type,
++ int access,
++ __be32 error
++ ),
++ TP_ARGS(rqstp, fhp, type, access, error),
++ TP_CONDITION(error),
++ TP_STRUCT__entry(
++ __field(unsigned int, netns_ino)
++ __sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
++ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
++ __field(u32, xid)
++ __field(u32, fh_hash)
++ __field(const void *, inode)
++ __field(unsigned long, type)
++ __field(unsigned long, access)
++ __field(int, error)
++ ),
++ TP_fast_assign(
++ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
++ __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local,
++ rqstp->rq_xprt->xpt_locallen);
++ __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote,
++ rqstp->rq_xprt->xpt_remotelen);
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
++ __entry->inode = d_inode(fhp->fh_dentry);
++ __entry->type = type;
++ __entry->access = access;
++ __entry->error = be32_to_cpu(error);
++ ),
++ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d",
++ __entry->xid, __entry->fh_hash,
++ show_fs_file_type(__entry->type),
++ show_nfsd_may_flags(__entry->access),
++ __entry->error
++ )
++);
++
+ DECLARE_EVENT_CLASS(nfsd_fh_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *fhp,
+--
+2.43.0
+
--- /dev/null
+From 4cb539b1ae97d32eff5896d8f6363063d43c7015 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:16 -0400
+Subject: NFSD: Replace the "init once" mechanism
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c7b824c3d06c85e054caf86e227255112c5e3c38 ]
+
+In a moment, the nfsd_file_hashtbl global will be replaced with an
+rhashtable. Replace the one or two spots that need to check if the
+hash table is available. We can easily reuse the SHUTDOWN flag for
+this purpose.
+
+Document that this mechanism relies on callers holding the
+nfsd_mutex to prevent init, shutdown, and purging from running
+concurrently.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 42 ++++++++++++++++++++++++++----------------
+ 1 file changed, 26 insertions(+), 16 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 29b1f57692a60..33bb4d31b4972 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -27,7 +27,7 @@
+ #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
+ #define NFSD_LAUNDRETTE_DELAY (2 * HZ)
+
+-#define NFSD_FILE_SHUTDOWN (1)
++#define NFSD_FILE_CACHE_UP (0)
+
+ /* We only care about NFSD_MAY_READ/WRITE for this cache */
+ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+@@ -58,7 +58,7 @@ static struct kmem_cache *nfsd_file_slab;
+ static struct kmem_cache *nfsd_file_mark_slab;
+ static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
+ static struct list_lru nfsd_file_lru;
+-static long nfsd_file_lru_flags;
++static unsigned long nfsd_file_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+ static atomic_long_t nfsd_filecache_count;
+ static struct delayed_work nfsd_filecache_laundrette;
+@@ -66,9 +66,8 @@ static struct delayed_work nfsd_filecache_laundrette;
+ static void
+ nfsd_file_schedule_laundrette(void)
+ {
+- long count = atomic_long_read(&nfsd_filecache_count);
+-
+- if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
++ if ((atomic_long_read(&nfsd_filecache_count) == 0) ||
++ test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
+ return;
+
+ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+@@ -697,9 +696,8 @@ nfsd_file_cache_init(void)
+ int ret = -ENOMEM;
+ unsigned int i;
+
+- clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+-
+- if (nfsd_file_hashtbl)
++ lockdep_assert_held(&nfsd_mutex);
++ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+ return 0;
+
+ nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+@@ -785,8 +783,8 @@ nfsd_file_cache_init(void)
+ /*
+ * Note this can deadlock with nfsd_file_lru_cb.
+ */
+-void
+-nfsd_file_cache_purge(struct net *net)
++static void
++__nfsd_file_cache_purge(struct net *net)
+ {
+ unsigned int i;
+ struct nfsd_file *nf;
+@@ -794,9 +792,6 @@ nfsd_file_cache_purge(struct net *net)
+ LIST_HEAD(dispose);
+ bool del;
+
+- if (!nfsd_file_hashtbl)
+- return;
+-
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
+
+@@ -857,6 +852,19 @@ nfsd_file_cache_start_net(struct net *net)
+ return nn->fcache_disposal ? 0 : -ENOMEM;
+ }
+
++/**
++ * nfsd_file_cache_purge - Remove all cache items associated with @net
++ * @net: target net namespace
++ *
++ */
++void
++nfsd_file_cache_purge(struct net *net)
++{
++ lockdep_assert_held(&nfsd_mutex);
++ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
++ __nfsd_file_cache_purge(net);
++}
++
+ void
+ nfsd_file_cache_shutdown_net(struct net *net)
+ {
+@@ -869,7 +877,9 @@ nfsd_file_cache_shutdown(void)
+ {
+ int i;
+
+- set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
++ lockdep_assert_held(&nfsd_mutex);
++ if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
++ return;
+
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+ unregister_shrinker(&nfsd_file_shrinker);
+@@ -878,7 +888,7 @@ nfsd_file_cache_shutdown(void)
+ * calling nfsd_file_cache_purge
+ */
+ cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+- nfsd_file_cache_purge(NULL);
++ __nfsd_file_cache_purge(NULL);
+ list_lru_destroy(&nfsd_file_lru);
+ rcu_barrier();
+ fsnotify_put_group(nfsd_file_fsnotify_group);
+@@ -1142,7 +1152,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ * don't end up racing with server shutdown
+ */
+ mutex_lock(&nfsd_mutex);
+- if (nfsd_file_hashtbl) {
++ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
+ for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+ count += nfsd_file_hashtbl[i].nfb_count;
+ longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+--
+2.43.0
+
--- /dev/null
+From 5f76c8bb2c5ee3c35a80394259dc28d932bd141f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:12 -0400
+Subject: NFSD: Report average age of filecache items
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 904940e94a887701db24401e3ed6928a1d4e329f ]
+
+This is a measure of how long items stay in the filecache, to help
+assess how efficient the cache is.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 11 ++++++++++-
+ fs/nfsd/filecache.h | 1 +
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 5d31622c23040..0cd72c20fc12d 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -44,6 +44,7 @@ struct nfsd_fcache_bucket {
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
++static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
+
+ struct nfsd_fcache_disposal {
+ struct work_struct work;
+@@ -177,6 +178,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+ if (nf) {
+ INIT_HLIST_NODE(&nf->nf_node);
+ INIT_LIST_HEAD(&nf->nf_lru);
++ nf->nf_birthtime = ktime_get();
+ nf->nf_file = NULL;
+ nf->nf_cred = get_current_cred();
+ nf->nf_net = net;
+@@ -194,9 +196,11 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
+ static bool
+ nfsd_file_free(struct nfsd_file *nf)
+ {
++ s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
+ bool flush = false;
+
+ this_cpu_inc(nfsd_file_releases);
++ this_cpu_add(nfsd_file_total_age, age);
+
+ trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+@@ -1054,7 +1058,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+ unsigned long hits = 0, acquisitions = 0, releases = 0;
+ unsigned int i, count = 0, longest = 0;
+- unsigned long lru = 0;
++ unsigned long lru = 0, total_age = 0;
+
+ /*
+ * No need for spinlocks here since we're not terribly interested in
+@@ -1075,6 +1079,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ hits += per_cpu(nfsd_file_cache_hits, i);
+ acquisitions += per_cpu(nfsd_file_acquisitions, i);
+ releases += per_cpu(nfsd_file_releases, i);
++ total_age += per_cpu(nfsd_file_total_age, i);
+ }
+
+ seq_printf(m, "total entries: %u\n", count);
+@@ -1083,6 +1088,10 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "cache hits: %lu\n", hits);
+ seq_printf(m, "acquisitions: %lu\n", acquisitions);
+ seq_printf(m, "releases: %lu\n", releases);
++ if (releases)
++ seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
++ else
++ seq_printf(m, "mean age (ms): -\n");
+ return 0;
+ }
+
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index c9e3c6eb4776e..c6ad5fe47f12f 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -44,6 +44,7 @@ struct nfsd_file {
+ refcount_t nf_ref;
+ unsigned char nf_may;
+ struct nfsd_file_mark *nf_mark;
++ ktime_t nf_birthtime;
+ };
+
+ int nfsd_file_cache_init(void);
+--
+2.43.0
+
--- /dev/null
+From 8e6d23b0d66d45bca389404c92efdbfdb04fd865 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:23:59 -0400
+Subject: NFSD: Report count of calls to nfsd_file_acquire()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 29d4bdbbb910f33d6058d2c51278f00f656df325 ]
+
+Count the number of successful acquisitions that did not create a
+file (ie, acquisitions that do not result in a compulsory cache
+miss). This count can be compared directly with the reported hit
+count to compute a hit ratio.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 377d8211200ff..5a09b76ae25a8 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -42,6 +42,7 @@ struct nfsd_fcache_bucket {
+ };
+
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
++static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+
+ struct nfsd_fcache_disposal {
+ struct work_struct work;
+@@ -954,6 +955,8 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+ out:
+ if (status == nfs_ok) {
++ if (open)
++ this_cpu_inc(nfsd_file_acquisitions);
+ *pnf = nf;
+ } else {
+ nfsd_file_put(nf);
+@@ -1046,8 +1049,9 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
++ unsigned long hits = 0, acquisitions = 0;
+ unsigned int i, count = 0, longest = 0;
+- unsigned long lru = 0, hits = 0;
++ unsigned long lru = 0;
+
+ /*
+ * No need for spinlocks here since we're not terribly interested in
+@@ -1064,13 +1068,16 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ }
+ mutex_unlock(&nfsd_mutex);
+
+- for_each_possible_cpu(i)
++ for_each_possible_cpu(i) {
+ hits += per_cpu(nfsd_file_cache_hits, i);
++ acquisitions += per_cpu(nfsd_file_acquisitions, i);
++ }
+
+ seq_printf(m, "total entries: %u\n", count);
+ seq_printf(m, "longest chain: %u\n", longest);
+ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
++ seq_printf(m, "acquisitions: %lu\n", acquisitions);
+ return 0;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 6fe4924b444bc25de6ce1e42d2845e9ac66cf798 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:05 -0400
+Subject: NFSD: Report count of freed filecache items
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit d63293272abb51c02457f1017dfd61c3270d9ae3 ]
+
+Surface the count of freed nfsd_file items.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 5a09b76ae25a8..5d31622c23040 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -43,6 +43,7 @@ struct nfsd_fcache_bucket {
+
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
++static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+
+ struct nfsd_fcache_disposal {
+ struct work_struct work;
+@@ -195,6 +196,8 @@ nfsd_file_free(struct nfsd_file *nf)
+ {
+ bool flush = false;
+
++ this_cpu_inc(nfsd_file_releases);
++
+ trace_nfsd_file_put_final(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+@@ -1049,7 +1052,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+- unsigned long hits = 0, acquisitions = 0;
++ unsigned long hits = 0, acquisitions = 0, releases = 0;
+ unsigned int i, count = 0, longest = 0;
+ unsigned long lru = 0;
+
+@@ -1071,6 +1074,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ for_each_possible_cpu(i) {
+ hits += per_cpu(nfsd_file_cache_hits, i);
+ acquisitions += per_cpu(nfsd_file_acquisitions, i);
++ releases += per_cpu(nfsd_file_releases, i);
+ }
+
+ seq_printf(m, "total entries: %u\n", count);
+@@ -1078,6 +1082,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
+ seq_printf(m, "acquisitions: %lu\n", acquisitions);
++ seq_printf(m, "releases: %lu\n", releases);
+ return 0;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 1ef81c9bf71531df014adfef7e18e40cce4ea3b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:23:52 -0400
+Subject: NFSD: Report filecache LRU size
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0fd244c115f0321fc5e34ad2291f2a572508e3f7 ]
+
+Surface the NFSD filecache's LRU list length to help field
+troubleshooters monitor filecache issues.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 1d3d13b78be0e..377d8211200ff 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1047,7 +1047,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+ unsigned int i, count = 0, longest = 0;
+- unsigned long hits = 0;
++ unsigned long lru = 0, hits = 0;
+
+ /*
+ * No need for spinlocks here since we're not terribly interested in
+@@ -1060,6 +1060,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ count += nfsd_file_hashtbl[i].nfb_count;
+ longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
+ }
++ lru = list_lru_count(&nfsd_file_lru);
+ }
+ mutex_unlock(&nfsd_mutex);
+
+@@ -1068,6 +1069,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+
+ seq_printf(m, "total entries: %u\n", count);
+ seq_printf(m, "longest chain: %u\n", longest);
++ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
+ return 0;
+ }
+--
+2.43.0
+
--- /dev/null
+From 2d9400ff34f534e10719e896d187074848f1dbde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:38 -0400
+Subject: NFSD: Report the number of items evicted by the LRU walk
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 94660cc19c75083af046b0f8362e3d3bc2eba21d ]
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 13 ++++++++++---
+ fs/nfsd/trace.h | 29 +++++++++++++++++++++++++++++
+ 2 files changed, 39 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 1d94491e5ddad..e5bd9f06492c8 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -45,6 +45,7 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
++static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
+
+ struct nfsd_fcache_disposal {
+ struct work_struct work;
+@@ -445,6 +446,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ goto out_skip;
+
+ list_lru_isolate_move(lru, &nf->nf_lru, head);
++ this_cpu_inc(nfsd_file_evictions);
+ return LRU_REMOVED;
+ out_skip:
+ return LRU_SKIP;
+@@ -475,9 +477,11 @@ static void
+ nfsd_file_gc(void)
+ {
+ LIST_HEAD(dispose);
++ unsigned long ret;
+
+- list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
+- &dispose, LONG_MAX);
++ ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
++ &dispose, LONG_MAX);
++ trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_gc_dispose_list(&dispose);
+ }
+
+@@ -502,6 +506,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &dispose);
++ trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_gc_dispose_list(&dispose);
+ return ret;
+ }
+@@ -1064,7 +1069,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ */
+ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+- unsigned long hits = 0, acquisitions = 0, releases = 0;
++ unsigned long hits = 0, acquisitions = 0, releases = 0, evictions = 0;
+ unsigned int i, count = 0, longest = 0;
+ unsigned long lru = 0, total_age = 0;
+
+@@ -1088,6 +1093,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ acquisitions += per_cpu(nfsd_file_acquisitions, i);
+ releases += per_cpu(nfsd_file_releases, i);
+ total_age += per_cpu(nfsd_file_total_age, i);
++ evictions += per_cpu(nfsd_file_evictions, i);
+ }
+
+ seq_printf(m, "total entries: %u\n", count);
+@@ -1096,6 +1102,7 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "cache hits: %lu\n", hits);
+ seq_printf(m, "acquisitions: %lu\n", acquisitions);
+ seq_printf(m, "releases: %lu\n", releases);
++ seq_printf(m, "evictions: %lu\n", evictions);
+ if (releases)
+ seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
+ else
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 593218d8a54d0..71919f7a31dc8 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -894,6 +894,35 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ __entry->nlink, __entry->mode, __entry->mask)
+ );
+
++DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
++ TP_PROTO(
++ unsigned long removed,
++ unsigned long remaining
++ ),
++ TP_ARGS(removed, remaining),
++ TP_STRUCT__entry(
++ __field(unsigned long, removed)
++ __field(unsigned long, remaining)
++ ),
++ TP_fast_assign(
++ __entry->removed = removed;
++ __entry->remaining = remaining;
++ ),
++ TP_printk("%lu entries removed, %lu remaining",
++ __entry->removed, __entry->remaining)
++);
++
++#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \
++DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
++ TP_PROTO( \
++ unsigned long removed, \
++ unsigned long remaining \
++ ), \
++ TP_ARGS(removed, remaining))
++
++DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
++DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
++
+ #include "cache.h"
+
+ TRACE_DEFINE_ENUM(RC_DROPIT);
+--
+2.43.0
+
--- /dev/null
+From 5bb600464636aca928569e423d893047eb10dad2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 18 Dec 2021 20:37:56 -0500
+Subject: nfsd: Retry once in nfsd_open on an -EOPENSTALE return
+
+From: Jeff Layton <jeff.layton@primarydata.com>
+
+[ Upstream commit 12bcbd40fd931472c7fc9cf3bfe66799ece93ed8 ]
+
+If we get back -EOPENSTALE from an NFSv4 open, then we either got some
+unhandled error or the inode we got back was not the same as the one
+associated with the dentry.
+
+We really have no recourse in that situation other than to retry the
+open, and if it fails to just return nfserr_stale back to the client.
+
+Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
+Signed-off-by: Lance Shelton <lance.shelton@hammerspace.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsproc.c | 1 +
+ fs/nfsd/vfs.c | 10 +++++++++-
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 406dc50fea7ba..f65eba938a57d 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -875,6 +875,7 @@ nfserrno (int errno)
+ { nfserr_serverfault, -ESERVERFAULT },
+ { nfserr_serverfault, -ENFILE },
+ { nfserr_io, -EREMOTEIO },
++ { nfserr_stale, -EOPENSTALE },
+ { nfserr_io, -EUCLEAN },
+ { nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 925aa08ca1075..bc025fe5a595b 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -791,6 +791,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
+ {
+ __be32 err;
++ bool retried = false;
+
+ validate_process_creds();
+ /*
+@@ -806,9 +807,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ */
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
++retry:
+ err = fh_verify(rqstp, fhp, type, may_flags);
+- if (!err)
++ if (!err) {
+ err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
++ if (err == nfserr_stale && !retried) {
++ retried = true;
++ fh_put(fhp);
++ goto retry;
++ }
++ }
+ validate_process_creds();
+ return err;
+ }
+--
+2.43.0
+
--- /dev/null
+From d1877e3f0cd9ad70fceb0ce4336df8e494a5a697 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Nov 2022 06:58:41 -0500
+Subject: nfsd: return error if nfs4_setacl fails
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 01d53a88c08951f88f2a42f1f1e6568928e0590e ]
+
+With the addition of POSIX ACLs to struct nfsd_attrs, we no longer
+return an error if setting the ACL fails. Ensure we return the na_aclerr
+error on SETATTR if there is one.
+
+Fixes: c0cbe70742f4 ("NFSD: add posix ACLs to struct nfsd_attrs")
+Cc: Neil Brown <neilb@suse.de>
+Reported-by: Yongcheng Yang <yoyang@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 92d4eb1032ff9..eeff0ba0be558 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1135,6 +1135,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ 0, (time64_t)0);
+ if (!status)
+ status = nfserrno(attrs.na_labelerr);
++ if (!status)
++ status = nfserrno(attrs.na_aclerr);
+ out:
+ nfsd_attrs_free(&attrs);
+ fh_drop_write(&cstate->current_fh);
+--
+2.43.0
+
--- /dev/null
+From 951dee0a773cac798e243b617d14960ce42af9f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:46:44 -0400
+Subject: NFSD: Revert "NFSD: NFSv4 CLOSE should release an nfsd_file
+ immediately"
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit dcf3f80965ca787c70def402cdf1553c93c75529 ]
+
+This reverts commit 5e138c4a750dc140d881dab4a8804b094bbc08d2.
+
+That commit attempted to make files available to other users as soon
+as all NFSv4 clients were done with them, rather than waiting until
+the filecache LRU had garbage collected them.
+
+It gets the reference counting wrong, for one thing.
+
+But it also misses that DELEGRETURN should release a file in the
+same fashion. In fact, any nfsd_file_put() on a file held open
+by an NFSv4 client needs potentially to release the file
+immediately...
+
+Clear the way for implementing that idea.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 18 ------------------
+ fs/nfsd/filecache.h | 1 -
+ fs/nfsd/nfs4state.c | 4 ++--
+ 3 files changed, 2 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index dceb522f5cee9..e429fce894316 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -443,24 +443,6 @@ nfsd_file_put(struct nfsd_file *nf)
+ nfsd_file_put_noref(nf);
+ }
+
+-/**
+- * nfsd_file_close - Close an nfsd_file
+- * @nf: nfsd_file to close
+- *
+- * If this is the final reference for @nf, free it immediately.
+- * This reflects an on-the-wire CLOSE or DELEGRETURN into the
+- * VFS and exported filesystem.
+- */
+-void nfsd_file_close(struct nfsd_file *nf)
+-{
+- nfsd_file_put(nf);
+- if (refcount_dec_if_one(&nf->nf_ref)) {
+- nfsd_file_unhash(nf);
+- nfsd_file_lru_remove(nf);
+- nfsd_file_free(nf);
+- }
+-}
+-
+ struct nfsd_file *
+ nfsd_file_get(struct nfsd_file *nf)
+ {
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 357832bac736b..6b012ea4bd9da 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -52,7 +52,6 @@ void nfsd_file_cache_shutdown(void);
+ int nfsd_file_cache_start_net(struct net *net);
+ void nfsd_file_cache_shutdown_net(struct net *net);
+ void nfsd_file_put(struct nfsd_file *nf);
+-void nfsd_file_close(struct nfsd_file *nf);
+ struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+ void nfsd_file_close_inode_sync(struct inode *inode);
+ bool nfsd_file_is_cached(struct inode *inode);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index dbcdb74e9ff6f..2f720433632b8 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -842,9 +842,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
+ swap(f2, fp->fi_fds[O_RDWR]);
+ spin_unlock(&fp->fi_lock);
+ if (f1)
+- nfsd_file_close(f1);
++ nfsd_file_put(f1);
+ if (f2)
+- nfsd_file_close(f2);
++ nfsd_file_put(f2);
+ }
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 033f2d2baa9f74e6607dfa673b35b1b57d18aa04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Oct 2022 15:41:10 -0400
+Subject: nfsd: rework hashtable handling in nfsd_do_file_acquire
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 243a5263014a30436c93ed3f1f864c1da845455e ]
+
+nfsd_file is RCU-freed, so we need to hold the rcu_read_lock long enough
+to get a reference after finding it in the hash. Take the
+rcu_read_lock() and call rhashtable_lookup directly.
+
+Switch to using rhashtable_lookup_insert_key as well, and use the usual
+retry mechanism if we hit an -EEXIST. Rename the "retry" bool to
+open_retry, and eliminate the insert_err goto target.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 52 +++++++++++++++++++--------------------------
+ 1 file changed, 22 insertions(+), 30 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index a0d93e797cdce..0b19eb015c6c8 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1041,9 +1041,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ .need = may_flags & NFSD_FILE_MAY_MASK,
+ .net = SVC_NET(rqstp),
+ };
+- struct nfsd_file *nf, *new;
+- bool retry = true;
++ bool open_retry = true;
++ struct nfsd_file *nf;
+ __be32 status;
++ int ret;
+
+ status = fh_verify(rqstp, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+@@ -1053,35 +1054,33 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ key.cred = get_current_cred();
+
+ retry:
+- /* Avoid allocation if the item is already in cache */
+- nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
+- nfsd_file_rhash_params);
++ rcu_read_lock();
++ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
++ nfsd_file_rhash_params);
+ if (nf)
+ nf = nfsd_file_get(nf);
++ rcu_read_unlock();
+ if (nf)
+ goto wait_for_construction;
+
+- new = nfsd_file_alloc(&key, may_flags);
+- if (!new) {
++ nf = nfsd_file_alloc(&key, may_flags);
++ if (!nf) {
+ status = nfserr_jukebox;
+ goto out_status;
+ }
+
+- nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl,
+- &key, &new->nf_rhash,
+- nfsd_file_rhash_params);
+- if (!nf) {
+- nf = new;
+- goto open_file;
+- }
+- if (IS_ERR(nf))
+- goto insert_err;
+- nf = nfsd_file_get(nf);
+- if (nf == NULL) {
+- nf = new;
++ ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
++ &key, &nf->nf_rhash,
++ nfsd_file_rhash_params);
++ if (likely(ret == 0))
+ goto open_file;
+- }
+- nfsd_file_slab_free(&new->nf_rcu);
++
++ nfsd_file_slab_free(&nf->nf_rcu);
++ if (ret == -EEXIST)
++ goto retry;
++ trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
++ status = nfserr_jukebox;
++ goto out_status;
+
+ wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+@@ -1089,11 +1088,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
+- if (!retry) {
++ if (!open_retry) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+- retry = false;
++ open_retry = false;
+ nfsd_file_put_noref(nf);
+ goto retry;
+ }
+@@ -1141,13 +1140,6 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ goto out;
+-
+-insert_err:
+- nfsd_file_slab_free(&new->nf_rcu);
+- trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf));
+- nf = NULL;
+- status = nfserr_jukebox;
+- goto out_status;
+ }
+
+ /**
+--
+2.43.0
+
--- /dev/null
+From e47c1757ac53c114ca3d28a48fc724aa2dee2fbc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Dec 2022 06:19:33 -0500
+Subject: nfsd: rework refcounting in filecache
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit ac3a2585f018f10039b4a856dcb122da88c1c1c9 ]
+
+The filecache refcounting is a bit non-standard for something searchable
+by RCU, in that we maintain a sentinel reference while it's hashed. This
+in turn requires that we have to do things differently in the "put"
+depending on whether it's hashed, which we believe to have led to races.
+
+There are other problems in here too. nfsd_file_close_inode_sync can end
+up freeing an nfsd_file while there are still outstanding references to
+it, and there are a number of subtle ToC/ToU races.
+
+Rework the code so that the refcount is what drives the lifecycle. When
+the refcount goes to zero, then unhash and rcu free the object. A task
+searching for a nfsd_file is allowed to bump its refcount, but only if
+it's not already 0. Ensure that we don't make any other changes to it
+until a reference is held.
+
+With this change, the LRU carries a reference. Take special care to deal
+with it when removing an entry from the list, and ensure that we only
+repurpose the nf_lru list_head when the refcount is 0 to ensure
+exclusive access to it.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 318 +++++++++++++++++++++++---------------------
+ fs/nfsd/trace.h | 51 +++----
+ 2 files changed, 189 insertions(+), 180 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 6b8873b0c2c38..140094a44cc40 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -323,8 +323,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+ if (key->gc)
+ __set_bit(NFSD_FILE_GC, &nf->nf_flags);
+ nf->nf_inode = key->inode;
+- /* nf_ref is pre-incremented for hash table */
+- refcount_set(&nf->nf_ref, 2);
++ refcount_set(&nf->nf_ref, 1);
+ nf->nf_may = key->need;
+ nf->nf_mark = NULL;
+ }
+@@ -376,24 +375,35 @@ nfsd_file_unhash(struct nfsd_file *nf)
+ return false;
+ }
+
+-static bool
++static void
+ nfsd_file_free(struct nfsd_file *nf)
+ {
+ s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
+- bool flush = false;
+
+ trace_nfsd_file_free(nf);
+
+ this_cpu_inc(nfsd_file_releases);
+ this_cpu_add(nfsd_file_total_age, age);
+
++ nfsd_file_unhash(nf);
++
++ /*
++ * We call fsync here in order to catch writeback errors. It's not
++ * strictly required by the protocol, but an nfsd_file could get
++ * evicted from the cache before a COMMIT comes in. If another
++ * task were to open that file in the interim and scrape the error,
++ * then the client may never see it. By calling fsync here, we ensure
++ * that writeback happens before the entry is freed, and that any
++ * errors reported result in the write verifier changing.
++ */
++ nfsd_file_fsync(nf);
++
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, NULL);
+ fput(nf->nf_file);
+- flush = true;
+ }
+
+ /*
+@@ -401,10 +411,9 @@ nfsd_file_free(struct nfsd_file *nf)
+ * WARN and leak it to preserve system stability.
+ */
+ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
+- return flush;
++ return;
+
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+- return flush;
+ }
+
+ static bool
+@@ -420,17 +429,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+ }
+
+-static void nfsd_file_lru_add(struct nfsd_file *nf)
++static bool nfsd_file_lru_add(struct nfsd_file *nf)
+ {
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+- if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
++ if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) {
+ trace_nfsd_file_lru_add(nf);
++ return true;
++ }
++ return false;
+ }
+
+-static void nfsd_file_lru_remove(struct nfsd_file *nf)
++static bool nfsd_file_lru_remove(struct nfsd_file *nf)
+ {
+- if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
++ if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) {
+ trace_nfsd_file_lru_del(nf);
++ return true;
++ }
++ return false;
+ }
+
+ struct nfsd_file *
+@@ -441,86 +456,60 @@ nfsd_file_get(struct nfsd_file *nf)
+ return NULL;
+ }
+
+-static void
+-nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose)
+-{
+- trace_nfsd_file_unhash_and_queue(nf);
+- if (nfsd_file_unhash(nf)) {
+- /* caller must call nfsd_file_dispose_list() later */
+- nfsd_file_lru_remove(nf);
+- list_add(&nf->nf_lru, dispose);
+- }
+-}
+-
+-static void
+-nfsd_file_put_noref(struct nfsd_file *nf)
+-{
+- trace_nfsd_file_put(nf);
+-
+- if (refcount_dec_and_test(&nf->nf_ref)) {
+- WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+- nfsd_file_lru_remove(nf);
+- nfsd_file_free(nf);
+- }
+-}
+-
+-static void
+-nfsd_file_unhash_and_put(struct nfsd_file *nf)
+-{
+- if (nfsd_file_unhash(nf))
+- nfsd_file_put_noref(nf);
+-}
+-
++/**
++ * nfsd_file_put - put the reference to a nfsd_file
++ * @nf: nfsd_file of which to put the reference
++ *
++ * Put a reference to a nfsd_file. In the non-GC case, we just put the
++ * reference immediately. In the GC case, if the reference would be
++ * the last one, the put it on the LRU instead to be cleaned up later.
++ */
+ void
+ nfsd_file_put(struct nfsd_file *nf)
+ {
+ might_sleep();
++ trace_nfsd_file_put(nf);
+
+- if (test_bit(NFSD_FILE_GC, &nf->nf_flags))
+- nfsd_file_lru_add(nf);
+- else if (refcount_read(&nf->nf_ref) == 2)
+- nfsd_file_unhash_and_put(nf);
+-
+- if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- nfsd_file_fsync(nf);
+- nfsd_file_put_noref(nf);
+- } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
+- nfsd_file_put_noref(nf);
+- nfsd_file_schedule_laundrette();
+- } else
+- nfsd_file_put_noref(nf);
+-}
+-
+-static void
+-nfsd_file_dispose_list(struct list_head *dispose)
+-{
+- struct nfsd_file *nf;
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
++ test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
++ /*
++ * If this is the last reference (nf_ref == 1), then try to
++ * transfer it to the LRU.
++ */
++ if (refcount_dec_not_one(&nf->nf_ref))
++ return;
++
++ /* Try to add it to the LRU. If that fails, decrement. */
++ if (nfsd_file_lru_add(nf)) {
++ /* If it's still hashed, we're done */
++ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
++ nfsd_file_schedule_laundrette();
++ return;
++ }
+
+- while(!list_empty(dispose)) {
+- nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+- list_del_init(&nf->nf_lru);
+- nfsd_file_fsync(nf);
+- nfsd_file_put_noref(nf);
++ /*
++ * We're racing with unhashing, so try to remove it from
++ * the LRU. If removal fails, then someone else already
++ * has our reference.
++ */
++ if (!nfsd_file_lru_remove(nf))
++ return;
++ }
+ }
++ if (refcount_dec_and_test(&nf->nf_ref))
++ nfsd_file_free(nf);
+ }
+
+ static void
+-nfsd_file_dispose_list_sync(struct list_head *dispose)
++nfsd_file_dispose_list(struct list_head *dispose)
+ {
+- bool flush = false;
+ struct nfsd_file *nf;
+
+- while(!list_empty(dispose)) {
++ while (!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ list_del_init(&nf->nf_lru);
+- nfsd_file_fsync(nf);
+- if (!refcount_dec_and_test(&nf->nf_ref))
+- continue;
+- if (nfsd_file_free(nf))
+- flush = true;
++ nfsd_file_free(nf);
+ }
+- if (flush)
+- flush_delayed_fput();
+ }
+
+ static void
+@@ -590,21 +579,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ struct list_head *head = arg;
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+- /*
+- * Do a lockless refcount check. The hashtable holds one reference, so
+- * we look to see if anything else has a reference, or if any have
+- * been put since the shrinker last ran. Those don't get unhashed and
+- * released.
+- *
+- * Note that in the put path, we set the flag and then decrement the
+- * counter. Here we check the counter and then test and clear the flag.
+- * That order is deliberate to ensure that we can do this locklessly.
+- */
+- if (refcount_read(&nf->nf_ref) > 1) {
+- list_lru_isolate(lru, &nf->nf_lru);
+- trace_nfsd_file_gc_in_use(nf);
+- return LRU_REMOVED;
+- }
++ /* We should only be dealing with GC entries here */
++ WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
+
+ /*
+ * Don't throw out files that are still undergoing I/O or
+@@ -615,40 +591,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ return LRU_SKIP;
+ }
+
++ /* If it was recently added to the list, skip it */
+ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
+ trace_nfsd_file_gc_referenced(nf);
+ return LRU_ROTATE;
+ }
+
+- if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+- trace_nfsd_file_gc_hashed(nf);
+- return LRU_SKIP;
++ /*
++ * Put the reference held on behalf of the LRU. If it wasn't the last
++ * one, then just remove it from the LRU and ignore it.
++ */
++ if (!refcount_dec_and_test(&nf->nf_ref)) {
++ trace_nfsd_file_gc_in_use(nf);
++ list_lru_isolate(lru, &nf->nf_lru);
++ return LRU_REMOVED;
+ }
+
++ /* Refcount went to zero. Unhash it and queue it to the dispose list */
++ nfsd_file_unhash(nf);
+ list_lru_isolate_move(lru, &nf->nf_lru, head);
+ this_cpu_inc(nfsd_file_evictions);
+ trace_nfsd_file_gc_disposed(nf);
+ return LRU_REMOVED;
+ }
+
+-/*
+- * Unhash items on @dispose immediately, then queue them on the
+- * disposal workqueue to finish releasing them in the background.
+- *
+- * cel: Note that between the time list_lru_shrink_walk runs and
+- * now, these items are in the hash table but marked unhashed.
+- * Why release these outside of lru_cb ? There's no lock ordering
+- * problem since lru_cb currently takes no lock.
+- */
+-static void nfsd_file_gc_dispose_list(struct list_head *dispose)
+-{
+- struct nfsd_file *nf;
+-
+- list_for_each_entry(nf, dispose, nf_lru)
+- nfsd_file_hash_remove(nf);
+- nfsd_file_dispose_list_delayed(dispose);
+-}
+-
+ static void
+ nfsd_file_gc(void)
+ {
+@@ -658,7 +624,7 @@ nfsd_file_gc(void)
+ ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
+ &dispose, list_lru_count(&nfsd_file_lru));
+ trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
+- nfsd_file_gc_dispose_list(&dispose);
++ nfsd_file_dispose_list_delayed(&dispose);
+ }
+
+ static void
+@@ -684,7 +650,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &dispose);
+ trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
+- nfsd_file_gc_dispose_list(&dispose);
++ nfsd_file_dispose_list_delayed(&dispose);
+ return ret;
+ }
+
+@@ -694,72 +660,111 @@ static struct shrinker nfsd_file_shrinker = {
+ .seeks = 1,
+ };
+
+-/*
+- * Find all cache items across all net namespaces that match @inode and
+- * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
++/**
++ * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
++ * @inode: inode on which to close out nfsd_files
++ * @dispose: list on which to gather nfsd_files to close out
++ *
++ * An nfsd_file represents a struct file being held open on behalf of nfsd. An
++ * open file however can block other activity (such as leases), or cause
++ * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
++ *
++ * This function is intended to find open nfsd_files when this sort of
++ * conflicting access occurs and then attempt to close those files out.
++ *
++ * Populates the dispose list with entries that have already had their
++ * refcounts go to zero. The actual free of an nfsd_file can be expensive,
++ * so we leave it up to the caller whether it wants to wait or not.
+ */
+-static unsigned int
+-__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
++static void
++nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+ {
+ struct nfsd_file_lookup_key key = {
+ .type = NFSD_FILE_KEY_INODE,
+ .inode = inode,
+ };
+- unsigned int count = 0;
+ struct nfsd_file *nf;
+
+ rcu_read_lock();
+ do {
++ int decrement = 1;
++
+ nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
+ nfsd_file_rhash_params);
+ if (!nf)
+ break;
+- nfsd_file_unhash_and_queue(nf, dispose);
+- count++;
++
++ /* If we raced with someone else unhashing, ignore it */
++ if (!nfsd_file_unhash(nf))
++ continue;
++
++ /* If we can't get a reference, ignore it */
++ if (!nfsd_file_get(nf))
++ continue;
++
++ /* Extra decrement if we remove from the LRU */
++ if (nfsd_file_lru_remove(nf))
++ ++decrement;
++
++ /* If refcount goes to 0, then put on the dispose list */
++ if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
++ list_add(&nf->nf_lru, dispose);
++ trace_nfsd_file_closing(nf);
++ }
+ } while (1);
+ rcu_read_unlock();
+- return count;
+ }
+
+ /**
+- * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
++ * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+- * Unhash and put, then flush and fput all cache items associated with @inode.
++ * Close out any open nfsd_files that can be reaped for @inode. The
++ * actual freeing is deferred to the dispose_list_delayed infrastructure.
++ *
++ * This is used by the fsnotify callbacks and setlease notifier.
+ */
+-void
+-nfsd_file_close_inode_sync(struct inode *inode)
++static void
++nfsd_file_close_inode(struct inode *inode)
+ {
+ LIST_HEAD(dispose);
+- unsigned int count;
+
+- count = __nfsd_file_close_inode(inode, &dispose);
+- trace_nfsd_file_close_inode_sync(inode, count);
+- nfsd_file_dispose_list_sync(&dispose);
++ nfsd_file_queue_for_close(inode, &dispose);
++ nfsd_file_dispose_list_delayed(&dispose);
+ }
+
+ /**
+- * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
++ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+- * Unhash and put all cache item associated with @inode.
++ * Close out any open nfsd_files that can be reaped for @inode. The
++ * nfsd_files are closed out synchronously.
++ *
++ * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames
++ * when reexporting NFS.
+ */
+-static void
+-nfsd_file_close_inode(struct inode *inode)
++void
++nfsd_file_close_inode_sync(struct inode *inode)
+ {
++ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+- unsigned int count;
+
+- count = __nfsd_file_close_inode(inode, &dispose);
+- trace_nfsd_file_close_inode(inode, count);
+- nfsd_file_dispose_list_delayed(&dispose);
++ trace_nfsd_file_close(inode);
++
++ nfsd_file_queue_for_close(inode, &dispose);
++ while (!list_empty(&dispose)) {
++ nf = list_first_entry(&dispose, struct nfsd_file, nf_lru);
++ list_del_init(&nf->nf_lru);
++ nfsd_file_free(nf);
++ }
++ flush_delayed_fput();
+ }
+
+ /**
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+- * Walk the LRU list and close any entries that have not been used since
++ * Walk the LRU list and destroy any entries that have not been used since
+ * the last scan.
+ */
+ static void
+@@ -781,7 +786,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+
+ /* Only close files for F_SETLEASE leases */
+ if (fl->fl_flags & FL_LEASE)
+- nfsd_file_close_inode_sync(file_inode(fl->fl_file));
++ nfsd_file_close_inode(file_inode(fl->fl_file));
+ return 0;
+ }
+
+@@ -902,6 +907,13 @@ nfsd_file_cache_init(void)
+ goto out;
+ }
+
++/**
++ * __nfsd_file_cache_purge: clean out the cache for shutdown
++ * @net: net-namespace to shut down the cache (may be NULL)
++ *
++ * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
++ * then close out everything. Called when an nfsd instance is being shut down.
++ */
+ static void
+ __nfsd_file_cache_purge(struct net *net)
+ {
+@@ -915,8 +927,11 @@ __nfsd_file_cache_purge(struct net *net)
+
+ nf = rhashtable_walk_next(&iter);
+ while (!IS_ERR_OR_NULL(nf)) {
+- if (!net || nf->nf_net == net)
+- nfsd_file_unhash_and_queue(nf, &dispose);
++ if (!net || nf->nf_net == net) {
++ nfsd_file_unhash(nf);
++ nfsd_file_lru_remove(nf);
++ list_add(&nf->nf_lru, &dispose);
++ }
+ nf = rhashtable_walk_next(&iter);
+ }
+
+@@ -1083,8 +1098,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (nf)
+ nf = nfsd_file_get(nf);
+ rcu_read_unlock();
+- if (nf)
++
++ if (nf) {
++ if (nfsd_file_lru_remove(nf))
++ WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+ goto wait_for_construction;
++ }
+
+ nf = nfsd_file_alloc(&key, may_flags);
+ if (!nf) {
+@@ -1117,11 +1136,11 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto out;
+ }
+ open_retry = false;
+- nfsd_file_put_noref(nf);
++ if (refcount_dec_and_test(&nf->nf_ref))
++ nfsd_file_free(nf);
+ goto retry;
+ }
+
+- nfsd_file_lru_remove(nf);
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+@@ -1131,7 +1150,8 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ this_cpu_inc(nfsd_file_acquisitions);
+ *pnf = nf;
+ } else {
+- nfsd_file_put(nf);
++ if (refcount_dec_and_test(&nf->nf_ref))
++ nfsd_file_free(nf);
+ nf = NULL;
+ }
+
+@@ -1157,8 +1177,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+- if (status != nfs_ok || key.inode->i_nlink == 0)
+- nfsd_file_unhash_and_put(nf);
++ if (status == nfs_ok && key.inode->i_nlink == 0)
++ status = nfserr_jukebox;
++ if (status != nfs_ok)
++ nfsd_file_unhash(nf);
+ clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index f8eaef5b319eb..77be39fcb3d44 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -875,8 +875,8 @@ DEFINE_CLID_EVENT(confirmed_r);
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+- { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \
+- { 1 << NFSD_FILE_GC, "GC"})
++ { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \
++ { 1 << NFSD_FILE_GC, "GC" })
+
+ DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+@@ -911,6 +911,7 @@ DEFINE_EVENT(nfsd_file_class, name, \
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
++DEFINE_NFSD_FILE_EVENT(nfsd_file_closing);
+ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue);
+
+ TRACE_EVENT(nfsd_file_alloc,
+@@ -1102,35 +1103,6 @@ TRACE_EVENT(nfsd_file_open,
+ __entry->nf_file)
+ )
+
+-DECLARE_EVENT_CLASS(nfsd_file_search_class,
+- TP_PROTO(
+- const struct inode *inode,
+- unsigned int count
+- ),
+- TP_ARGS(inode, count),
+- TP_STRUCT__entry(
+- __field(const struct inode *, inode)
+- __field(unsigned int, count)
+- ),
+- TP_fast_assign(
+- __entry->inode = inode;
+- __entry->count = count;
+- ),
+- TP_printk("inode=%p count=%u",
+- __entry->inode, __entry->count)
+-);
+-
+-#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \
+-DEFINE_EVENT(nfsd_file_search_class, name, \
+- TP_PROTO( \
+- const struct inode *inode, \
+- unsigned int count \
+- ), \
+- TP_ARGS(inode, count))
+-
+-DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
+-DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
+-
+ TRACE_EVENT(nfsd_file_is_cached,
+ TP_PROTO(
+ const struct inode *inode,
+@@ -1208,7 +1180,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
+-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed);
+ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
+
+ DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
+@@ -1240,6 +1211,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
+ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
+ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
+
++TRACE_EVENT(nfsd_file_close,
++ TP_PROTO(
++ const struct inode *inode
++ ),
++ TP_ARGS(inode),
++ TP_STRUCT__entry(
++ __field(const void *, inode)
++ ),
++ TP_fast_assign(
++ __entry->inode = inode;
++ ),
++ TP_printk("inode=%p",
++ __entry->inode
++ )
++);
++
+ TRACE_EVENT(nfsd_file_fsync,
+ TP_PROTO(
+ const struct nfsd_file *nf,
+--
+2.43.0
+
--- /dev/null
+From 8122383608995d4cb211026ba1c86639ec782353 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Oct 2021 10:40:59 -0400
+Subject: NFSD: Save location of NFSv4 COMPOUND status
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3b0ebb255fdc49a3d340846deebf045ef58ec744 ]
+
+Refactor: Currently nfs4svc_encode_compoundres() relies on the NFS
+dispatcher to pass in the buffer location of the COMPOUND status.
+Instead, save that buffer location in struct nfsd4_compoundres.
+
+The compound tag follows immediately after.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfs4xdr.c | 9 +++++++--
+ fs/nfsd/xdr4.h | 3 ++-
+ 3 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index bc7ae9a8604ec..002473c59fc6f 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2462,11 +2462,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ __be32 status;
+
+ resp->xdr = &rqstp->rq_res_stream;
++ resp->statusp = resp->xdr->p;
+
+ /* reserve space for: NFS status code */
+ xdr_reserve_space(resp->xdr, XDR_UNIT);
+
+- resp->tagp = resp->xdr->p;
+ /* reserve space for: taglen, tag, and opcnt */
+ xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen);
+ resp->taglen = args->taglen;
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 9fcaf5f93f75d..e94f57f174f12 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5436,11 +5436,16 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
+ WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
+ buf->tail[0].iov_len);
+
+- *p = resp->cstate.status;
++ /*
++ * Send buffer space for the following items is reserved
++ * at the top of nfsd4_proc_compound().
++ */
++ p = resp->statusp;
++
++ *p++ = resp->cstate.status;
+
+ rqstp->rq_next_page = resp->xdr->page_ptr + 1;
+
+- p = resp->tagp;
+ *p++ = htonl(resp->taglen);
+ memcpy(p, resp->tag, resp->taglen);
+ p += XDR_QUADLEN(resp->taglen);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 50242d8cd09e8..f20c1ae97fec5 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -703,10 +703,11 @@ struct nfsd4_compoundres {
+ struct xdr_stream *xdr;
+ struct svc_rqst * rqstp;
+
++ __be32 *statusp;
+ u32 taglen;
+ char * tag;
+ u32 opcnt;
+- __be32 * tagp; /* tag, opcount encode location */
++
+ struct nfsd4_compound_state cstate;
+ };
+
+--
+2.43.0
+
--- /dev/null
+From 892eac1a333372224ae3c49c0d010e2f9d90db44 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 16:48:32 +1000
+Subject: nfsd: separate nfsd_last_thread() from nfsd_put()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 9f28a971ee9fdf1bf8ce8c88b103f483be610277 ]
+
+Now that the last nfsd thread is stopped by an explicit act of calling
+svc_set_num_threads() with a count of zero, we only have a limited
+number of places that can happen, and don't need to call
+nfsd_last_thread() in nfsd_put().
+
+So separate that out and call it at the two places where the number of
+threads is set to zero.
+
+Move the clearing of ->nfsd_serv and the call to svc_xprt_destroy_all()
+into nfsd_last_thread(), as they are really part of the same action.
+
+nfsd_put() is now a thin wrapper around svc_put(), so make it a static
+inline.
+
+nfsd_put() cannot be called after nfsd_last_thread(), so in a couple of
+places we have to use svc_put() instead.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsd.h | 7 ++++++-
+ fs/nfsd/nfssvc.c | 52 ++++++++++++++++++------------------------------
+ 2 files changed, 25 insertions(+), 34 deletions(-)
+
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index fa0144a742678..867dcfd64d426 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -96,7 +96,12 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
+ int nfsd_pool_stats_release(struct inode *, struct file *);
+ void nfsd_shutdown_threads(struct net *net);
+
+-void nfsd_put(struct net *net);
++static inline void nfsd_put(struct net *net)
++{
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++
++ svc_put(nn->nfsd_serv);
++}
+
+ bool i_am_nfsd(void);
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 8907dba22c3f2..ee5713fca1870 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -529,9 +529,14 @@ static struct notifier_block nfsd_inet6addr_notifier = {
+ /* Only used under nfsd_mutex, so this atomic may be overkill: */
+ static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
+
+-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
++static void nfsd_last_thread(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct svc_serv *serv = nn->nfsd_serv;
++
++ spin_lock(&nfsd_notifier_lock);
++ nn->nfsd_serv = NULL;
++ spin_unlock(&nfsd_notifier_lock);
+
+ /* check if the notifier still has clients */
+ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
+@@ -541,6 +546,8 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+ #endif
+ }
+
++ svc_xprt_destroy_all(serv, net);
++
+ /*
+ * write_ports can create the server without actually starting
+ * any threads--if we get shut down before any threads are
+@@ -631,7 +638,8 @@ void nfsd_shutdown_threads(struct net *net)
+ svc_get(serv);
+ /* Kill outstanding nfsd threads */
+ svc_set_num_threads(serv, NULL, 0);
+- nfsd_put(net);
++ nfsd_last_thread(net);
++ svc_put(serv);
+ mutex_unlock(&nfsd_mutex);
+ }
+
+@@ -661,9 +669,6 @@ int nfsd_create_serv(struct net *net)
+ serv->sv_maxconn = nn->max_connections;
+ error = svc_bind(serv, net);
+ if (error < 0) {
+- /* NOT nfsd_put() as notifiers (see below) haven't
+- * been set up yet.
+- */
+ svc_put(serv);
+ return error;
+ }
+@@ -706,29 +711,6 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
+ return 0;
+ }
+
+-/* This is the callback for kref_put() below.
+- * There is no code here as the first thing to be done is
+- * call svc_shutdown_net(), but we cannot get the 'net' from
+- * the kref. So do all the work when kref_put returns true.
+- */
+-static void nfsd_noop(struct kref *ref)
+-{
+-}
+-
+-void nfsd_put(struct net *net)
+-{
+- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+-
+- if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
+- svc_xprt_destroy_all(nn->nfsd_serv, net);
+- nfsd_last_thread(nn->nfsd_serv, net);
+- svc_destroy(&nn->nfsd_serv->sv_refcnt);
+- spin_lock(&nfsd_notifier_lock);
+- nn->nfsd_serv = NULL;
+- spin_unlock(&nfsd_notifier_lock);
+- }
+-}
+-
+ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
+ {
+ int i = 0;
+@@ -779,7 +761,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
+ if (err)
+ break;
+ }
+- nfsd_put(net);
++ svc_put(nn->nfsd_serv);
+ return err;
+ }
+
+@@ -794,6 +776,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+ int error;
+ bool nfsd_up_before;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct svc_serv *serv;
+
+ mutex_lock(&nfsd_mutex);
+ dprintk("nfsd: creating service\n");
+@@ -813,22 +796,25 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+ goto out;
+
+ nfsd_up_before = nn->nfsd_net_up;
++ serv = nn->nfsd_serv;
+
+ error = nfsd_startup_net(net, cred);
+ if (error)
+ goto out_put;
+- error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
++ error = svc_set_num_threads(serv, NULL, nrservs);
+ if (error)
+ goto out_shutdown;
+- error = nn->nfsd_serv->sv_nrthreads;
++ error = serv->sv_nrthreads;
++ if (error == 0)
++ nfsd_last_thread(net);
+ out_shutdown:
+ if (error < 0 && !nfsd_up_before)
+ nfsd_shutdown_net(net);
+ out_put:
+ /* Threads now hold service active */
+ if (xchg(&nn->keep_active, 0))
+- nfsd_put(net);
+- nfsd_put(net);
++ svc_put(serv);
++ svc_put(serv);
+ out:
+ mutex_unlock(&nfsd_mutex);
+ return error;
+--
+2.43.0
+
--- /dev/null
+From 1aa497f1b46e822629c06679d6bd24d9e40b367a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:43 -0400
+Subject: NFSD: Separate tracepoints for acquire and create
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit be0230069fcbf7d332d010b57c1d0cfd623a84d6 ]
+
+These tracepoints collect different information: the create case does
+not open a file, so there's no nf_file available.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 9 ++++----
+ fs/nfsd/nfs4state.c | 1 +
+ fs/nfsd/trace.h | 54 ++++++++++++++++++++++++++++++++++++++-------
+ 3 files changed, 52 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index c6dc55c0f758b..85813affb8abf 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1039,7 +1039,7 @@ nfsd_file_is_cached(struct inode *inode)
+ }
+
+ static __be32
+-nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
++nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf, bool open)
+ {
+ struct nfsd_file_lookup_key key = {
+@@ -1120,7 +1120,8 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ out_status:
+ put_cred(key.cred);
+- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
++ if (open)
++ trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ return status;
+
+ open_file:
+@@ -1168,7 +1169,7 @@ __be32
+ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+ {
+- return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true);
+ }
+
+ /**
+@@ -1185,7 +1186,7 @@ __be32
+ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+ {
+- return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
++ return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false);
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 08700b6acba31..d349abf0821d6 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5121,6 +5121,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ goto out_put_access;
+ nf->nf_file = open->op_filp;
+ open->op_filp = NULL;
++ trace_nfsd_file_create(rqstp, access, nf);
+ }
+
+ spin_lock(&fp->fi_lock);
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index f94db18777ad7..12dbc190e6595 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -781,10 +781,10 @@ DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose);
+
+ TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(
+- struct svc_rqst *rqstp,
+- struct inode *inode,
++ const struct svc_rqst *rqstp,
++ const struct inode *inode,
+ unsigned int may_flags,
+- struct nfsd_file *nf,
++ const struct nfsd_file *nf,
+ __be32 status
+ ),
+
+@@ -792,12 +792,12 @@ TRACE_EVENT(nfsd_file_acquire,
+
+ TP_STRUCT__entry(
+ __field(u32, xid)
+- __field(void *, inode)
++ __field(const void *, inode)
+ __field(unsigned long, may_flags)
+- __field(int, nf_ref)
++ __field(unsigned int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned long, nf_may)
+- __field(struct file *, nf_file)
++ __field(const void *, nf_file)
+ __field(u32, status)
+ ),
+
+@@ -812,12 +812,50 @@ TRACE_EVENT(nfsd_file_acquire,
+ __entry->status = be32_to_cpu(status);
+ ),
+
+- TP_printk("xid=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u",
++ TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u",
+ __entry->xid, __entry->inode,
+ show_nfsd_may_flags(__entry->may_flags),
+ __entry->nf_ref, show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+- __entry->nf_file, __entry->status)
++ __entry->nf_file, __entry->status
++ )
++);
++
++TRACE_EVENT(nfsd_file_create,
++ TP_PROTO(
++ const struct svc_rqst *rqstp,
++ unsigned int may_flags,
++ const struct nfsd_file *nf
++ ),
++
++ TP_ARGS(rqstp, may_flags, nf),
++
++ TP_STRUCT__entry(
++ __field(const void *, nf_inode)
++ __field(const void *, nf_file)
++ __field(unsigned long, may_flags)
++ __field(unsigned long, nf_flags)
++ __field(unsigned long, nf_may)
++ __field(unsigned int, nf_ref)
++ __field(u32, xid)
++ ),
++
++ TP_fast_assign(
++ __entry->nf_inode = nf->nf_inode;
++ __entry->nf_file = nf->nf_file;
++ __entry->may_flags = may_flags;
++ __entry->nf_flags = nf->nf_flags;
++ __entry->nf_may = nf->nf_may;
++ __entry->nf_ref = refcount_read(&nf->nf_ref);
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ ),
++
++ TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
++ __entry->xid, __entry->nf_inode,
++ show_nfsd_may_flags(__entry->may_flags),
++ __entry->nf_ref, show_nf_flags(__entry->nf_flags),
++ show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
++ )
+ );
+
+ TRACE_EVENT(nfsd_file_insert_err,
+--
+2.43.0
+
--- /dev/null
+From 1c2ee119732dfa48ceea7a241699c858ea230377 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: set attributes when creating symlinks
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 93adc1e391a761441d783828b93979b38093d011 ]
+
+The NFS protocol includes attributes when creating symlinks.
+Linux does store attributes for symlinks and allows them to be set,
+though they are not used for permission checking.
+
+NFSD currently doesn't set standard (struct iattr) attributes when
+creating symlinks, but for NFSv4 it does set ACLs and security labels.
+This is inconsistent.
+
+To improve consistency, pass the provided attributes into nfsd_symlink()
+and call nfsd_create_setattr() to set them.
+
+NOTE: this results in a behaviour change for all NFS versions when the
+client sends non-default attributes with a SYMLINK request. With the
+Linux client, the only attributes are:
+ attr.ia_mode = S_IFLNK | S_IRWXUGO;
+ attr.ia_valid = ATTR_MODE;
+so the final outcome will be unchanged. Other clients might send
+different attributes, and if they did they probably expect them to be
+honoured.
+
+We ignore any error from nfsd_create_setattr(). It isn't really clear
+what should be done if a file is successfully created, but the
+attributes cannot be set. NFS doesn't allow partial success to be
+reported. Reporting failure is probably more misleading than reporting
+success, so the status is ignored.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 5 ++++-
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/nfsproc.c | 5 ++++-
+ fs/nfsd/vfs.c | 25 ++++++++++++++++++-------
+ fs/nfsd/vfs.h | 5 +++--
+ 5 files changed, 30 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 113567b3a98a5..cb91088bce2e8 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -397,6 +397,9 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
+ {
+ struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &argp->attrs,
++ };
+
+ if (argp->tlen == 0) {
+ resp->status = nfserr_inval;
+@@ -423,7 +426,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
+ fh_copy(&resp->dirfh, &argp->ffh);
+ fh_init(&resp->fh, NFS3_FHSIZE);
+ resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname,
+- argp->flen, argp->tname, &resp->fh);
++ argp->flen, argp->tname, &attrs, &resp->fh);
+ kfree(argp->tname);
+ out:
+ return rpc_success;
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 9b04611a318d7..96f6fe4f86fd8 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -813,7 +813,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ case NF4LNK:
+ status = nfsd_symlink(rqstp, &cstate->current_fh,
+ create->cr_name, create->cr_namelen,
+- create->cr_data, &resfh);
++ create->cr_data, &attrs, &resfh);
+ break;
+
+ case NF4BLK:
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index c75d83bc3f21b..09afd188099be 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -479,6 +479,9 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
+ {
+ struct nfsd_symlinkargs *argp = rqstp->rq_argp;
+ struct nfsd_stat *resp = rqstp->rq_resp;
++ struct nfsd_attrs attrs = {
++ .na_iattr = &argp->attrs,
++ };
+ struct svc_fh newfh;
+
+ if (argp->tlen > NFS_MAXPATHLEN) {
+@@ -500,7 +503,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
+
+ fh_init(&newfh, NFS_FHSIZE);
+ resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
+- argp->tname, &newfh);
++ argp->tname, &attrs, &newfh);
+
+ kfree(argp->tname);
+ fh_put(&argp->ffh);
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 489225de05a2a..bfdb42aa23a01 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1446,15 +1446,25 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
+ return 0;
+ }
+
+-/*
+- * Create a symlink and look up its inode
++/**
++ * nfsd_symlink - Create a symlink and look up its inode
++ * @rqstp: RPC transaction being executed
++ * @fhp: NFS filehandle of parent directory
++ * @fname: filename of the new symlink
++ * @flen: length of @fname
++ * @path: content of the new symlink (NUL-terminated)
++ * @attrs: requested attributes of new object
++ * @resfhp: NFS filehandle of new object
++ *
+ * N.B. After this call _both_ fhp and resfhp need an fh_put
++ *
++ * Returns nfs_ok on success, or an nfsstat in network byte order.
+ */
+ __be32
+ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+- char *fname, int flen,
+- char *path,
+- struct svc_fh *resfhp)
++ char *fname, int flen,
++ char *path, struct nfsd_attrs *attrs,
++ struct svc_fh *resfhp)
+ {
+ struct dentry *dentry, *dnew;
+ __be32 err, cerr;
+@@ -1484,13 +1494,14 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
+ err = nfserrno(host_err);
++ cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
++ if (!err)
++ nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
+ fh_unlock(fhp);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
+-
+ fh_drop_write(fhp);
+
+- cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+ dput(dnew);
+ if (err==0) err = cerr;
+ out:
+diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
+index d8b1a36fca956..5047cec4c423c 100644
+--- a/fs/nfsd/vfs.h
++++ b/fs/nfsd/vfs.h
+@@ -114,8 +114,9 @@ __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
+ char *, int *);
+ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
+- char *name, int len, char *path,
+- struct svc_fh *res);
++ char *name, int len, char *path,
++ struct nfsd_attrs *attrs,
++ struct svc_fh *res);
+ __be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
+ char *, int, struct svc_fh *);
+ ssize_t nfsd_copy_file_range(struct file *, u64,
+--
+2.43.0
+
--- /dev/null
+From 76c1f9f984556160a596d0df235912283cf01277 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:26:23 -0400
+Subject: NFSD: Set up an rhashtable for the filecache
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fc22945ecc2a0a028f3683115f98a922d506c284 ]
+
+Add code to initialize and tear down an rhashtable. The rhashtable
+is not used yet.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 160 ++++++++++++++++++++++++++++++++++++++------
+ fs/nfsd/filecache.h | 1 +
+ 2 files changed, 140 insertions(+), 21 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 33bb4d31b4972..95e7e15b567e2 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -12,6 +12,7 @@
+ #include <linux/fsnotify_backend.h>
+ #include <linux/fsnotify.h>
+ #include <linux/seq_file.h>
++#include <linux/rhashtable.h>
+
+ #include "vfs.h"
+ #include "nfsd.h"
+@@ -62,6 +63,136 @@ static unsigned long nfsd_file_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+ static atomic_long_t nfsd_filecache_count;
+ static struct delayed_work nfsd_filecache_laundrette;
++static struct rhashtable nfsd_file_rhash_tbl
++ ____cacheline_aligned_in_smp;
++
++enum nfsd_file_lookup_type {
++ NFSD_FILE_KEY_INODE,
++ NFSD_FILE_KEY_FULL,
++};
++
++struct nfsd_file_lookup_key {
++ struct inode *inode;
++ struct net *net;
++ const struct cred *cred;
++ unsigned char need;
++ enum nfsd_file_lookup_type type;
++};
++
++/*
++ * The returned hash value is based solely on the address of an in-core
++ * inode, a pointer to a slab-allocated object. The entropy in such a
++ * pointer is concentrated in its middle bits.
++ */
++static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
++{
++ unsigned long ptr = (unsigned long)inode;
++ u32 k;
++
++ k = ptr >> L1_CACHE_SHIFT;
++ k &= 0x00ffffff;
++ return jhash2(&k, 1, seed);
++}
++
++/**
++ * nfsd_file_key_hashfn - Compute the hash value of a lookup key
++ * @data: key on which to compute the hash value
++ * @len: rhash table's key_len parameter (unused)
++ * @seed: rhash table's random seed of the day
++ *
++ * Return value:
++ * Computed 32-bit hash value
++ */
++static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
++{
++ const struct nfsd_file_lookup_key *key = data;
++
++ return nfsd_file_inode_hash(key->inode, seed);
++}
++
++/**
++ * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
++ * @data: object on which to compute the hash value
++ * @len: rhash table's key_len parameter (unused)
++ * @seed: rhash table's random seed of the day
++ *
++ * Return value:
++ * Computed 32-bit hash value
++ */
++static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
++{
++ const struct nfsd_file *nf = data;
++
++ return nfsd_file_inode_hash(nf->nf_inode, seed);
++}
++
++static bool
++nfsd_match_cred(const struct cred *c1, const struct cred *c2)
++{
++ int i;
++
++ if (!uid_eq(c1->fsuid, c2->fsuid))
++ return false;
++ if (!gid_eq(c1->fsgid, c2->fsgid))
++ return false;
++ if (c1->group_info == NULL || c2->group_info == NULL)
++ return c1->group_info == c2->group_info;
++ if (c1->group_info->ngroups != c2->group_info->ngroups)
++ return false;
++ for (i = 0; i < c1->group_info->ngroups; i++) {
++ if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
++ return false;
++ }
++ return true;
++}
++
++/**
++ * nfsd_file_obj_cmpfn - Match a cache item against search criteria
++ * @arg: search criteria
++ * @ptr: cache item to check
++ *
++ * Return values:
++ * %0 - Item matches search criteria
++ * %1 - Item does not match search criteria
++ */
++static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
++ const void *ptr)
++{
++ const struct nfsd_file_lookup_key *key = arg->key;
++ const struct nfsd_file *nf = ptr;
++
++ switch (key->type) {
++ case NFSD_FILE_KEY_INODE:
++ if (nf->nf_inode != key->inode)
++ return 1;
++ break;
++ case NFSD_FILE_KEY_FULL:
++ if (nf->nf_inode != key->inode)
++ return 1;
++ if (nf->nf_may != key->need)
++ return 1;
++ if (nf->nf_net != key->net)
++ return 1;
++ if (!nfsd_match_cred(nf->nf_cred, key->cred))
++ return 1;
++ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
++ return 1;
++ break;
++ }
++ return 0;
++}
++
++static const struct rhashtable_params nfsd_file_rhash_params = {
++ .key_len = sizeof_field(struct nfsd_file, nf_inode),
++ .key_offset = offsetof(struct nfsd_file, nf_inode),
++ .head_offset = offsetof(struct nfsd_file, nf_rhash),
++ .hashfn = nfsd_file_key_hashfn,
++ .obj_hashfn = nfsd_file_obj_hashfn,
++ .obj_cmpfn = nfsd_file_obj_cmpfn,
++ /* Reduce resizing churn on light workloads */
++ .min_size = 512, /* buckets */
++ .automatic_shrinking = true,
++};
+
+ static void
+ nfsd_file_schedule_laundrette(void)
+@@ -693,13 +824,18 @@ static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+ int
+ nfsd_file_cache_init(void)
+ {
+- int ret = -ENOMEM;
++ int ret;
+ unsigned int i;
+
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+ return 0;
+
++ ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
++ if (ret)
++ return ret;
++
++ ret = -ENOMEM;
+ nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+ if (!nfsd_filecache_wq)
+ goto out;
+@@ -777,6 +913,7 @@ nfsd_file_cache_init(void)
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
++ rhashtable_destroy(&nfsd_file_rhash_tbl);
+ goto out;
+ }
+
+@@ -902,6 +1039,7 @@ nfsd_file_cache_shutdown(void)
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
++ rhashtable_destroy(&nfsd_file_rhash_tbl);
+
+ for_each_possible_cpu(i) {
+ per_cpu(nfsd_file_cache_hits, i) = 0;
+@@ -913,26 +1051,6 @@ nfsd_file_cache_shutdown(void)
+ }
+ }
+
+-static bool
+-nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+-{
+- int i;
+-
+- if (!uid_eq(c1->fsuid, c2->fsuid))
+- return false;
+- if (!gid_eq(c1->fsgid, c2->fsgid))
+- return false;
+- if (c1->group_info == NULL || c2->group_info == NULL)
+- return c1->group_info == c2->group_info;
+- if (c1->group_info->ngroups != c2->group_info->ngroups)
+- return false;
+- for (i = 0; i < c1->group_info->ngroups; i++) {
+- if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+- return false;
+- }
+- return true;
+-}
+-
+ static struct nfsd_file *
+ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+ unsigned int hashval, struct net *net)
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 82051e1b8420d..5cbfc61a7d7d9 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -29,6 +29,7 @@ struct nfsd_file_mark {
+ * never be dereferenced, only used for comparison.
+ */
+ struct nfsd_file {
++ struct rhash_head nf_rhash;
+ struct hlist_node nf_node;
+ struct list_head nf_lru;
+ struct rcu_head nf_rcu;
+--
+2.43.0
+
--- /dev/null
+From 4186b039edc3da567f1f9b8487a077c8fea33311 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 14:19:27 -0700
+Subject: NFSD: Show state of courtesy client in client info
+
+From: Dai Ngo <dai.ngo@oracle.com>
+
+[ Upstream commit e9488d5ae13c0a72223c507e2508dc2ac66cad4f ]
+
+Update client_info_show to show state of courtesy client
+and seconds since last renew.
+
+Reviewed-by: J. Bruce Fields <bfields@fieldses.org>
+Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 447faa4348227..5bbf769b688bc 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2494,10 +2494,17 @@ static int client_info_show(struct seq_file *m, void *v)
+ memcpy(&clid, &clp->cl_clientid, sizeof(clid));
+ seq_printf(m, "clientid: 0x%llx\n", clid);
+ seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
+- if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
++
++ if (clp->cl_state == NFSD4_COURTESY)
++ seq_puts(m, "status: courtesy\n");
++ else if (clp->cl_state == NFSD4_EXPIRABLE)
++ seq_puts(m, "status: expirable\n");
++ else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
+ seq_puts(m, "status: confirmed\n");
+ else
+ seq_puts(m, "status: unconfirmed\n");
++ seq_printf(m, "seconds from last renew: %lld\n",
++ ktime_get_boottime_seconds() - clp->cl_time);
+ seq_printf(m, "name: ");
+ seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
+ seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
+--
+2.43.0
+
--- /dev/null
+From 82e27493f46eee4d63a3aa14ebeaedb510239ff7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:22 -0400
+Subject: NFSD: Shrink size of struct nfsd4_copy
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 87689df694916c40e8e6c179ab1c8710f65cb6c6 ]
+
+struct nfsd4_copy is part of struct nfsd4_op, which resides in an
+8-element array.
+
+sizeof(struct nfsd4_op):
+Before: /* size: 1696, cachelines: 27, members: 5 */
+After: /* size: 672, cachelines: 11, members: 5 */
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 8 ++++++--
+ fs/nfsd/nfs4xdr.c | 5 ++++-
+ fs/nfsd/xdr4.h | 2 +-
+ 3 files changed, 11 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 80ec51a89d5b5..f0722d4ed0810 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1291,6 +1291,7 @@ void nfs4_put_copy(struct nfsd4_copy *copy)
+ {
+ if (!refcount_dec_and_test(©->refcount))
+ return;
++ kfree(copy->cp_src);
+ kfree(copy);
+ }
+
+@@ -1545,7 +1546,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ if (status)
+ goto out;
+
+- status = nfsd4_interssc_connect(©->cp_src, rqstp, mount);
++ status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount);
+ if (status)
+ goto out;
+
+@@ -1753,7 +1754,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+ dst->nf_src = nfsd_file_get(src->nf_src);
+
+ memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+- memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server));
++ memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server));
+ memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
+ memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
+ dst->ss_mnt = src->ss_mnt;
+@@ -1847,6 +1848,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
+ if (!async_copy)
+ goto out_err;
++ async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
++ if (!async_copy->cp_src)
++ goto out_err;
+ if (!nfs4_init_copy_state(nn, copy))
+ goto out_err;
+ refcount_set(&async_copy->refcount, 1);
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 515edc1b662e1..dad744d44b0a2 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -1920,6 +1920,9 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
++ copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src));
++ if (copy->cp_src == NULL)
++ return nfserr_jukebox;
+ copy->cp_intra = false;
+ if (count == 0) { /* intra-server copy */
+ copy->cp_intra = true;
+@@ -1927,7 +1930,7 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+ }
+
+ /* decode all the supplied server addresses but use only the first */
+- status = nfsd4_decode_nl4_server(argp, ©->cp_src);
++ status = nfsd4_decode_nl4_server(argp, copy->cp_src);
+ if (status)
+ return status;
+
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 6673c2980c77e..ed90843a55293 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -540,7 +540,7 @@ struct nfsd4_copy {
+ u64 cp_src_pos;
+ u64 cp_dst_pos;
+ u64 cp_count;
+- struct nl4_server cp_src;
++ struct nl4_server *cp_src;
+ bool cp_intra;
+
+ /* both */
+--
+2.43.0
+
--- /dev/null
+From 1f65c5119eef9b066524db099f5d5439585202a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 14:40:16 -0400
+Subject: NFSD: Shrink size of struct nfsd4_copy_notify
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 09426ef2a64ee189ca1e3298f1e874842dbf35ea ]
+
+struct nfsd4_copy_notify is part of struct nfsd4_op, which resides
+in an 8-element array.
+
+sizeof(struct nfsd4_op):
+Before: /* size: 2208, cachelines: 35, members: 5 */
+After: /* size: 1696, cachelines: 27, members: 5 */
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 4 ++--
+ fs/nfsd/nfs4xdr.c | 12 ++++++++++--
+ fs/nfsd/xdr4.h | 4 ++--
+ 3 files changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 5dce18fe99085..80ec51a89d5b5 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -1947,9 +1947,9 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ /* For now, only return one server address in cpn_src, the
+ * address used by the client to connect to this server.
+ */
+- cn->cpn_src.nl4_type = NL4_NETADDR;
++ cn->cpn_src->nl4_type = NL4_NETADDR;
+ status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+- &cn->cpn_src.u.nl4_addr);
++ &cn->cpn_src->u.nl4_addr);
+ WARN_ON_ONCE(status);
+ if (status) {
+ nfs4_put_cpntf_state(nn, cps);
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index fb891249694c3..515edc1b662e1 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -1952,10 +1952,17 @@ nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+ {
+ __be32 status;
+
++ cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src));
++ if (cn->cpn_src == NULL)
++ return nfserr_jukebox;
++ cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst));
++ if (cn->cpn_dst == NULL)
++ return nfserr_jukebox;
++
+ status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid);
+ if (status)
+ return status;
+- return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
++ return nfsd4_decode_nl4_server(argp, cn->cpn_dst);
+ }
+
+ static __be32
+@@ -4903,7 +4910,8 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ *p++ = cpu_to_be32(1);
+
+- return nfsd42_encode_nl4_server(resp, &cn->cpn_src);
++ nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src);
++ return nfserr;
+ }
+
+ static __be32
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 0737f81c1004e..6673c2980c77e 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -595,13 +595,13 @@ struct nfsd4_offload_status {
+ struct nfsd4_copy_notify {
+ /* request */
+ stateid_t cpn_src_stateid;
+- struct nl4_server cpn_dst;
++ struct nl4_server *cpn_dst;
+
+ /* response */
+ stateid_t cpn_cnr_stateid;
+ u64 cpn_sec;
+ u32 cpn_nsec;
+- struct nl4_server cpn_src;
++ struct nl4_server *cpn_src;
+ };
+
+ struct nfsd4_op {
+--
+2.43.0
+
--- /dev/null
+From c6991c4afe34f0fdec2660e3c9e4cb989d0e62e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 08:39:23 -0400
+Subject: nfsd: silence extraneous printk on nfsd.ko insertion
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 3a5940bfa17fb9964bf9688b4356ca643a8f5e2d ]
+
+This printk pops every time nfsd.ko gets plugged in. Most kmods don't do
+that and this one is not very informative. Olaf's email address seems to
+be defunct at this point anyway. Just drop it.
+
+Cc: Olaf Kirch <okir@suse.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 164c822ae3ae9..917fa1892fd2d 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1519,7 +1519,6 @@ static struct pernet_operations nfsd_net_ops = {
+ static int __init init_nfsd(void)
+ {
+ int retval;
+- printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
+
+ retval = nfsd4_init_slabs();
+ if (retval)
+--
+2.43.0
+
--- /dev/null
+From 909a929b48ad248d73e3d6697eb9e473c05ac4aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 16:48:31 +1000
+Subject: nfsd: Simplify code around svc_exit_thread() call in nfsd()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 18e4cf915543257eae2925671934937163f5639b ]
+
+Previously a thread could exit asynchronously (due to a signal) so some
+care was needed to hold nfsd_mutex over the last svc_put() call. Now a
+thread can only exit when svc_set_num_threads() is called, and this is
+always called under nfsd_mutex. So no care is needed.
+
+Not only is the mutex held when a thread exits now, but the svc refcount
+is elevated, so the svc_put() in svc_exit_thread() will never be a final
+put, so the mutex isn't even needed at this point in the code.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 23 -----------------------
+ include/linux/sunrpc/svc.h | 13 -------------
+ 2 files changed, 36 deletions(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 8063fab2c0279..8907dba22c3f2 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -979,31 +979,8 @@ nfsd(void *vrqstp)
+ atomic_dec(&nfsdstats.th_cnt);
+
+ out:
+- /* Take an extra ref so that the svc_put in svc_exit_thread()
+- * doesn't call svc_destroy()
+- */
+- svc_get(nn->nfsd_serv);
+-
+ /* Release the thread */
+ svc_exit_thread(rqstp);
+-
+- /* We need to drop a ref, but may not drop the last reference
+- * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
+- * could deadlock with nfsd_shutdown_threads() waiting for us.
+- * So three options are:
+- * - drop a non-final reference,
+- * - get the mutex without waiting
+- * - sleep briefly andd try the above again
+- */
+- while (!svc_put_not_last(nn->nfsd_serv)) {
+- if (mutex_trylock(&nfsd_mutex)) {
+- nfsd_put(net);
+- mutex_unlock(&nfsd_mutex);
+- break;
+- }
+- msleep(20);
+- }
+-
+ return 0;
+ }
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 53405c282209f..6e48c1c88f1bb 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -123,19 +123,6 @@ static inline void svc_put(struct svc_serv *serv)
+ kref_put(&serv->sv_refcnt, svc_destroy);
+ }
+
+-/**
+- * svc_put_not_last - decrement non-final reference count on SUNRPC serv
+- * @serv: the svc_serv to have count decremented
+- *
+- * Returns: %true is refcount was decremented.
+- *
+- * If the refcount is 1, it is not decremented and instead failure is reported.
+- */
+-static inline bool svc_put_not_last(struct svc_serv *serv)
+-{
+- return refcount_dec_not_one(&serv->sv_refcnt.refcount);
+-}
+-
+ /*
+ * Maximum payload size supported by a kernel RPC server.
+ * This is use to determine the max number of pages nfsd is
+--
+2.43.0
+
--- /dev/null
+From 9b3db1e18b4fb74efea1110fbb757aaa240559c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: NFSD: simplify locking for network notifier.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit d057cfec4940ce6eeffa22b4a71dec203b06cd55 ]
+
+nfsd currently maintains an open-coded read/write semaphore (refcount
+and wait queue) for each network namespace to ensure the nfs service
+isn't shut down while the notifier is running.
+
+This is excessive. As there is unlikely to be contention between
+notifiers and they run without sleeping, a single spinlock is sufficient
+to avoid problems.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+[ cel: ensure nfsd_notifier_lock is static ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/netns.h | 3 ---
+ fs/nfsd/nfsctl.c | 2 --
+ fs/nfsd/nfssvc.c | 38 ++++++++++++++++++++------------------
+ 3 files changed, 20 insertions(+), 23 deletions(-)
+
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 1fd59eb0730bb..021acdc0d03bb 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -131,9 +131,6 @@ struct nfsd_net {
+ */
+ int keep_active;
+
+- wait_queue_head_t ntf_wq;
+- atomic_t ntf_refcnt;
+-
+ /*
+ * clientid and stateid data for construction of net unique COPY
+ * stateids.
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 53076c5afe62c..504b169d27881 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1484,8 +1484,6 @@ static __net_init int nfsd_init_net(struct net *net)
+ nn->clientid_counter = nn->clientid_base + 1;
+ nn->s2s_cp_cl_id = nn->clientid_counter++;
+
+- atomic_set(&nn->ntf_refcnt, 0);
+- init_waitqueue_head(&nn->ntf_wq);
+ seqlock_init(&nn->boot_lock);
+
+ return 0;
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 8d49dfbe03f85..8554bc7ff4322 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -434,6 +434,7 @@ static void nfsd_shutdown_net(struct net *net)
+ nfsd_shutdown_generic();
+ }
+
++static DEFINE_SPINLOCK(nfsd_notifier_lock);
+ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+ {
+@@ -443,18 +444,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct sockaddr_in sin;
+
+- if ((event != NETDEV_DOWN) ||
+- !atomic_inc_not_zero(&nn->ntf_refcnt))
++ if (event != NETDEV_DOWN || !nn->nfsd_serv)
+ goto out;
+
++ spin_lock(&nfsd_notifier_lock);
+ if (nn->nfsd_serv) {
+ dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ifa->ifa_local;
+ svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
+ }
+- atomic_dec(&nn->ntf_refcnt);
+- wake_up(&nn->ntf_wq);
++ spin_unlock(&nfsd_notifier_lock);
+
+ out:
+ return NOTIFY_DONE;
+@@ -474,10 +474,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct sockaddr_in6 sin6;
+
+- if ((event != NETDEV_DOWN) ||
+- !atomic_inc_not_zero(&nn->ntf_refcnt))
++ if (event != NETDEV_DOWN || !nn->nfsd_serv)
+ goto out;
+
++ spin_lock(&nfsd_notifier_lock);
+ if (nn->nfsd_serv) {
+ dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
+ sin6.sin6_family = AF_INET6;
+@@ -486,8 +486,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
+ svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
+ }
+- atomic_dec(&nn->ntf_refcnt);
+- wake_up(&nn->ntf_wq);
++ spin_unlock(&nfsd_notifier_lock);
++
+ out:
+ return NOTIFY_DONE;
+ }
+@@ -504,7 +504,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+- atomic_dec(&nn->ntf_refcnt);
+ /* check if the notifier still has clients */
+ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
+ unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
+@@ -512,7 +511,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+ unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
+ #endif
+ }
+- wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
+
+ /*
+ * write_ports can create the server without actually starting
+@@ -624,6 +622,7 @@ int nfsd_create_serv(struct net *net)
+ {
+ int error;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct svc_serv *serv;
+
+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
+ if (nn->nfsd_serv) {
+@@ -633,21 +632,23 @@ int nfsd_create_serv(struct net *net)
+ if (nfsd_max_blksize == 0)
+ nfsd_max_blksize = nfsd_get_default_max_blksize();
+ nfsd_reset_versions(nn);
+- nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+- &nfsd_thread_sv_ops);
+- if (nn->nfsd_serv == NULL)
++ serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
++ &nfsd_thread_sv_ops);
++ if (serv == NULL)
+ return -ENOMEM;
+
+- nn->nfsd_serv->sv_maxconn = nn->max_connections;
+- error = svc_bind(nn->nfsd_serv, net);
++ serv->sv_maxconn = nn->max_connections;
++ error = svc_bind(serv, net);
+ if (error < 0) {
+ /* NOT nfsd_put() as notifiers (see below) haven't
+ * been set up yet.
+ */
+- svc_put(nn->nfsd_serv);
+- nn->nfsd_serv = NULL;
++ svc_put(serv);
+ return error;
+ }
++ spin_lock(&nfsd_notifier_lock);
++ nn->nfsd_serv = serv;
++ spin_unlock(&nfsd_notifier_lock);
+
+ set_max_drc();
+ /* check if the notifier is already set */
+@@ -657,7 +658,6 @@ int nfsd_create_serv(struct net *net)
+ register_inet6addr_notifier(&nfsd_inet6addr_notifier);
+ #endif
+ }
+- atomic_inc(&nn->ntf_refcnt);
+ nfsd_reset_boot_verifier(nn);
+ return 0;
+ }
+@@ -701,7 +701,9 @@ void nfsd_put(struct net *net)
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
+ svc_shutdown_net(nn->nfsd_serv, net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
++ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
++ spin_unlock(&nfsd_notifier_lock);
+ }
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 423daa728097bda27d24c58c5cb4ab672b97ced1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Dec 2021 10:58:14 +1100
+Subject: NFSD: simplify per-net file cache management
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 1463b38e7cf34d4cc60f41daff459ad807b2e408 ]
+
+We currently have a 'laundrette' for closing cached files - a different
+work-item for each network-namespace.
+
+These 'laundrettes' (aka struct nfsd_fcache_disposal) are currently on a
+list, and are freed using rcu.
+
+The list is not necessary as we have a per-namespace structure (struct
+nfsd_net) which can hold a link to the nfsd_fcache_disposal.
+The use of kfree_rcu is also unnecessary as the cache is cleaned of all
+files associated with a given namespace, and no new files can be added,
+before the nfsd_fcache_disposal is freed.
+
+So add a '->fcache_disposal' link to nfsd_net, and discard the list
+management and rcu usage.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 76 +++++++++------------------------------------
+ fs/nfsd/netns.h | 2 ++
+ 2 files changed, 17 insertions(+), 61 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index fbc0628c599af..b99852b30308a 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -44,12 +44,9 @@ struct nfsd_fcache_bucket {
+ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+
+ struct nfsd_fcache_disposal {
+- struct list_head list;
+ struct work_struct work;
+- struct net *net;
+ spinlock_t lock;
+ struct list_head freeme;
+- struct rcu_head rcu;
+ };
+
+ static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
+@@ -62,8 +59,6 @@ static long nfsd_file_lru_flags;
+ static struct fsnotify_group *nfsd_file_fsnotify_group;
+ static atomic_long_t nfsd_filecache_count;
+ static struct delayed_work nfsd_filecache_laundrette;
+-static DEFINE_SPINLOCK(laundrette_lock);
+-static LIST_HEAD(laundrettes);
+
+ static void nfsd_file_gc(void);
+
+@@ -360,19 +355,13 @@ nfsd_file_list_remove_disposal(struct list_head *dst,
+ static void
+ nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+ {
+- struct nfsd_fcache_disposal *l;
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+- rcu_read_lock();
+- list_for_each_entry_rcu(l, &laundrettes, list) {
+- if (l->net == net) {
+- spin_lock(&l->lock);
+- list_splice_tail_init(files, &l->freeme);
+- spin_unlock(&l->lock);
+- queue_work(nfsd_filecache_wq, &l->work);
+- break;
+- }
+- }
+- rcu_read_unlock();
++ spin_lock(&l->lock);
++ list_splice_tail_init(files, &l->freeme);
++ spin_unlock(&l->lock);
++ queue_work(nfsd_filecache_wq, &l->work);
+ }
+
+ static void
+@@ -748,7 +737,7 @@ nfsd_file_cache_purge(struct net *net)
+ }
+
+ static struct nfsd_fcache_disposal *
+-nfsd_alloc_fcache_disposal(struct net *net)
++nfsd_alloc_fcache_disposal(void)
+ {
+ struct nfsd_fcache_disposal *l;
+
+@@ -756,7 +745,6 @@ nfsd_alloc_fcache_disposal(struct net *net)
+ if (!l)
+ return NULL;
+ INIT_WORK(&l->work, nfsd_file_delayed_close);
+- l->net = net;
+ spin_lock_init(&l->lock);
+ INIT_LIST_HEAD(&l->freeme);
+ return l;
+@@ -765,61 +753,27 @@ nfsd_alloc_fcache_disposal(struct net *net)
+ static void
+ nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
+ {
+- rcu_assign_pointer(l->net, NULL);
+ cancel_work_sync(&l->work);
+ nfsd_file_dispose_list(&l->freeme);
+- kfree_rcu(l, rcu);
+-}
+-
+-static void
+-nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
+-{
+- spin_lock(&laundrette_lock);
+- list_add_tail_rcu(&l->list, &laundrettes);
+- spin_unlock(&laundrette_lock);
+-}
+-
+-static void
+-nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
+-{
+- spin_lock(&laundrette_lock);
+- list_del_rcu(&l->list);
+- spin_unlock(&laundrette_lock);
+-}
+-
+-static int
+-nfsd_alloc_fcache_disposal_net(struct net *net)
+-{
+- struct nfsd_fcache_disposal *l;
+-
+- l = nfsd_alloc_fcache_disposal(net);
+- if (!l)
+- return -ENOMEM;
+- nfsd_add_fcache_disposal(l);
+- return 0;
++ kfree(l);
+ }
+
+ static void
+ nfsd_free_fcache_disposal_net(struct net *net)
+ {
+- struct nfsd_fcache_disposal *l;
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+- rcu_read_lock();
+- list_for_each_entry_rcu(l, &laundrettes, list) {
+- if (l->net != net)
+- continue;
+- nfsd_del_fcache_disposal(l);
+- rcu_read_unlock();
+- nfsd_free_fcache_disposal(l);
+- return;
+- }
+- rcu_read_unlock();
++ nfsd_free_fcache_disposal(l);
+ }
+
+ int
+ nfsd_file_cache_start_net(struct net *net)
+ {
+- return nfsd_alloc_fcache_disposal_net(net);
++ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
++
++ nn->fcache_disposal = nfsd_alloc_fcache_disposal();
++ return nn->fcache_disposal ? 0 : -ENOMEM;
+ }
+
+ void
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 021acdc0d03bb..9e8b77d2a3a47 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -185,6 +185,8 @@ struct nfsd_net {
+
+ /* utsname taken from the process that starts the server */
+ char nfsd_name[UNX_MAXNODENAME+1];
++
++ struct nfsd_fcache_disposal *fcache_disposal;
+ };
+
+ /* Simple check to find out if a given net was properly initialized */
+--
+2.43.0
+
--- /dev/null
+From e7a1570110078fc46c51741d5156262ae2fb4cba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Sep 2022 14:01:51 -0400
+Subject: NFSD: Simplify READ_PLUS
+
+From: Anna Schumaker <Anna.Schumaker@Netapp.com>
+
+[ Upstream commit eeadcb75794516839078c28b3730132aeb700ce6 ]
+
+Chuck had suggested reverting READ_PLUS so it returns a single DATA
+segment covering the requested read range. This prepares the server for
+a future "sparse read" function so support can easily be added without
+needing to rip out the old READ_PLUS code at the same time.
+
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 139 +++++++++++-----------------------------------
+ 1 file changed, 32 insertions(+), 107 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 76028a5c81d1d..c2457a9ac00aa 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -4778,79 +4778,37 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
+
+ static __be32
+ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
+- struct nfsd4_read *read,
+- unsigned long *maxcount, u32 *eof,
+- loff_t *pos)
++ struct nfsd4_read *read)
+ {
+- struct xdr_stream *xdr = resp->xdr;
++ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
+ struct file *file = read->rd_nf->nf_file;
+- int starting_len = xdr->buf->len;
+- loff_t hole_pos;
+- __be32 nfserr;
+- __be32 *p, tmp;
+- __be64 tmp64;
+-
+- hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+- if (hole_pos > read->rd_offset)
+- *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
+- *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
++ struct xdr_stream *xdr = resp->xdr;
++ unsigned long maxcount;
++ __be32 nfserr, *p;
+
+ /* Content type, offset, byte count */
+ p = xdr_reserve_space(xdr, 4 + 8 + 4);
+ if (!p)
+- return nfserr_resource;
++ return nfserr_io;
++ if (resp->xdr->buf->page_len && splice_ok) {
++ WARN_ON_ONCE(splice_ok);
++ return nfserr_serverfault;
++ }
+
+- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
+- if (read->rd_vlen < 0)
+- return nfserr_resource;
++ maxcount = min_t(unsigned long, read->rd_length,
++ (xdr->buf->buflen - xdr->buf->len));
+
+- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+- resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
++ if (file->f_op->splice_read && splice_ok)
++ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
++ else
++ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+ if (nfserr)
+ return nfserr;
+- xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
+-
+- tmp = htonl(NFS4_CONTENT_DATA);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+- tmp64 = cpu_to_be64(read->rd_offset);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
+- tmp = htonl(*maxcount);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
+-
+- tmp = xdr_zero;
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
+- xdr_pad_size(*maxcount));
+- return nfs_ok;
+-}
+-
+-static __be32
+-nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
+- struct nfsd4_read *read,
+- unsigned long *maxcount, u32 *eof)
+-{
+- struct file *file = read->rd_nf->nf_file;
+- loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
+- loff_t f_size = i_size_read(file_inode(file));
+- unsigned long count;
+- __be32 *p;
+-
+- if (data_pos == -ENXIO)
+- data_pos = f_size;
+- else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
+- return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
+- count = data_pos - read->rd_offset;
+
+- /* Content type, offset, byte count */
+- p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
+- if (!p)
+- return nfserr_resource;
+-
+- *p++ = htonl(NFS4_CONTENT_HOLE);
++ *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
+ p = xdr_encode_hyper(p, read->rd_offset);
+- p = xdr_encode_hyper(p, count);
++ *p = cpu_to_be32(read->rd_length);
+
+- *eof = (read->rd_offset + count) >= f_size;
+- *maxcount = min_t(unsigned long, count, *maxcount);
+ return nfs_ok;
+ }
+
+@@ -4858,69 +4816,36 @@ static __be32
+ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
+ {
+- unsigned long maxcount, count;
++ struct file *file = read->rd_nf->nf_file;
+ struct xdr_stream *xdr = resp->xdr;
+- struct file *file;
+ int starting_len = xdr->buf->len;
+- int last_segment = xdr->buf->len;
+- int segments = 0;
+- __be32 *p, tmp;
+- bool is_data;
+- loff_t pos;
+- u32 eof;
++ u32 segments = 0;
++ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+- file = read->rd_nf->nf_file;
+
+ /* eof flag, segment count */
+ p = xdr_reserve_space(xdr, 4 + 4);
+ if (!p)
+- return nfserr_resource;
++ return nfserr_io;
+ xdr_commit_encode(xdr);
+
+- maxcount = min_t(unsigned long, read->rd_length,
+- (xdr->buf->buflen - xdr->buf->len));
+- count = maxcount;
+-
+- eof = read->rd_offset >= i_size_read(file_inode(file));
+- if (eof)
++ read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
++ if (read->rd_eof)
+ goto out;
+
+- pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
+- is_data = pos > read->rd_offset;
+-
+- while (count > 0 && !eof) {
+- maxcount = count;
+- if (is_data)
+- nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
+- segments == 0 ? &pos : NULL);
+- else
+- nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
+- if (nfserr)
+- goto out;
+- count -= maxcount;
+- read->rd_offset += maxcount;
+- is_data = !is_data;
+- last_segment = xdr->buf->len;
+- segments++;
+- }
+-
+-out:
+- if (nfserr && segments == 0)
++ nfserr = nfsd4_encode_read_plus_data(resp, read);
++ if (nfserr) {
+ xdr_truncate_encode(xdr, starting_len);
+- else {
+- if (nfserr) {
+- xdr_truncate_encode(xdr, last_segment);
+- nfserr = nfs_ok;
+- eof = 0;
+- }
+- tmp = htonl(eof);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+- tmp = htonl(segments);
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
++ return nfserr;
+ }
+
++ segments++;
++
++out:
++ p = xdr_encode_bool(p, read->rd_eof);
++ *p = cpu_to_be32(segments);
+ return nfserr;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 5ecbeb3dfc1f8599cd64a1ca096f1fc9e2af08c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:09:10 -0400
+Subject: NFSD: Simplify starting_len
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 071ae99feadfc55979f89287d6ad2c6a315cb46d ]
+
+Clean-up: Now that nfsd4_encode_readv() does not have to encode the
+EOF or rd_length values, it no longer needs to subtract 8 from
+@starting_len.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index b7a3c770d436b..310321b9b94cd 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3947,7 +3947,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ struct file *file, unsigned long maxcount)
+ {
+ struct xdr_stream *xdr = resp->xdr;
+- int starting_len = xdr->buf->len - 8;
++ unsigned int starting_len = xdr->buf->len;
+ __be32 nfserr;
+ __be32 tmp;
+ int pad;
+@@ -3962,14 +3962,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ read->rd_length = maxcount;
+ if (nfserr)
+ return nfserr;
+- if (svc_encode_result_payload(resp->rqstp, starting_len + 8, maxcount))
++ if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount))
+ return nfserr_io;
+- xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount));
++ xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount));
+
+ tmp = xdr_zero;
+ pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
+- &tmp, pad);
++ write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &tmp, pad);
+ return 0;
+
+ }
+--
+2.43.0
+
--- /dev/null
+From ffa5ab118d006ec0b5b23e2d81502949f1272e3c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Sep 2021 11:16:32 +1000
+Subject: NFSD: simplify struct nfsfh
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit d8b26071e65e80a348602b939e333242f989221b ]
+
+Most of the fields in 'struct knfsd_fh' are 2 levels deep (a union and a
+struct) and are accessed using macros like:
+
+ #define fh_FOO fh_base.fh_new.fb_FOO
+
+This patch makes the union and struct anonymous, so that "fh_FOO" can be
+a name directly within 'struct knfsd_fh' and the #defines aren't needed.
+
+The file handle as a whole is sometimes accessed as "fh_base" or
+"fh_base.fh_pad", neither of which are particularly helpful names.
+As the struct holding the filehandle is now anonymous, we
+cannot use the name of that, so we union it with 'fh_raw' and use that
+where the raw filehandle is needed. fh_raw also ensures the structure is
+large enough for the largest possible filehandle.
+
+fh_raw is a 'char' array, removing any need to cast it for memcpy etc.
+
+SVCFH_fmt() is simplified using the "%ph" printk format. This
+changes the appearance of filehandles in dprintk() debugging, making
+them a little more precise.
+
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/flexfilelayout.c | 2 +-
+ fs/nfsd/lockd.c | 2 +-
+ fs/nfsd/nfs3xdr.c | 4 ++--
+ fs/nfsd/nfs4callback.c | 2 +-
+ fs/nfsd/nfs4proc.c | 4 ++--
+ fs/nfsd/nfs4state.c | 4 ++--
+ fs/nfsd/nfs4xdr.c | 4 ++--
+ fs/nfsd/nfsctl.c | 6 ++---
+ fs/nfsd/nfsfh.c | 13 ++++-------
+ fs/nfsd/nfsfh.h | 50 ++++++++++++----------------------------
+ fs/nfsd/nfsxdr.c | 4 ++--
+ 11 files changed, 35 insertions(+), 60 deletions(-)
+
+diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
+index db7ef07ae50c9..2e2f1d5e9f623 100644
+--- a/fs/nfsd/flexfilelayout.c
++++ b/fs/nfsd/flexfilelayout.c
+@@ -61,7 +61,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
+ goto out_error;
+
+ fl->fh.size = fhp->fh_handle.fh_size;
+- memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size);
++ memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size);
+
+ /* Give whole file layout segments */
+ seg->offset = 0;
+diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
+index 606fa155c28ad..46a7f9b813e52 100644
+--- a/fs/nfsd/lockd.c
++++ b/fs/nfsd/lockd.c
+@@ -35,7 +35,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
+ /* must initialize before using! but maxsize doesn't matter */
+ fh_init(&fh,0);
+ fh.fh_handle.fh_size = f->size;
+- memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
++ memcpy(&fh.fh_handle.fh_raw, f->data, f->size);
+ fh.fh_export = NULL;
+
+ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 48d4f99b7f901..c69d0dc50a669 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -92,7 +92,7 @@ svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp)
+ return false;
+ fh_init(fhp, NFS3_FHSIZE);
+ fhp->fh_handle.fh_size = size;
+- memcpy(&fhp->fh_handle.fh_base, p, size);
++ memcpy(&fhp->fh_handle.fh_raw, p, size);
+
+ return true;
+ }
+@@ -131,7 +131,7 @@ svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp)
+ *p++ = cpu_to_be32(size);
+ if (size)
+ p[XDR_QUADLEN(size) - 1] = 0;
+- memcpy(p, &fhp->fh_handle.fh_base, size);
++ memcpy(p, &fhp->fh_handle.fh_raw, size);
+
+ return true;
+ }
+diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
+index 97f517e9b4189..e1272a7f45220 100644
+--- a/fs/nfsd/nfs4callback.c
++++ b/fs/nfsd/nfs4callback.c
+@@ -121,7 +121,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
+
+ BUG_ON(length > NFS4_FHSIZE);
+ p = xdr_reserve_space(xdr, 4 + length);
+- xdr_encode_opaque(p, &fh->fh_base, length);
++ xdr_encode_opaque(p, &fh->fh_raw, length);
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index f71af990e1e81..f5ac637b6e83d 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -519,7 +519,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+
+ fh_put(&cstate->current_fh);
+ cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
+- memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
++ memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval,
+ putfh->pf_fhlen);
+ ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+ #ifdef CONFIG_NFSD_V4_2_INTER_SSC
+@@ -1383,7 +1383,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ s_fh = &cstate->save_fh;
+
+ copy->c_fh.size = s_fh->fh_handle.fh_size;
+- memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size);
++ memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size);
+ copy->stateid.seqid = cpu_to_be32(s_stid->si_generation);
+ memcpy(copy->stateid.other, (void *)&s_stid->si_opaque,
+ sizeof(stateid_opaque_t));
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 9b660491f3931..26c4212bcfcde 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1022,7 +1022,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
+ }
+ spin_unlock(&blocked_delegations_lock);
+ }
+- hash = jhash(&fh->fh_base, fh->fh_size, 0);
++ hash = jhash(&fh->fh_raw, fh->fh_size, 0);
+ if (test_bit(hash&255, bd->set[0]) &&
+ test_bit((hash>>8)&255, bd->set[0]) &&
+ test_bit((hash>>16)&255, bd->set[0]))
+@@ -1041,7 +1041,7 @@ static void block_delegations(struct knfsd_fh *fh)
+ u32 hash;
+ struct bloom_pair *bd = &blocked_delegations;
+
+- hash = jhash(&fh->fh_base, fh->fh_size, 0);
++ hash = jhash(&fh->fh_raw, fh->fh_size, 0);
+
+ spin_lock(&blocked_delegations_lock);
+ __set_bit(hash&255, bd->set[bd->new]);
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index d28b75909de89..1474af184368d 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3104,7 +3104,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4);
+ if (!p)
+ goto out_resource;
+- p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base,
++ p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw,
+ fhp->fh_handle.fh_size);
+ }
+ if (bmval0 & FATTR4_WORD0_FILEID) {
+@@ -3675,7 +3675,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
+ p = xdr_reserve_space(xdr, len + 4);
+ if (!p)
+ return nfserr_resource;
+- p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
++ p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len);
+ return 0;
+ }
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index cb73c12925629..d0761ca8cb542 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -395,12 +395,12 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
+ auth_domain_put(dom);
+ if (len)
+ return len;
+-
++
+ mesg = buf;
+ len = SIMPLE_TRANSACTION_LIMIT;
+- qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size);
++ qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size);
+ mesg[-1] = '\n';
+- return mesg - buf;
++ return mesg - buf;
+ }
+
+ /*
+diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
+index 149f9bbc48a4e..f3779fa72c896 100644
+--- a/fs/nfsd/nfsfh.c
++++ b/fs/nfsd/nfsfh.c
+@@ -640,16 +640,11 @@ fh_put(struct svc_fh *fhp)
+ char * SVCFH_fmt(struct svc_fh *fhp)
+ {
+ struct knfsd_fh *fh = &fhp->fh_handle;
++ static char buf[2+1+1+64*3+1];
+
+- static char buf[80];
+- sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
+- fh->fh_size,
+- fh->fh_base.fh_pad[0],
+- fh->fh_base.fh_pad[1],
+- fh->fh_base.fh_pad[2],
+- fh->fh_base.fh_pad[3],
+- fh->fh_base.fh_pad[4],
+- fh->fh_base.fh_pad[5]);
++ if (fh->fh_size < 0 || fh->fh_size> 64)
++ return "bad-fh";
++ sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw);
+ return buf;
+ }
+
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index 8b5587f274a7d..d11e4b6870d68 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -43,44 +43,24 @@
+ * filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
+ * in include/linux/exportfs.h for currently registered values.
+ */
+-struct nfs_fhbase_new {
+- union {
+- struct {
+- u8 fb_version_aux; /* == 1 */
+- u8 fb_auth_type_aux;
+- u8 fb_fsid_type_aux;
+- u8 fb_fileid_type_aux;
+- u32 fb_auth[1];
+- /* u32 fb_fsid[0]; floating */
+- /* u32 fb_fileid[0]; floating */
+- };
+- struct {
+- u8 fb_version; /* == 1 */
+- u8 fb_auth_type;
+- u8 fb_fsid_type;
+- u8 fb_fileid_type;
+- u32 fb_auth_flex[]; /* flexible-array member */
+- };
+- };
+-};
+
+ struct knfsd_fh {
+- unsigned int fh_size; /* significant for NFSv3.
+- * Points to the current size while building
+- * a new file handle
++ unsigned int fh_size; /*
++ * Points to the current size while
++ * building a new file handle.
+ */
+ union {
+- u32 fh_pad[NFS4_FHSIZE/4];
+- struct nfs_fhbase_new fh_new;
+- } fh_base;
++ char fh_raw[NFS4_FHSIZE];
++ struct {
++ u8 fh_version; /* == 1 */
++ u8 fh_auth_type; /* deprecated */
++ u8 fh_fsid_type;
++ u8 fh_fileid_type;
++ u32 fh_fsid[]; /* flexible-array member */
++ };
++ };
+ };
+
+-#define fh_version fh_base.fh_new.fb_version
+-#define fh_fsid_type fh_base.fh_new.fb_fsid_type
+-#define fh_auth_type fh_base.fh_new.fb_auth_type
+-#define fh_fileid_type fh_base.fh_new.fb_fileid_type
+-#define fh_fsid fh_base.fh_new.fb_auth_flex
+-
+ static inline __u32 ino_t_to_u32(ino_t ino)
+ {
+ return (__u32) ino;
+@@ -255,7 +235,7 @@ static inline void
+ fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
+ {
+ dst->fh_size = src->fh_size;
+- memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
++ memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size);
+ }
+
+ static __inline__ struct svc_fh *
+@@ -270,7 +250,7 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+ {
+ if (fh1->fh_size != fh2->fh_size)
+ return false;
+- if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0)
++ if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0)
+ return false;
+ return true;
+ }
+@@ -294,7 +274,7 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+ */
+ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
+ {
+- return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size);
++ return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
+ }
+ #else
+ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index 26a42f87c2409..ddcc18adfeb1a 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -64,7 +64,7 @@ svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp)
+ if (!p)
+ return false;
+ fh_init(fhp, NFS_FHSIZE);
+- memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
++ memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE);
+ fhp->fh_handle.fh_size = NFS_FHSIZE;
+
+ return true;
+@@ -78,7 +78,7 @@ svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp)
+ p = xdr_reserve_space(xdr, NFS_FHSIZE);
+ if (!p)
+ return false;
+- memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
++ memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE);
+
+ return true;
+ }
+--
+2.43.0
+
--- /dev/null
+From 8df1b671e9c6dfebf85f0e64db7f0bc2559b7f73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 10:39:01 -0500
+Subject: nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit d69b8dbfd0866abc5ec84652cc1c10fc3d4d91ef ]
+
+test_bit returns bool, so we can just compare the result of that to the
+key->gc value without the "!!".
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 4ddc82b84f7c4..d61c8223082a4 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -188,7 +188,7 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ return 1;
+ if (!nfsd_match_cred(nf->nf_cred, key->cred))
+ return 1;
+- if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
++ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
+ return 1;
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+ return 1;
+--
+2.43.0
+
--- /dev/null
+From fc3d2b992a7a4b9e437f5c9f870d7ed7b839a0a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Apr 2023 17:31:44 -0400
+Subject: nfsd: simplify the delayed disposal list code
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 92e4a6733f922f0fef1d0995f7b2d0eaff86c7ea ]
+
+When queueing a dispose list to the appropriate "freeme" lists, the code
+pointlessly queues the objects one at a time to an intermediate list.
+
+Remove a few helpers and just open code a list_move to make it more
+clear and efficient. Better document the resulting functions with
+kerneldoc comments.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 64 ++++++++++++++++-----------------------------
+ 1 file changed, 22 insertions(+), 42 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 52e67ec267965..6b8706f23eaf0 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -401,49 +401,26 @@ nfsd_file_dispose_list(struct list_head *dispose)
+ }
+ }
+
+-static void
+-nfsd_file_list_remove_disposal(struct list_head *dst,
+- struct nfsd_fcache_disposal *l)
+-{
+- spin_lock(&l->lock);
+- list_splice_init(&l->freeme, dst);
+- spin_unlock(&l->lock);
+-}
+-
+-static void
+-nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+-{
+- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+- struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+-
+- spin_lock(&l->lock);
+- list_splice_tail_init(files, &l->freeme);
+- spin_unlock(&l->lock);
+- queue_work(nfsd_filecache_wq, &l->work);
+-}
+-
+-static void
+-nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+- struct net *net)
+-{
+- struct nfsd_file *nf, *tmp;
+-
+- list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+- if (nf->nf_net == net)
+- list_move_tail(&nf->nf_lru, dst);
+- }
+-}
+-
++/**
++ * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
++ * @dispose: list of nfsd_files to be disposed
++ *
++ * Transfers each file to the "freeme" list for its nfsd_net, to eventually
++ * be disposed of by the per-net garbage collector.
++ */
+ static void
+ nfsd_file_dispose_list_delayed(struct list_head *dispose)
+ {
+- LIST_HEAD(list);
+- struct nfsd_file *nf;
+-
+ while(!list_empty(dispose)) {
+- nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+- nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+- nfsd_file_list_add_disposal(&list, nf->nf_net);
++ struct nfsd_file *nf = list_first_entry(dispose,
++ struct nfsd_file, nf_lru);
++ struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
++ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
++
++ spin_lock(&l->lock);
++ list_move_tail(&nf->nf_lru, &l->freeme);
++ spin_unlock(&l->lock);
++ queue_work(nfsd_filecache_wq, &l->work);
+ }
+ }
+
+@@ -664,8 +641,8 @@ nfsd_file_close_inode_sync(struct inode *inode)
+ * nfsd_file_delayed_close - close unused nfsd_files
+ * @work: dummy
+ *
+- * Walk the LRU list and destroy any entries that have not been used since
+- * the last scan.
++ * Scrape the freeme list for this nfsd_net, and then dispose of them
++ * all.
+ */
+ static void
+ nfsd_file_delayed_close(struct work_struct *work)
+@@ -674,7 +651,10 @@ nfsd_file_delayed_close(struct work_struct *work)
+ struct nfsd_fcache_disposal *l = container_of(work,
+ struct nfsd_fcache_disposal, work);
+
+- nfsd_file_list_remove_disposal(&head, l);
++ spin_lock(&l->lock);
++ list_splice_init(&l->freeme, &head);
++ spin_unlock(&l->lock);
++
+ nfsd_file_dispose_list(&head);
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 6847eab9547b6b4c8da4a560260fe4c7626aa110 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Sep 2021 11:39:02 -0400
+Subject: NFSD: Skip extra computation for RC_NOCACHE case
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0f29ce32fbc56cfdb304eec8a4deb920ccfd89c3 ]
+
+Force the compiler to skip unneeded initialization for cases that
+don't need those values. For example, NFSv4 COMPOUND operations are
+RC_NOCACHE.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfscache.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 7880a2f2d29f6..1523d2e3dae97 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -421,10 +421,10 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+ */
+ int nfsd_cache_lookup(struct svc_rqst *rqstp)
+ {
+- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
++ struct nfsd_net *nn;
+ struct svc_cacherep *rp, *found;
+ __wsum csum;
+- struct nfsd_drc_bucket *b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
++ struct nfsd_drc_bucket *b;
+ int type = rqstp->rq_cachetype;
+ int rtn = RC_DOIT;
+
+@@ -440,10 +440,12 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
+ * Since the common case is a cache miss followed by an insert,
+ * preallocate an entry.
+ */
++ nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
+ if (!rp)
+ goto out;
+
++ b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
+ spin_lock(&b->cache_lock);
+ found = nfsd_cache_insert(b, rp, nn);
+ if (found != rp) {
+--
+2.43.0
+
--- /dev/null
+From 987c4add1f01341650277784069a49897517ba1e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Sep 2021 11:40:59 -0400
+Subject: NFSD: Streamline the rare "found" case
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit add1511c38166cf1036765f8c4aa939f0275a799 ]
+
+Move a rarely called function call site out of the hot path.
+
+This is an exceptionally small improvement because the compiler
+inlines most of the functions that nfsd_cache_lookup() calls.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfscache.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 1523d2e3dae97..7da88bdc0d6c3 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -448,11 +448,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
+ b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
+ spin_lock(&b->cache_lock);
+ found = nfsd_cache_insert(b, rp, nn);
+- if (found != rp) {
+- nfsd_reply_cache_free_locked(NULL, rp, nn);
+- rp = found;
++ if (found != rp)
+ goto found_entry;
+- }
+
+ nfsd_stats_rc_misses_inc();
+ rqstp->rq_cacherep = rp;
+@@ -470,8 +467,10 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
+
+ found_entry:
+ /* We found a matching entry which is either in progress or done. */
++ nfsd_reply_cache_free_locked(NULL, rp, nn);
+ nfsd_stats_rc_hits_inc();
+ rtn = RC_DROPIT;
++ rp = found;
+
+ /* Request being processed */
+ if (rp->c_state == RC_INPROG)
+--
+2.43.0
+
--- /dev/null
+From 006b67ed1a7f10c05e778b267598f0eb10cbc895 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Dec 2021 14:27:56 -0500
+Subject: NFSD: Trace boot verifier resets
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 75acacb6583df0b9328dc701d8eeea05af49b8b5 ]
+
+According to commit bbf2f098838a ("nfsd: Reset the boot verifier on
+all write I/O errors"), the Linux NFS server forces all clients to
+resend pending unstable writes if any server-side write or commit
+operation encounters an error (say, ENOSPC). This is a rare and
+quite exceptional event that could require administrative recovery
+action, so it should be made trace-able. Example trace event:
+
+nfsd-938 [002] 7174.945558: nfsd_writeverf_reset: boot_time= 61cc920d xid=0xdcd62036 error=-28 new verifier=0x08aecc6142515904
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/trace.h | 28 ++++++++++++++++++++++++++++
+ fs/nfsd/vfs.c | 13 ++++++++++---
+ 2 files changed, 38 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 52c4a4e001729..c55fd77d43605 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -574,6 +574,34 @@ DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
+ DEFINE_NET_EVENT(grace_start);
+ DEFINE_NET_EVENT(grace_complete);
+
++TRACE_EVENT(nfsd_writeverf_reset,
++ TP_PROTO(
++ const struct nfsd_net *nn,
++ const struct svc_rqst *rqstp,
++ int error
++ ),
++ TP_ARGS(nn, rqstp, error),
++ TP_STRUCT__entry(
++ __field(unsigned long long, boot_time)
++ __field(u32, xid)
++ __field(int, error)
++ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
++ ),
++ TP_fast_assign(
++ __entry->boot_time = nn->boot_time;
++ __entry->xid = be32_to_cpu(rqstp->rq_xid);
++ __entry->error = error;
++
++ /* avoid seqlock inside TP_fast_assign */
++ memcpy(__entry->verifier, nn->writeverf,
++ NFS4_VERIFIER_SIZE);
++ ),
++ TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
++ __entry->boot_time, __entry->xid, __entry->error,
++ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
++ )
++);
++
+ TRACE_EVENT(nfsd_clid_cred_mismatch,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index d7035e3d1a229..284dc900d10ba 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -559,14 +559,17 @@ __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
++ struct nfsd_net *nn = net_generic(nf_dst->nf_net,
++ nfsd_net_id);
++
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
+- nfsd_reset_write_verifier(net_generic(nf_dst->nf_net,
+- nfsd_net_id));
++ nfsd_reset_write_verifier(nn);
++ trace_nfsd_writeverf_reset(nn, rqstp, status);
+ ret = nfserrno(status);
+ }
+ }
+@@ -1029,6 +1032,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0) {
+ nfsd_reset_write_verifier(nn);
++ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
+ goto out_nfserr;
+ }
+ *cnt = host_err;
+@@ -1040,8 +1044,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+
+ if (stable && use_wgather) {
+ host_err = wait_for_concurrent_writes(file);
+- if (host_err < 0)
++ if (host_err < 0) {
+ nfsd_reset_write_verifier(nn);
++ trace_nfsd_writeverf_reset(nn, rqstp, host_err);
++ }
+ }
+
+ out_nfserr:
+@@ -1183,6 +1189,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+ break;
+ default:
+ nfsd_reset_write_verifier(nn);
++ trace_nfsd_writeverf_reset(nn, rqstp, err2);
+ err = nfserrno(err2);
+ }
+ } else
+--
+2.43.0
+
--- /dev/null
+From b77159d89958e0e59dfa4d5063d8dca7d30356e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:09 -0400
+Subject: NFSD: Trace delegation revocations
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit a1c74569bbde91299f24535abf711be5c84df9de ]
+
+Delegation revocation is an exceptional event that is not otherwise
+visible externally (e.g., no network traffic is emitted). Generate a
+trace record when it occurs so that revocation can be observed or
+other activity can be triggered. Example:
+
+nfsd-1104 [005] 1912.002544: nfsd_stid_revoke: client 633c9343:4e82788d stateid 00000003:00000001 ref=2 type=DELEG
+
+Trace infrastructure is provided for subsequent additional tracing
+related to nfs4_stid activity.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 2 ++
+ fs/nfsd/trace.h | 55 +++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 57 insertions(+)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 9351111730834..b2a4d442af669 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1366,6 +1366,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
+
+ WARN_ON(!list_empty(&dp->dl_recall_lru));
+
++ trace_nfsd_stid_revoke(&dp->dl_stid);
++
+ if (clp->cl_minorversion) {
+ spin_lock(&clp->cl_lock);
+ dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index d55a05f1a58f7..d50d4d6e822df 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -637,6 +637,61 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
+ DEFINE_STATESEQID_EVENT(preprocess);
+ DEFINE_STATESEQID_EVENT(open_confirm);
+
++TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
++TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
++TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
++TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
++TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
++TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
++TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
++
++#define show_stid_type(x) \
++ __print_flags(x, "|", \
++ { NFS4_OPEN_STID, "OPEN" }, \
++ { NFS4_LOCK_STID, "LOCK" }, \
++ { NFS4_DELEG_STID, "DELEG" }, \
++ { NFS4_CLOSED_STID, "CLOSED" }, \
++ { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
++ { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
++ { NFS4_LAYOUT_STID, "LAYOUT" })
++
++DECLARE_EVENT_CLASS(nfsd_stid_class,
++ TP_PROTO(
++ const struct nfs4_stid *stid
++ ),
++ TP_ARGS(stid),
++ TP_STRUCT__entry(
++ __field(unsigned long, sc_type)
++ __field(int, sc_count)
++ __field(u32, cl_boot)
++ __field(u32, cl_id)
++ __field(u32, si_id)
++ __field(u32, si_generation)
++ ),
++ TP_fast_assign(
++ const stateid_t *stp = &stid->sc_stateid;
++
++ __entry->sc_type = stid->sc_type;
++ __entry->sc_count = refcount_read(&stid->sc_count);
++ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
++ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
++ __entry->si_id = stp->si_opaque.so_id;
++ __entry->si_generation = stp->si_generation;
++ ),
++ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
++ __entry->cl_boot, __entry->cl_id,
++ __entry->si_id, __entry->si_generation,
++ __entry->sc_count, show_stid_type(__entry->sc_type)
++ )
++);
++
++#define DEFINE_STID_EVENT(name) \
++DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
++ TP_PROTO(const struct nfs4_stid *stid), \
++ TP_ARGS(stid))
++
++DEFINE_STID_EVENT(revoke);
++
+ DECLARE_EVENT_CLASS(nfsd_clientid_class,
+ TP_PROTO(const clientid_t *clid),
+ TP_ARGS(clid),
+--
+2.43.0
+
--- /dev/null
+From 3e2ae8d8d7b9f6d39f817a586e7f38cf0477965b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:11 -0400
+Subject: NFSD: Trace filecache LRU activity
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c46203acddd9b9200dbc53d0603c97355fd3a03b ]
+
+Observe the operation of garbage collection and the lifetime of
+filecache items.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 44 +++++++++++++++++++++++++++++++-------------
+ fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 70 insertions(+), 13 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index d9b5f1e183976..a995a744a7481 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -260,6 +260,18 @@ nfsd_file_flush(struct nfsd_file *nf)
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+ }
+
++static void nfsd_file_lru_add(struct nfsd_file *nf)
++{
++ if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
++ trace_nfsd_file_lru_add(nf);
++}
++
++static void nfsd_file_lru_remove(struct nfsd_file *nf)
++{
++ if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
++ trace_nfsd_file_lru_del(nf);
++}
++
+ static void
+ nfsd_file_do_unhash(struct nfsd_file *nf)
+ {
+@@ -279,8 +291,7 @@ nfsd_file_unhash(struct nfsd_file *nf)
+ {
+ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_do_unhash(nf);
+- if (!list_empty(&nf->nf_lru))
+- list_lru_del(&nfsd_file_lru, &nf->nf_lru);
++ nfsd_file_lru_remove(nf);
+ return true;
+ }
+ return false;
+@@ -443,27 +454,34 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ * counter. Here we check the counter and then test and clear the flag.
+ * That order is deliberate to ensure that we can do this locklessly.
+ */
+- if (refcount_read(&nf->nf_ref) > 1)
+- goto out_skip;
++ if (refcount_read(&nf->nf_ref) > 1) {
++ trace_nfsd_file_gc_in_use(nf);
++ return LRU_SKIP;
++ }
+
+ /*
+ * Don't throw out files that are still undergoing I/O or
+ * that have uncleared errors pending.
+ */
+- if (nfsd_file_check_writeback(nf))
+- goto out_skip;
++ if (nfsd_file_check_writeback(nf)) {
++ trace_nfsd_file_gc_writeback(nf);
++ return LRU_SKIP;
++ }
+
+- if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
+- goto out_skip;
++ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
++ trace_nfsd_file_gc_referenced(nf);
++ return LRU_SKIP;
++ }
+
+- if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+- goto out_skip;
++ if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
++ trace_nfsd_file_gc_hashed(nf);
++ return LRU_SKIP;
++ }
+
+ list_lru_isolate_move(lru, &nf->nf_lru, head);
+ this_cpu_inc(nfsd_file_evictions);
++ trace_nfsd_file_gc_disposed(nf);
+ return LRU_REMOVED;
+-out_skip:
+- return LRU_SKIP;
+ }
+
+ /*
+@@ -1016,7 +1034,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ refcount_inc(&nf->nf_ref);
+ __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+ __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+- list_lru_add(&nfsd_file_lru, &nf->nf_lru);
++ nfsd_file_lru_add(nf);
+ hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
+ ++nfsd_file_hashtbl[hashval].nfb_count;
+ nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 71919f7a31dc8..c47f46d433ddb 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -894,6 +894,45 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ __entry->nlink, __entry->mode, __entry->mask)
+ );
+
++DECLARE_EVENT_CLASS(nfsd_file_gc_class,
++ TP_PROTO(
++ const struct nfsd_file *nf
++ ),
++ TP_ARGS(nf),
++ TP_STRUCT__entry(
++ __field(void *, nf_inode)
++ __field(void *, nf_file)
++ __field(int, nf_ref)
++ __field(unsigned long, nf_flags)
++ ),
++ TP_fast_assign(
++ __entry->nf_inode = nf->nf_inode;
++ __entry->nf_file = nf->nf_file;
++ __entry->nf_ref = refcount_read(&nf->nf_ref);
++ __entry->nf_flags = nf->nf_flags;
++ ),
++ TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p",
++ __entry->nf_inode, __entry->nf_ref,
++ show_nf_flags(__entry->nf_flags),
++ __entry->nf_file
++ )
++);
++
++#define DEFINE_NFSD_FILE_GC_EVENT(name) \
++DEFINE_EVENT(nfsd_file_gc_class, name, \
++ TP_PROTO( \
++ const struct nfsd_file *nf \
++ ), \
++ TP_ARGS(nf))
++
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed);
++DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
++
+ DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
+ TP_PROTO(
+ unsigned long removed,
+--
+2.43.0
+
--- /dev/null
+From d5937b0e84180848f17fd09122e312f334446553 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Mar 2022 16:42:20 -0400
+Subject: NFSD: Trace filecache opens
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 0122e882119ddbd9efa6edfeeac3f5c704a7aeea ]
+
+Instrument calls to nfsd_open_verified() to get a sense of the
+filecache hit rate.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 5 +++--
+ fs/nfsd/trace.h | 28 ++++++++++++++++++++++++++++
+ 2 files changed, 31 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 045f5a869ddc7..0863bf5050935 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -976,10 +976,11 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf);
+ if (nf->nf_mark) {
+- if (open)
++ if (open) {
+ status = nfsd_open_verified(rqstp, fhp, may_flags,
+ &nf->nf_file);
+- else
++ trace_nfsd_file_open(nf, status);
++ } else
+ status = nfs_ok;
+ } else
+ status = nfserr_jukebox;
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 7f3f40f6c0ff3..3cff3ada00a85 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -775,6 +775,34 @@ TRACE_EVENT(nfsd_file_acquire,
+ __entry->nf_file, __entry->status)
+ );
+
++TRACE_EVENT(nfsd_file_open,
++ TP_PROTO(struct nfsd_file *nf, __be32 status),
++ TP_ARGS(nf, status),
++ TP_STRUCT__entry(
++ __field(unsigned int, nf_hashval)
++ __field(void *, nf_inode) /* cannot be dereferenced */
++ __field(int, nf_ref)
++ __field(unsigned long, nf_flags)
++ __field(unsigned long, nf_may)
++ __field(void *, nf_file) /* cannot be dereferenced */
++ ),
++ TP_fast_assign(
++ __entry->nf_hashval = nf->nf_hashval;
++ __entry->nf_inode = nf->nf_inode;
++ __entry->nf_ref = refcount_read(&nf->nf_ref);
++ __entry->nf_flags = nf->nf_flags;
++ __entry->nf_may = nf->nf_may;
++ __entry->nf_file = nf->nf_file;
++ ),
++ TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p",
++ __entry->nf_hashval,
++ __entry->nf_inode,
++ __entry->nf_ref,
++ show_nf_flags(__entry->nf_flags),
++ show_nfsd_may_flags(__entry->nf_may),
++ __entry->nf_file)
++)
++
+ DECLARE_EVENT_CLASS(nfsd_file_search_class,
+ TP_PROTO(struct inode *inode, unsigned int hash, int found),
+ TP_ARGS(inode, hash, found),
+--
+2.43.0
+
--- /dev/null
+From 2c3dde5e8fa4958187254ab7cd889aa4ceff2b66 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Sep 2022 18:13:48 -0400
+Subject: NFSD: Trace NFSv4 COMPOUND tags
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit de29cf7e6cbbe236c3a51999c188fcd467762899 ]
+
+The Linux NFSv4 client implementation does not use COMPOUND tags,
+but the Solaris and MacOS implementations do, and so does pynfs.
+Record these eye-catchers in the server's trace buffer to annotate
+client requests while troubleshooting.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 2 +-
+ fs/nfsd/trace.h | 21 ++++++++++++++-------
+ 2 files changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 69d3013fb1b26..e4c0dc577fe35 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -2637,7 +2637,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+
+ rqstp->rq_lease_breaker = (void **)&cstate->clp;
+
+- trace_nfsd_compound(rqstp, args->client_opcnt);
++ trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
+ while (!status && resp->opcnt < args->opcnt) {
+ op = &args->ops[resp->opcnt++];
+
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index 297bf9ddc5090..c5d4a258680c3 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -84,19 +84,26 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
+ { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" })
+
+ TRACE_EVENT(nfsd_compound,
+- TP_PROTO(const struct svc_rqst *rqst,
+- u32 args_opcnt),
+- TP_ARGS(rqst, args_opcnt),
++ TP_PROTO(
++ const struct svc_rqst *rqst,
++ const char *tag,
++ u32 taglen,
++ u32 opcnt
++ ),
++ TP_ARGS(rqst, tag, taglen, opcnt),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+- __field(u32, args_opcnt)
++ __field(u32, opcnt)
++ __string_len(tag, tag, taglen)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+- __entry->args_opcnt = args_opcnt;
++ __entry->opcnt = opcnt;
++ __assign_str_len(tag, tag, taglen);
+ ),
+- TP_printk("xid=0x%08x opcnt=%u",
+- __entry->xid, __entry->args_opcnt)
++ TP_printk("xid=0x%08x opcnt=%u tag=%s",
++ __entry->xid, __entry->opcnt, __get_str(tag)
++ )
+ )
+
+ TRACE_EVENT(nfsd_compound_status,
+--
+2.43.0
+
--- /dev/null
+From a1729b17413fc2ae4a10e03e028387f5c53255b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:03 -0400
+Subject: NFSD: Trace stateids returned via DELEGRETURN
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 20eee313ff4b8a7e71ae9560f5c4ba27cd763005 ]
+
+Handing out a delegation stateid is recorded with the
+nfsd_deleg_read tracepoint, but there isn't a matching tracepoint
+for recording when the stateid is returned.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 1 +
+ fs/nfsd/trace.h | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 2f720433632b8..9351111730834 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -6915,6 +6915,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ if (status)
+ goto put_stateid;
+
++ trace_nfsd_deleg_return(stateid);
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
+ destroy_delegation(dp);
+ put_stateid:
+diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
+index d449c364cc76b..d55a05f1a58f7 100644
+--- a/fs/nfsd/trace.h
++++ b/fs/nfsd/trace.h
+@@ -604,6 +604,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
+
+ DEFINE_STATEID_EVENT(open);
+ DEFINE_STATEID_EVENT(deleg_read);
++DEFINE_STATEID_EVENT(deleg_return);
+ DEFINE_STATEID_EVENT(deleg_recall);
+
+ DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
+--
+2.43.0
+
--- /dev/null
+From f2917e5b17ce71ee82a46dd949bca8b5689a2740 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Oct 2022 14:59:02 +0900
+Subject: NFSD: unregister shrinker when nfsd_init_net() fails
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit bd86c69dae65de30f6d47249418ba7889809e31a ]
+
+syzbot is reporting a UAF read at register_shrinker_prepared() [1], because
+commit 7746b32f467b3813 ("NFSD: add shrinker to reap courtesy clients on
+low memory condition") missed that nfsd4_leases_net_shutdown() from
+nfsd_exit_net() is called only when nfsd_init_net() succeeded.
+If nfsd_init_net() fails due to nfsd_reply_cache_init() failure,
+register_shrinker() from nfsd4_init_leases_net() has to be undone
+before nfsd_init_net() returns.
+
+Link: https://syzkaller.appspot.com/bug?extid=ff796f04613b4c84ad89 [1]
+Reported-by: syzbot <syzbot+ff796f04613b4c84ad89@syzkaller.appspotmail.com>
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Fixes: 7746b32f467b3813 ("NFSD: add shrinker to reap courtesy clients on low memory condition")
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 6a29bcfc93909..dc74a947a440c 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1458,12 +1458,14 @@ static __net_init int nfsd_init_net(struct net *net)
+ goto out_drc_error;
+ retval = nfsd_reply_cache_init(nn);
+ if (retval)
+- goto out_drc_error;
++ goto out_cache_error;
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
+
+ return 0;
+
++out_cache_error:
++ nfsd4_leases_net_shutdown(nn);
+ out_drc_error:
+ nfsd_idmap_shutdown(net);
+ out_idmap_error:
+--
+2.43.0
+
--- /dev/null
+From 175f7361384b4ab1069b943b2191caeed45adcba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 May 2022 12:08:44 +0800
+Subject: nfsd: Unregister the cld notifier when laundry_wq create failed
+
+From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+
+[ Upstream commit 62fdb65edb6c43306c774939001f3a00974832aa ]
+
+If creating laundry_wq fails, the cld notifier should be unregistered.
+
+Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 322a208878f2c..55949e60897d5 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -1543,12 +1543,14 @@ static int __init init_nfsd(void)
+ goto out_free_filesystem;
+ retval = register_cld_notifier();
+ if (retval)
+- goto out_free_all;
++ goto out_free_subsys;
+ retval = nfsd4_create_laundry_wq();
+ if (retval)
+ goto out_free_all;
+ return 0;
+ out_free_all:
++ unregister_cld_notifier();
++out_free_subsys:
+ unregister_pernet_subsys(&nfsd_net_ops);
+ out_free_filesystem:
+ unregister_filesystem(&nfsd_fs_type);
+--
+2.43.0
+
--- /dev/null
+From 4732051fb6cfb79c738024b8d915e7e0fd966998 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Jan 2023 12:21:16 -0500
+Subject: nfsd: update comment over __nfsd_file_cache_purge
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 972cc0e0924598cb293b919d39c848dc038b2c28 ]
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 786e06cf107ff..1d4c0387c4192 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -906,7 +906,8 @@ nfsd_file_cache_init(void)
+ * @net: net-namespace to shut down the cache (may be NULL)
+ *
+ * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
+- * then close out everything. Called when an nfsd instance is being shut down.
++ * then close out everything. Called when an nfsd instance is being shut down,
++ * and when the exports table is flushed.
+ */
+ static void
+ __nfsd_file_cache_purge(struct net *net)
+--
+2.43.0
+
--- /dev/null
+From d401c9f6a73bd93cd8bb1e9c241e8f6f20cfa158 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Feb 2024 11:44:28 -0500
+Subject: nfsd: update create verifier comment
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+[ Upstream commit 2336d696862186fd4a6ddd1ea0cb243b3e32847c ]
+
+I don't know if that Solaris behavior matters any more or if it's still
+possible to look up that bug ID any more. The XFS behavior's definitely
+still relevant, though; any but the most recent XFS filesystems will
+lose the top bits.
+
+Reported-by: Frank S. Filz <ffilzlnx@mindspring.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 24a5b5cfcfb03..59e30cff920ca 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1436,7 +1436,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+
+ if (nfsd_create_is_exclusive(createmode)) {
+ /* solaris7 gets confused (bugid 4218508) if these have
+- * the high bit set, so just clear the high bits. If this is
++ * the high bit set, as do xfs filesystems without the
++ * "bigtime" feature. So just clear the high bits. If this is
+ * ever changed to use different attrs for storing the
+ * verifier, then do_open_lookup() will also need to be fixed
+ * accordingly.
+--
+2.43.0
+
--- /dev/null
+From b2a4ee18f45c46433c0e4e6f1c1fd098fd7b01f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:22 -0400
+Subject: NFSD: Update file_hashtbl() helpers
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 3fe828caddd81e68e9d29353c6e9285a658ca056 ]
+
+Enable callers to use const pointers for type safety.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index b2a4d442af669..aa7374933de77 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -721,7 +721,7 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
+ #define FILE_HASH_BITS 8
+ #define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+
+-static unsigned int file_hashval(struct svc_fh *fh)
++static unsigned int file_hashval(const struct svc_fh *fh)
+ {
+ struct inode *inode = d_inode(fh->fh_dentry);
+
+@@ -4686,7 +4686,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
+
+ /* search file_hashtbl[] for file */
+ static struct nfs4_file *
+-find_file_locked(struct svc_fh *fh, unsigned int hashval)
++find_file_locked(const struct svc_fh *fh, unsigned int hashval)
+ {
+ struct nfs4_file *fp;
+
+--
+2.43.0
+
--- /dev/null
+From 21529f8fefb6510f57c6e987b120debb6bc48ef8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:16 -0400
+Subject: NFSD: Use const pointers as parameters to fh_ helpers
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit b48f8056c034f28dd54668399f1d22be421b0bef ]
+
+Enable callers to use const pointers where they are able to.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsfh.h | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
+index c3ae6414fc5cf..513e028b0bbee 100644
+--- a/fs/nfsd/nfsfh.h
++++ b/fs/nfsd/nfsfh.h
+@@ -220,7 +220,7 @@ __be32 fh_update(struct svc_fh *);
+ void fh_put(struct svc_fh *);
+
+ static __inline__ struct svc_fh *
+-fh_copy(struct svc_fh *dst, struct svc_fh *src)
++fh_copy(struct svc_fh *dst, const struct svc_fh *src)
+ {
+ WARN_ON(src->fh_dentry);
+
+@@ -229,7 +229,7 @@ fh_copy(struct svc_fh *dst, struct svc_fh *src)
+ }
+
+ static inline void
+-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
++fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src)
+ {
+ dst->fh_size = src->fh_size;
+ memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size);
+@@ -243,7 +243,8 @@ fh_init(struct svc_fh *fhp, int maxsize)
+ return fhp;
+ }
+
+-static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
++static inline bool fh_match(const struct knfsd_fh *fh1,
++ const struct knfsd_fh *fh2)
+ {
+ if (fh1->fh_size != fh2->fh_size)
+ return false;
+@@ -252,7 +253,8 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+ return true;
+ }
+
+-static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
++static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
++ const struct knfsd_fh *fh2)
+ {
+ if (fh1->fh_fsid_type != fh2->fh_fsid_type)
+ return false;
+--
+2.43.0
+
--- /dev/null
+From 56c3eae39897d28a64032505ca01276300827d95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Sep 2022 00:31:52 +0800
+Subject: nfsd: use DEFINE_PROC_SHOW_ATTRIBUTE to define nfsd_proc_ops
+
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+
+[ Upstream commit 0cfb0c4228a5c8e2ed2b58f8309b660b187cef02 ]
+
+Use DEFINE_PROC_SHOW_ATTRIBUTE helper macro to simplify the code.
+
+Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/stats.c | 14 ++------------
+ 1 file changed, 2 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
+index a8c5a02a84f04..777e24e5da33b 100644
+--- a/fs/nfsd/stats.c
++++ b/fs/nfsd/stats.c
+@@ -32,7 +32,7 @@ struct svc_stat nfsd_svcstats = {
+ .program = &nfsd_program,
+ };
+
+-static int nfsd_proc_show(struct seq_file *seq, void *v)
++static int nfsd_show(struct seq_file *seq, void *v)
+ {
+ int i;
+
+@@ -72,17 +72,7 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
+ return 0;
+ }
+
+-static int nfsd_proc_open(struct inode *inode, struct file *file)
+-{
+- return single_open(file, nfsd_proc_show, NULL);
+-}
+-
+-static const struct proc_ops nfsd_proc_ops = {
+- .proc_open = nfsd_proc_open,
+- .proc_read = seq_read,
+- .proc_lseek = seq_lseek,
+- .proc_release = single_release,
+-};
++DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
+
+ int nfsd_percpu_counters_init(struct percpu_counter counters[], int num)
+ {
+--
+2.43.0
+
--- /dev/null
+From 003da08781cf4d4ec13d62edb5f067587a54ed7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Sep 2022 00:31:54 +0800
+Subject: nfsd: use DEFINE_SHOW_ATTRIBUTE to define client_info_fops
+
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+
+[ Upstream commit 1d7f6b302b75ff7acb9eb3cab0c631b10cfa7542 ]
+
+Use DEFINE_SHOW_ATTRIBUTE helper macro to simplify the code.
+
+inode is converted from seq_file->file instead of seq_file->private in
+client_info_show().
+
+Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 14 ++------------
+ 1 file changed, 2 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 8cbb66b07d519..cc258f2988c73 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -2503,7 +2503,7 @@ static const char *cb_state2str(int state)
+
+ static int client_info_show(struct seq_file *m, void *v)
+ {
+- struct inode *inode = m->private;
++ struct inode *inode = file_inode(m->file);
+ struct nfs4_client *clp;
+ u64 clid;
+
+@@ -2543,17 +2543,7 @@ static int client_info_show(struct seq_file *m, void *v)
+ return 0;
+ }
+
+-static int client_info_open(struct inode *inode, struct file *file)
+-{
+- return single_open(file, client_info_show, inode);
+-}
+-
+-static const struct file_operations client_info_fops = {
+- .open = client_info_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release,
+-};
++DEFINE_SHOW_ATTRIBUTE(client_info);
+
+ static void *states_start(struct seq_file *s, loff_t *pos)
+ __acquires(&clp->cl_lock)
+--
+2.43.0
+
--- /dev/null
+From ac9562653267f0155532e058680105ad084e9aa6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Sep 2022 00:31:53 +0800
+Subject: nfsd: use DEFINE_SHOW_ATTRIBUTE to define export_features_fops and
+ supported_enctypes_fops
+
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+
+[ Upstream commit 9beeaab8e05d353d709103cafa1941714b4d5d94 ]
+
+Use DEFINE_SHOW_ATTRIBUTE helper macro to simplify the code.
+
+Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
+[ cel: reduce line length ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 29 +++++------------------------
+ 1 file changed, 5 insertions(+), 24 deletions(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 597a26ad4183f..3ed0cfdb0c0b5 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -185,17 +185,7 @@ static int export_features_show(struct seq_file *m, void *v)
+ return 0;
+ }
+
+-static int export_features_open(struct inode *inode, struct file *file)
+-{
+- return single_open(file, export_features_show, NULL);
+-}
+-
+-static const struct file_operations export_features_operations = {
+- .open = export_features_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release,
+-};
++DEFINE_SHOW_ATTRIBUTE(export_features);
+
+ #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
+ static int supported_enctypes_show(struct seq_file *m, void *v)
+@@ -204,17 +194,7 @@ static int supported_enctypes_show(struct seq_file *m, void *v)
+ return 0;
+ }
+
+-static int supported_enctypes_open(struct inode *inode, struct file *file)
+-{
+- return single_open(file, supported_enctypes_show, NULL);
+-}
+-
+-static const struct file_operations supported_enctypes_ops = {
+- .open = supported_enctypes_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release,
+-};
++DEFINE_SHOW_ATTRIBUTE(supported_enctypes);
+ #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
+
+ static const struct file_operations pool_stats_operations = {
+@@ -1365,7 +1345,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+ /* Per-export io stats use same ops as exports file */
+ [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO},
+ [NFSD_Export_features] = {"export_features",
+- &export_features_operations, S_IRUGO},
++ &export_features_fops, S_IRUGO},
+ [NFSD_FO_UnlockIP] = {"unlock_ip",
+ &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_FO_UnlockFS] = {"unlock_filesystem",
+@@ -1381,7 +1361,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+ [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO},
+ #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
+- [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
++ [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes",
++ &supported_enctypes_fops, S_IRUGO},
+ #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
+ #ifdef CONFIG_NFSD_V4
+ [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+--
+2.43.0
+
--- /dev/null
+From bab2fcf83d1d35178a1307f2e8be977b38dee821 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Sep 2022 00:31:56 +0800
+Subject: nfsd: use DEFINE_SHOW_ATTRIBUTE to define nfsd_file_cache_stats_fops
+
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+
+[ Upstream commit 1342f9dd3fc219089deeb2620f6790f19b4129b1 ]
+
+Use DEFINE_SHOW_ATTRIBUTE helper macro to simplify the code.
+
+Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 7 +------
+ fs/nfsd/filecache.h | 2 +-
+ fs/nfsd/nfsctl.c | 9 ++-------
+ 3 files changed, 4 insertions(+), 14 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 55478d411e5a0..fa8e1546e0206 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -1211,7 +1211,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+-static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
++int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ {
+ unsigned long releases = 0, pages_flushed = 0, evictions = 0;
+ unsigned long hits = 0, acquisitions = 0;
+@@ -1258,8 +1258,3 @@ static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "pages flushed: %lu\n", pages_flushed);
+ return 0;
+ }
+-
+-int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
+-{
+- return single_open(file, nfsd_file_cache_stats_show, NULL);
+-}
+diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
+index 8e8c0c47d67df..357832bac736b 100644
+--- a/fs/nfsd/filecache.h
++++ b/fs/nfsd/filecache.h
+@@ -60,5 +60,5 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+-int nfsd_file_cache_stats_open(struct inode *, struct file *);
++int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
+ #endif /* _FS_NFSD_FILECACHE_H */
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 1983f4f2908d9..6a29bcfc93909 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -206,12 +206,7 @@ static const struct file_operations pool_stats_operations = {
+
+ DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats);
+
+-static const struct file_operations filecache_ops = {
+- .open = nfsd_file_cache_stats_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release,
+-};
++DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats);
+
+ /*----------------------------------------------------------------------------*/
+ /*
+@@ -1355,7 +1350,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+ [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
+- [NFSD_Filecache] = {"filecache", &filecache_ops, S_IRUGO},
++ [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
+ #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
+ [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes",
+ &supported_enctypes_fops, S_IRUGO},
+--
+2.43.0
+
--- /dev/null
+From 980919e5fc4d1910956b4279623d714711872ad4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Sep 2022 00:31:55 +0800
+Subject: nfsd: use DEFINE_SHOW_ATTRIBUTE to define nfsd_reply_cache_stats_fops
+
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+
+[ Upstream commit 64776611a06322b99386f8dfe3b3ba1aa0347a38 ]
+
+Use DEFINE_SHOW_ATTRIBUTE helper macro to simplify the code.
+
+nfsd_net is now derived from seq_file->file instead of seq_file->private in
+nfsd_reply_cache_stats_show().
+
+Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
+[ cel: reduce line length ]
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/cache.h | 2 +-
+ fs/nfsd/nfscache.c | 13 +++----------
+ fs/nfsd/nfsctl.c | 10 +++-------
+ 3 files changed, 7 insertions(+), 18 deletions(-)
+
+diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
+index 65c331f75e9c7..f21259ead64bb 100644
+--- a/fs/nfsd/cache.h
++++ b/fs/nfsd/cache.h
+@@ -84,6 +84,6 @@ int nfsd_reply_cache_init(struct nfsd_net *);
+ void nfsd_reply_cache_shutdown(struct nfsd_net *);
+ int nfsd_cache_lookup(struct svc_rqst *);
+ void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+-int nfsd_reply_cache_stats_open(struct inode *, struct file *);
++int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
+
+ #endif /* NFSCACHE_H */
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index 7da88bdc0d6c3..2b5417e06d80d 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -603,9 +603,10 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+-static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
++int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+ {
+- struct nfsd_net *nn = m->private;
++ struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info,
++ nfsd_net_id);
+
+ seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
+ seq_printf(m, "num entries: %u\n",
+@@ -625,11 +626,3 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
+ return 0;
+ }
+-
+-int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
+-{
+- struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info,
+- nfsd_net_id);
+-
+- return single_open(file, nfsd_reply_cache_stats_show, nn);
+-}
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 3ed0cfdb0c0b5..1983f4f2908d9 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -204,12 +204,7 @@ static const struct file_operations pool_stats_operations = {
+ .release = nfsd_pool_stats_release,
+ };
+
+-static const struct file_operations reply_cache_stats_operations = {
+- .open = nfsd_reply_cache_stats_open,
+- .read = seq_read,
+- .llseek = seq_lseek,
+- .release = single_release,
+-};
++DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats);
+
+ static const struct file_operations filecache_ops = {
+ .open = nfsd_file_cache_stats_open,
+@@ -1354,7 +1349,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+ [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
+- [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
++ [NFSD_Reply_Cache_Stats] = {"reply_cache_stats",
++ &nfsd_reply_cache_stats_fops, S_IRUGO},
+ [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+--
+2.43.0
+
--- /dev/null
+From 48110b37ccc86f7a47a591c4f690fa8d5a4dca02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: use explicit lock/unlock for directory ops
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit debf16f0c671cb8db154a9ebcd6014cfff683b80 ]
+
+When creating or unlinking a name in a directory use explicit
+inode_lock_nested() instead of fh_lock(), and explicit calls to
+fh_fill_pre_attrs() and fh_fill_post_attrs(). This is already done
+for renames, with lock_rename() as the explicit locking.
+
+Also move the 'fill' calls closer to the operation that might change the
+attributes. This way they are avoided on some error paths.
+
+For the v2-only code in nfsproc.c, the fill calls are not replaced as
+they aren't needed.
+
+Making the locking explicit will simplify proposed future changes to
+locking for directories. It also makes it easily visible exactly where
+pre/post attributes are used - not all callers of fh_lock() actually
+need the pre/post attributes.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3proc.c | 6 ++++--
+ fs/nfsd/nfs4proc.c | 6 ++++--
+ fs/nfsd/nfsproc.c | 5 ++---
+ fs/nfsd/vfs.c | 30 +++++++++++++++++++-----------
+ 4 files changed, 29 insertions(+), 18 deletions(-)
+
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index fbdc109fbd067..5b1e771238b35 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -260,7 +260,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (host_err)
+ return nfserrno(host_err);
+
+- fh_lock_nested(fhp, I_MUTEX_PARENT);
++ inode_lock_nested(inode, I_MUTEX_PARENT);
+
+ child = lookup_one_len(argp->name, parent, argp->len);
+ if (IS_ERR(child)) {
+@@ -318,11 +318,13 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
++ fh_fill_pre_attrs(fhp);
+ host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
+ if (host_err < 0) {
+ status = nfserrno(host_err);
+ goto out;
+ }
++ fh_fill_post_attrs(fhp);
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+@@ -340,7 +342,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
+
+ out:
+- fh_unlock(fhp);
++ inode_unlock(inode);
+ if (child && !IS_ERR(child))
+ dput(child);
+ fh_drop_write(fhp);
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index b6df56fb6755d..5e4b7858b2e50 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -264,7 +264,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (is_create_with_attrs(open))
+ nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs);
+
+- fh_lock_nested(fhp, I_MUTEX_PARENT);
++ inode_lock_nested(inode, I_MUTEX_PARENT);
+
+ child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+ if (IS_ERR(child)) {
+@@ -348,10 +348,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
++ fh_fill_pre_attrs(fhp);
+ status = nfsd4_vfs_create(fhp, child, open);
+ if (status != nfs_ok)
+ goto out;
+ open->op_created = true;
++ fh_fill_post_attrs(fhp);
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+@@ -373,7 +375,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (attrs.na_aclerr)
+ open->op_bmval[0] &= ~FATTR4_WORD0_ACL;
+ out:
+- fh_unlock(fhp);
++ inode_unlock(inode);
+ nfsd_attrs_free(&attrs);
+ if (child && !IS_ERR(child))
+ dput(child);
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 09afd188099be..4b19cc727ea50 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -292,7 +292,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ goto done;
+ }
+
+- fh_lock_nested(dirfhp, I_MUTEX_PARENT);
++ inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT);
+ dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+ if (IS_ERR(dchild)) {
+ resp->status = nfserrno(PTR_ERR(dchild));
+@@ -408,8 +408,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
+ }
+
+ out_unlock:
+- /* We don't really need to unlock, as fh_put does it. */
+- fh_unlock(dirfhp);
++ inode_unlock(dirfhp->fh_dentry->d_inode);
+ fh_drop_write(dirfhp);
+ done:
+ fh_put(dirfhp);
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index c07fe50d6bdfb..7de76b37a9bc2 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -1371,7 +1371,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ if (host_err)
+ return nfserrno(host_err);
+
+- fh_lock_nested(fhp, I_MUTEX_PARENT);
++ inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
+ dchild = lookup_one_len(fname, dentry, flen);
+ host_err = PTR_ERR(dchild);
+ if (IS_ERR(dchild)) {
+@@ -1386,10 +1386,12 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ dput(dchild);
+ if (err)
+ goto out_unlock;
++ fh_fill_pre_attrs(fhp);
+ err = nfsd_create_locked(rqstp, fhp, fname, flen, attrs, type,
+ rdev, resfhp);
++ fh_fill_post_attrs(fhp);
+ out_unlock:
+- fh_unlock(fhp);
++ inode_unlock(dentry->d_inode);
+ return err;
+ }
+
+@@ -1472,20 +1474,22 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ goto out;
+ }
+
+- fh_lock(fhp);
+ dentry = fhp->fh_dentry;
++ inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
+ dnew = lookup_one_len(fname, dentry, flen);
+ if (IS_ERR(dnew)) {
+ err = nfserrno(PTR_ERR(dnew));
+- fh_unlock(fhp);
++ inode_unlock(dentry->d_inode);
+ goto out_drop_write;
+ }
++ fh_fill_pre_attrs(fhp);
+ host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
+ err = nfserrno(host_err);
+ cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+ if (!err)
+ nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
+- fh_unlock(fhp);
++ fh_fill_post_attrs(fhp);
++ inode_unlock(dentry->d_inode);
+ if (!err)
+ err = nfserrno(commit_metadata(fhp));
+ dput(dnew);
+@@ -1531,9 +1535,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+ goto out;
+ }
+
+- fh_lock_nested(ffhp, I_MUTEX_PARENT);
+ ddir = ffhp->fh_dentry;
+ dirp = d_inode(ddir);
++ inode_lock_nested(dirp, I_MUTEX_PARENT);
+
+ dnew = lookup_one_len(name, ddir, len);
+ if (IS_ERR(dnew)) {
+@@ -1546,8 +1550,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+ err = nfserr_noent;
+ if (d_really_is_negative(dold))
+ goto out_dput;
++ fh_fill_pre_attrs(ffhp);
+ host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL);
+- fh_unlock(ffhp);
++ fh_fill_post_attrs(ffhp);
++ inode_unlock(dirp);
+ if (!host_err) {
+ err = nfserrno(commit_metadata(ffhp));
+ if (!err)
+@@ -1567,7 +1573,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+ out_dput:
+ dput(dnew);
+ out_unlock:
+- fh_unlock(ffhp);
++ inode_unlock(dirp);
+ goto out_drop_write;
+ }
+
+@@ -1742,9 +1748,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ if (host_err)
+ goto out_nfserr;
+
+- fh_lock_nested(fhp, I_MUTEX_PARENT);
+ dentry = fhp->fh_dentry;
+ dirp = d_inode(dentry);
++ inode_lock_nested(dirp, I_MUTEX_PARENT);
+
+ rdentry = lookup_one_len(fname, dentry, flen);
+ host_err = PTR_ERR(rdentry);
+@@ -1762,6 +1768,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ if (!type)
+ type = d_inode(rdentry)->i_mode & S_IFMT;
+
++ fh_fill_pre_attrs(fhp);
+ if (type != S_IFDIR) {
+ if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
+ nfsd_close_cached_files(rdentry);
+@@ -1769,8 +1776,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ } else {
+ host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
+ }
++ fh_fill_post_attrs(fhp);
+
+- fh_unlock(fhp);
++ inode_unlock(dirp);
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ dput(rdentry);
+@@ -1793,7 +1801,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ out:
+ return err;
+ out_unlock:
+- fh_unlock(fhp);
++ inode_unlock(dirp);
+ goto out_drop_write;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 14c94ca02993fdd65b21436e8fe9a2bd18fbd814 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:03:20 +0300
+Subject: nfsd: use fsnotify group lock helpers
+
+From: Amir Goldstein <amir73il@gmail.com>
+
+[ Upstream commit b8962a9d8cc2d8c93362e2f684091c79f702f6f3 ]
+
+Before commit 9542e6a643fc6 ("nfsd: Containerise filecache laundrette")
+nfsd would close open files in direct reclaim context and that could
+cause a deadlock when fsnotify mark allocation went into direct reclaim
+and nfsd shrinker tried to free existing fsnotify marks.
+
+To avoid issues like this in future code, set the FSNOTIFY_GROUP_NOFS
+flag on nfsd fsnotify group to prevent going into direct reclaim from
+fsnotify_add_inode_mark().
+
+Link: https://lore.kernel.org/r/20220422120327.3459282-10-amir73il@gmail.com
+Suggested-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220321112310.vpr7oxro2xkz5llh@quack3.lan/
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 97ca256a76323..b1afe6db589f2 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -118,14 +118,14 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+ struct inode *inode = nf->nf_inode;
+
+ do {
+- mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
++ fsnotify_group_lock(nfsd_file_fsnotify_group);
+ mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
+- nfsd_file_fsnotify_group);
++ nfsd_file_fsnotify_group);
+ if (mark) {
+ nfm = nfsd_file_mark_get(container_of(mark,
+ struct nfsd_file_mark,
+ nfm_mark));
+- mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
++ fsnotify_group_unlock(nfsd_file_fsnotify_group);
+ if (nfm) {
+ fsnotify_put_mark(mark);
+ break;
+@@ -133,8 +133,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
+ /* Avoid soft lockup race with nfsd_file_mark_put() */
+ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(mark);
+- } else
+- mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
++ } else {
++ fsnotify_group_unlock(nfsd_file_fsnotify_group);
++ }
+
+ /* allocate a new nfm */
+ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+@@ -672,7 +673,7 @@ nfsd_file_cache_init(void)
+ }
+
+ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
+- 0);
++ FSNOTIFY_GROUP_NOFS);
+ if (IS_ERR(nfsd_file_fsnotify_group)) {
+ pr_err("nfsd: unable to create fsnotify group: %ld\n",
+ PTR_ERR(nfsd_file_fsnotify_group));
+--
+2.43.0
+
--- /dev/null
+From c70ea7862fd2722db142b6ab526a7d2e6c5930ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Nov 2022 09:36:07 -0500
+Subject: nfsd: use locks_inode_context helper
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 77c67530e1f95ac25c7075635f32f04367380894 ]
+
+nfsd currently doesn't access i_flctx safely everywhere. This requires a
+smp_load_acquire, as the pointer is set via cmpxchg (a release
+operation).
+
+Acked-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 7cfc92aa2a236..dbcdb74e9ff6f 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4773,7 +4773,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+
+ static bool nfsd4_deleg_present(const struct inode *inode)
+ {
+- struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
++ struct file_lock_context *ctx = locks_inode_context(inode);
+
+ return ctx && !list_empty_careful(&ctx->flc_lease);
+ }
+@@ -5912,7 +5912,7 @@ nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ nf = stp->st_stid.sc_file;
+- ctx = nf->fi_inode->i_flctx;
++ ctx = locks_inode_context(nf->fi_inode);
+ if (!ctx)
+ continue;
+ if (locks_owner_has_blockers(ctx, lo))
+@@ -7726,7 +7726,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
+ }
+
+ inode = locks_inode(nf->nf_file);
+- flctx = inode->i_flctx;
++ flctx = locks_inode_context(inode);
+
+ if (flctx && !list_empty_careful(&flctx->flc_posix)) {
+ spin_lock(&flctx->flc_lock);
+--
+2.43.0
+
--- /dev/null
+From fd9b182b0e63855d244e0e84983392b2363bd369 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 26 Nov 2022 15:55:30 -0500
+Subject: NFSD: Use only RQ_DROPME to signal the need to drop a reply
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 9315564747cb6a570e99196b3a4880fb817635fd ]
+
+Clean up: NFSv2 has the only two usages of rpc_drop_reply in the
+NFSD code base. Since NFSv2 is going away at some point, replace
+these in order to simplify the "drop this reply?" check in
+nfsd_dispatch().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsproc.c | 4 ++--
+ fs/nfsd/nfssvc.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index 52fc222c34f26..a5570cf75f3fd 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+- return rpc_drop_reply;
++ __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+- return rpc_drop_reply;
++ __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 429f38c986280..325d3d3f12110 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1060,7 +1060,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ svcxdr_init_encode(rqstp);
+
+ *statp = proc->pc_func(rqstp);
+- if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
++ if (test_bit(RQ_DROPME, &rqstp->rq_flags))
+ goto out_update_drop;
+
+ if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
+--
+2.43.0
+
--- /dev/null
+From a05e62e780528f00b9d245854b02c58de4f8d806 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Oct 2022 10:47:53 -0400
+Subject: NFSD: Use rhashtable for managing nfs4_file objects
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit d47b295e8d76a4d69f0e2ea0cd8a79c9d3488280 ]
+
+fh_match() is costly, especially when filehandles are large (as is
+the case for NFSv4). It needs to be used sparingly when searching
+data structures. Unfortunately, with common workloads, I see
+multiple thousands of objects stored in file_hashtbl[], which has
+just 256 buckets, making its bucket hash chains quite lengthy.
+
+Walking long hash chains with the state_lock held blocks other
+activity that needs that lock. Sizable hash chains are a common
+occurrence once the server has handed out some delegations, for
+example -- IIUC, each delegated file is held open on the server by
+an nfs4_file object.
+
+To help mitigate the cost of searching with fh_match(), replace the
+nfs4_file hash table with an rhashtable, which can dynamically
+resize its bucket array to minimize hash chain length.
+
+The result of this modification is an improvement in the latency of
+NFSv4 operations, and the reduction of nfsd CPU utilization due to
+eliminating the cost of multiple calls to fh_match() and reducing
+the CPU cache misses incurred while walking long hash chains in the
+nfs4_file hash table.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 97 +++++++++++++++++++++++++++++----------------
+ fs/nfsd/state.h | 5 +--
+ 2 files changed, 63 insertions(+), 39 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 4f2ad5bf1f1b8..39b315e3471f5 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -44,7 +44,9 @@
+ #include <linux/jhash.h>
+ #include <linux/string_helpers.h>
+ #include <linux/fsnotify.h>
++#include <linux/rhashtable.h>
+ #include <linux/nfs_ssc.h>
++
+ #include "xdr4.h"
+ #include "xdr4cb.h"
+ #include "vfs.h"
+@@ -589,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
+ void
+ put_nfs4_file(struct nfs4_file *fi)
+ {
+- might_lock(&state_lock);
+-
+- if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
++ if (refcount_dec_and_test(&fi->fi_ref)) {
+ nfsd4_file_hash_remove(fi);
+- spin_unlock(&state_lock);
+ WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
+ WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
+ call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
+@@ -718,19 +717,20 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
+ return ret & OWNER_HASH_MASK;
+ }
+
+-/* hash table for nfs4_file */
+-#define FILE_HASH_BITS 8
+-#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+-
+-static unsigned int file_hashval(const struct svc_fh *fh)
+-{
+- struct inode *inode = d_inode(fh->fh_dentry);
++static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
+
+- /* XXX: why not (here & in file cache) use inode? */
+- return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
+-}
++static const struct rhashtable_params nfs4_file_rhash_params = {
++ .key_len = sizeof_field(struct nfs4_file, fi_inode),
++ .key_offset = offsetof(struct nfs4_file, fi_inode),
++ .head_offset = offsetof(struct nfs4_file, fi_rlist),
+
+-static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
++ /*
++ * Start with a single page hash table to reduce resizing churn
++ * on light workloads.
++ */
++ .min_size = 256,
++ .automatic_shrinking = true,
++};
+
+ /*
+ * Check if courtesy clients have conflicting access and resolve it if possible
+@@ -4685,12 +4685,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
+ static noinline_for_stack struct nfs4_file *
+ nfsd4_file_hash_lookup(const struct svc_fh *fhp)
+ {
+- unsigned int hashval = file_hashval(fhp);
++ struct inode *inode = d_inode(fhp->fh_dentry);
++ struct rhlist_head *tmp, *list;
+ struct nfs4_file *fi;
+
+ rcu_read_lock();
+- hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash,
+- lockdep_is_held(&state_lock)) {
++ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
++ nfs4_file_rhash_params);
++ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
+ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
+ if (refcount_inc_not_zero(&fi->fi_ref)) {
+ rcu_read_unlock();
+@@ -4704,40 +4706,56 @@ nfsd4_file_hash_lookup(const struct svc_fh *fhp)
+
+ /*
+ * On hash insertion, identify entries with the same inode but
+- * distinct filehandles. They will all be in the same hash bucket
+- * because nfs4_file's are hashed by the address in the fi_inode
+- * field.
++ * distinct filehandles. They will all be on the list returned
++ * by rhltable_lookup().
++ *
++ * inode->i_lock prevents racing insertions from adding an entry
++ * for the same inode/fhp pair twice.
+ */
+ static noinline_for_stack struct nfs4_file *
+ nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
+ {
+- unsigned int hashval = file_hashval(fhp);
++ struct inode *inode = d_inode(fhp->fh_dentry);
++ struct rhlist_head *tmp, *list;
+ struct nfs4_file *ret = NULL;
+ bool alias_found = false;
+ struct nfs4_file *fi;
++ int err;
+
+- spin_lock(&state_lock);
+- hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash,
+- lockdep_is_held(&state_lock)) {
++ rcu_read_lock();
++ spin_lock(&inode->i_lock);
++
++ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
++ nfs4_file_rhash_params);
++ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
+ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
+ if (refcount_inc_not_zero(&fi->fi_ref))
+ ret = fi;
+- } else if (d_inode(fhp->fh_dentry) == fi->fi_inode)
++ } else
+ fi->fi_aliased = alias_found = true;
+ }
+- if (likely(ret == NULL)) {
+- nfsd4_file_init(fhp, new);
+- hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]);
+- new->fi_aliased = alias_found;
+- ret = new;
+- }
+- spin_unlock(&state_lock);
++ if (ret)
++ goto out_unlock;
++
++ nfsd4_file_init(fhp, new);
++ err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
++ nfs4_file_rhash_params);
++ if (err)
++ goto out_unlock;
++
++ new->fi_aliased = alias_found;
++ ret = new;
++
++out_unlock:
++ spin_unlock(&inode->i_lock);
++ rcu_read_unlock();
+ return ret;
+ }
+
+ static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
+ {
+- hlist_del_rcu(&fi->fi_hash);
++ rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
++ nfs4_file_rhash_params);
+ }
+
+ /*
+@@ -5628,6 +5646,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ * If not found, create the nfs4_file struct
+ */
+ fp = nfsd4_file_hash_insert(open->op_file, current_fh);
++ if (unlikely(!fp))
++ return nfserr_jukebox;
+ if (fp != open->op_file) {
+ status = nfs4_check_deleg(cl, open, &dp);
+ if (status)
+@@ -8040,10 +8060,16 @@ nfs4_state_start(void)
+ {
+ int ret;
+
+- ret = nfsd4_create_callback_queue();
++ ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
+ if (ret)
+ return ret;
+
++ ret = nfsd4_create_callback_queue();
++ if (ret) {
++ rhltable_destroy(&nfs4_file_rhltable);
++ return ret;
++ }
++
+ set_max_delegations();
+ return 0;
+ }
+@@ -8074,6 +8100,7 @@ nfs4_state_shutdown_net(struct net *net)
+
+ nfsd4_client_tracking_exit(net);
+ nfs4_state_destroy_net(net);
++ rhltable_destroy(&nfs4_file_rhltable);
+ #ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_shutdown_umount(nn);
+ #endif
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index e2daef3cc0034..eadd7f465bf52 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -536,16 +536,13 @@ struct nfs4_clnt_odstate {
+ * inode can have multiple filehandles associated with it, so there is
+ * (potentially) a many to one relationship between this struct and struct
+ * inode.
+- *
+- * These are hashed by filehandle in the file_hashtbl, which is protected by
+- * the global state_lock spinlock.
+ */
+ struct nfs4_file {
+ refcount_t fi_ref;
+ struct inode * fi_inode;
+ bool fi_aliased;
+ spinlock_t fi_lock;
+- struct hlist_node fi_hash; /* hash on fi_fhandle */
++ struct rhlist_head fi_rlist;
+ struct list_head fi_stateids;
+ union {
+ struct list_head fi_delegations;
+--
+2.43.0
+
--- /dev/null
+From 7ed165404893b5b80bef1dd497273c3f0f1c2df7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 7 Jan 2023 10:15:35 -0500
+Subject: NFSD: Use set_bit(RQ_DROPME)
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5304930dbae82d259bcf7e5611db7c81e7a42eff ]
+
+The premise that "Once an svc thread is scheduled and executing an
+RPC, no other processes will touch svc_rqst::rq_flags" is false.
+svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd
+threads when determining which thread to wake up next.
+
+Fixes: 9315564747cb ("NFSD: Use only RQ_DROPME to signal the need to drop a reply")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsproc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index a5570cf75f3fd..9744443c39652 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+- __set_bit(RQ_DROPME, &rqstp->rq_flags);
++ set_bit(RQ_DROPME, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+@@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+- __set_bit(RQ_DROPME, &rqstp->rq_flags);
++ set_bit(RQ_DROPME, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 7f2f6a77c6c04e014ad7b29279a11a998aa426ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Nov 2022 17:18:35 +0800
+Subject: NFSD: Use struct_size() helper in alloc_session()
+
+From: Xiu Jianfeng <xiujianfeng@huawei.com>
+
+[ Upstream commit 85a0d0c9a58002ef7d1bf5e3ea630f4fbd42a4f0 ]
+
+Use struct_size() helper to simplify the code, no functional changes.
+
+Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 39b315e3471f5..524865c7211ef 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -1833,13 +1833,12 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
+ int numslots = fattrs->maxreqs;
+ int slotsize = slot_bytes(fattrs);
+ struct nfsd4_session *new;
+- int mem, i;
++ int i;
+
+- BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
+- + sizeof(struct nfsd4_session) > PAGE_SIZE);
+- mem = numslots * sizeof(struct nfsd4_slot *);
++ BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
++ > PAGE_SIZE);
+
+- new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
++ new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ /* allocate each struct nfsd4_slot and data cache in one piece */
+--
+2.43.0
+
--- /dev/null
+From d6f30a383c330905761aaadde5894efe6572ae29 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: use (un)lock_inode instead of fh_(un)lock for file operations
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit bb4d53d66e4b8c8b8e5634802262e53851a2d2db ]
+
+When locking a file to access ACLs and xattrs etc, use explicit locking
+with inode_lock() instead of fh_lock(). This means that the calls to
+fh_fill_pre/post_attr() are also explicit which improves readability and
+allows us to place them only where they are needed. Only the xattr
+calls need pre/post information.
+
+When locking a file we don't need I_MUTEX_PARENT as the file is not a
+parent of anything, so we can use inode_lock() directly rather than the
+inode_lock_nested() call that fh_lock() uses.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs2acl.c | 6 +++---
+ fs/nfsd/nfs3acl.c | 4 ++--
+ fs/nfsd/nfs4state.c | 9 +++++----
+ fs/nfsd/vfs.c | 34 ++++++++++++++++++++--------------
+ 4 files changed, 30 insertions(+), 23 deletions(-)
+
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index efcd429b0f28e..87f224cd30a85 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -111,7 +111,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
+ if (error)
+ goto out_errno;
+
+- fh_lock(fh);
++ inode_lock(inode);
+
+ error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS,
+ argp->acl_access);
+@@ -122,7 +122,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
+ if (error)
+ goto out_drop_lock;
+
+- fh_unlock(fh);
++ inode_unlock(inode);
+
+ fh_drop_write(fh);
+
+@@ -136,7 +136,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
+ return rpc_success;
+
+ out_drop_lock:
+- fh_unlock(fh);
++ inode_unlock(inode);
+ fh_drop_write(fh);
+ out_errno:
+ resp->status = nfserrno(error);
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 35b2ebda14dac..9446c67436649 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -101,7 +101,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
+ if (error)
+ goto out_errno;
+
+- fh_lock(fh);
++ inode_lock(inode);
+
+ error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS,
+ argp->acl_access);
+@@ -111,7 +111,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
+ argp->acl_default);
+
+ out_drop_lock:
+- fh_unlock(fh);
++ inode_unlock(inode);
+ fh_drop_write(fh);
+ out_errno:
+ resp->status = nfserrno(error);
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index f66fb39714893..66cf8217ebe57 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7415,21 +7415,22 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
+ {
+ struct nfsd_file *nf;
++ struct inode *inode;
+ __be32 err;
+
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
+ if (err)
+ return err;
+- fh_lock(fhp); /* to block new leases till after test_lock: */
+- err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode,
+- NFSD_MAY_READ));
++ inode = fhp->fh_dentry->d_inode;
++ inode_lock(inode); /* to block new leases till after test_lock: */
++ err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (err)
+ goto out;
+ lock->fl_file = nf->nf_file;
+ err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ lock->fl_file = NULL;
+ out:
+- fh_unlock(fhp);
++ inode_unlock(inode);
+ nfsd_file_put(nf);
+ return err;
+ }
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 7de76b37a9bc2..73a153be6a5ad 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -417,7 +417,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ return err;
+ }
+
+- fh_lock(fhp);
++ inode_lock(inode);
+ if (size_change) {
+ /*
+ * RFC5661, Section 18.30.4:
+@@ -465,7 +465,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ attr->na_aclerr = set_posix_acl(&init_user_ns,
+ inode, ACL_TYPE_DEFAULT,
+ attr->na_dpacl);
+- fh_unlock(fhp);
++ inode_unlock(inode);
+ if (size_change)
+ put_write_access(inode);
+ out:
+@@ -2156,13 +2156,16 @@ nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp,
+ return err;
+ }
+
+-/*
+- * Removexattr and setxattr need to call fh_lock to both lock the inode
+- * and set the change attribute. Since the top-level vfs_removexattr
+- * and vfs_setxattr calls already do their own inode_lock calls, call
+- * the _locked variant. Pass in a NULL pointer for delegated_inode,
+- * and let the client deal with NFS4ERR_DELAY (same as with e.g.
+- * setattr and remove).
++/**
++ * nfsd_removexattr - Remove an extended attribute
++ * @rqstp: RPC transaction being executed
++ * @fhp: NFS filehandle of object with xattr to remove
++ * @name: name of xattr to remove (NUL-terminate)
++ *
++ * Pass in a NULL pointer for delegated_inode, and let the client deal
++ * with NFS4ERR_DELAY (same as with e.g. setattr and remove).
++ *
++ * Returns nfs_ok on success, or an nfsstat in network byte order.
+ */
+ __be32
+ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
+@@ -2178,12 +2181,14 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
+ if (ret)
+ return nfserrno(ret);
+
+- fh_lock(fhp);
++ inode_lock(fhp->fh_dentry->d_inode);
++ fh_fill_pre_attrs(fhp);
+
+ ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry,
+ name, NULL);
+
+- fh_unlock(fhp);
++ fh_fill_post_attrs(fhp);
++ inode_unlock(fhp->fh_dentry->d_inode);
+ fh_drop_write(fhp);
+
+ return nfsd_xattr_errno(ret);
+@@ -2203,12 +2208,13 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
+ ret = fh_want_write(fhp);
+ if (ret)
+ return nfserrno(ret);
+- fh_lock(fhp);
++ inode_lock(fhp->fh_dentry->d_inode);
++ fh_fill_pre_attrs(fhp);
+
+ ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf,
+ len, flags, NULL);
+-
+- fh_unlock(fhp);
++ fh_fill_post_attrs(fhp);
++ inode_unlock(fhp->fh_dentry->d_inode);
+ fh_drop_write(fhp);
+
+ return nfsd_xattr_errno(ret);
+--
+2.43.0
+
--- /dev/null
+From 34046a6aaac6a344e35947a702b3675e6eeeb59a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:23:02 -0400
+Subject: NFSD: Use xdr_inline_decode() to decode NFSv3 symlinks
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c3d2a04f05c590303c125a176e6e43df4a436fdb ]
+
+Replace the check for buffer over/underflow with a helper that is
+commonly used for this purpose. The helper also sets xdr->nwords
+correctly after successfully linearizing the symlink argument into
+the stream's scratch buffer.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs3xdr.c | 14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index 0293b8d65f10f..71e32cf288854 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -616,8 +616,6 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_symlinkargs *args = rqstp->rq_argp;
+ struct kvec *head = rqstp->rq_arg.head;
+- struct kvec *tail = rqstp->rq_arg.tail;
+- size_t remaining;
+
+ if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen))
+ return false;
+@@ -626,16 +624,10 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
+ return false;
+
+- /* request sanity */
+- remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
+- remaining -= xdr_stream_pos(xdr);
+- if (remaining < xdr_align_size(args->tlen))
+- return false;
+-
+- args->first.iov_base = xdr->p;
++ /* symlink_data */
+ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+-
+- return true;
++ args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
++ return args->first.iov_base != NULL;
+ }
+
+ bool
+--
+2.43.0
+
--- /dev/null
+From 0b5706d3971ef4d57172492e336edbc253561fae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:09:16 -0400
+Subject: NFSD: Use xdr_pad_size()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 5e64d85c7d0c59cfcd61d899720b8ccfe895d743 ]
+
+Clean up: Use a helper instead of open-coding the calculation of
+the XDR pad size.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4xdr.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 310321b9b94cd..88e8192f9a75d 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -3948,9 +3948,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ {
+ struct xdr_stream *xdr = resp->xdr;
+ unsigned int starting_len = xdr->buf->len;
++ __be32 zero = xdr_zero;
+ __be32 nfserr;
+- __be32 tmp;
+- int pad;
+
+ read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
+ if (read->rd_vlen < 0)
+@@ -3966,11 +3965,9 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ return nfserr_io;
+ xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount));
+
+- tmp = xdr_zero;
+- pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
+- write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &tmp, pad);
+- return 0;
+-
++ write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero,
++ xdr_pad_size(maxcount));
++ return nfs_ok;
+ }
+
+ static __be32
+--
+2.43.0
+
--- /dev/null
+From 74b092c9ad5f5a51972d7008b627a97d92c4b46a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 16:45:30 +1000
+Subject: NFSD: verify the opened dentry after setting a delegation
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 876c553cb41026cb6ad3cef970a35e5f69c42a25 ]
+
+Between opening a file and setting a delegation on it, someone could
+rename or unlink the dentry. If this happens, we do not want to grant a
+delegation on the open.
+
+On a CLAIM_NULL open, we're opening by filename, and we may (in the
+non-create case) or may not (in the create case) be holding i_rwsem
+when attempting to set a delegation. The latter case allows a
+race.
+
+After getting a lease, redo the lookup of the file being opened and
+validate that the resulting dentry matches the one in the open file
+description.
+
+To properly redo the lookup we need an rqst pointer to pass to
+nfsd_lookup_dentry(), so make sure that is available.
+
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 1 +
+ fs/nfsd/nfs4state.c | 54 ++++++++++++++++++++++++++++++++++++++++-----
+ fs/nfsd/xdr4.h | 1 +
+ 3 files changed, 51 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 46ec66f4ec9e7..ae0948271da9c 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -547,6 +547,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ open->op_openowner);
+
+ open->op_filp = NULL;
++ open->op_rqstp = rqstp;
+
+ /* This check required by spec. */
+ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 2b333f9259a03..7122ebc50a035 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -5305,11 +5305,44 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
+ return 0;
+ }
+
++/*
++ * It's possible that between opening the dentry and setting the delegation,
++ * that it has been renamed or unlinked. Redo the lookup to verify that this
++ * hasn't happened.
++ */
++static int
++nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
++ struct svc_fh *parent)
++{
++ struct svc_export *exp;
++ struct dentry *child;
++ __be32 err;
++
++ /* parent may already be locked, and it may get unlocked by
++ * this call, but that is safe.
++ */
++ err = nfsd_lookup_dentry(open->op_rqstp, parent,
++ open->op_fname, open->op_fnamelen,
++ &exp, &child);
++
++ if (err)
++ return -EAGAIN;
++
++ dput(child);
++ if (child != file_dentry(fp->fi_deleg_file->nf_file))
++ return -EAGAIN;
++
++ return 0;
++}
++
+ static struct nfs4_delegation *
+-nfs4_set_delegation(struct nfs4_client *clp,
+- struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
++nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
++ struct svc_fh *parent)
+ {
+ int status = 0;
++ struct nfs4_client *clp = stp->st_stid.sc_client;
++ struct nfs4_file *fp = stp->st_stid.sc_file;
++ struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
+ struct nfs4_delegation *dp;
+ struct nfsd_file *nf;
+ struct file_lock *fl;
+@@ -5364,6 +5397,13 @@ nfs4_set_delegation(struct nfs4_client *clp,
+ locks_free_lock(fl);
+ if (status)
+ goto out_clnt_odstate;
++
++ if (parent) {
++ status = nfsd4_verify_deleg_dentry(open, fp, parent);
++ if (status)
++ goto out_unlock;
++ }
++
+ status = nfsd4_check_conflicting_opens(clp, fp);
+ if (status)
+ goto out_unlock;
+@@ -5419,11 +5459,13 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
+ * proper support for them.
+ */
+ static void
+-nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
++nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
++ struct svc_fh *currentfh)
+ {
+ struct nfs4_delegation *dp;
+ struct nfs4_openowner *oo = openowner(stp->st_stateowner);
+ struct nfs4_client *clp = stp->st_stid.sc_client;
++ struct svc_fh *parent = NULL;
+ int cb_up;
+ int status = 0;
+
+@@ -5437,6 +5479,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
+ goto out_no_deleg;
+ break;
+ case NFS4_OPEN_CLAIM_NULL:
++ parent = currentfh;
++ fallthrough;
+ case NFS4_OPEN_CLAIM_FH:
+ /*
+ * Let's not give out any delegations till everyone's
+@@ -5451,7 +5495,7 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
+ default:
+ goto out_no_deleg;
+ }
+- dp = nfs4_set_delegation(clp, stp->st_stid.sc_file, stp->st_clnt_odstate);
++ dp = nfs4_set_delegation(open, stp, parent);
+ if (IS_ERR(dp))
+ goto out_no_deleg;
+
+@@ -5583,7 +5627,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
+ * Attempt to hand out a delegation. No error return, because the
+ * OPEN succeeds even if we fail.
+ */
+- nfs4_open_delegation(open, stp);
++ nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+ nodeleg:
+ status = nfs_ok;
+ trace_nfsd_open(&stp->st_stid.sc_stateid);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index adb9aef26d7f1..466e2786fc976 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -279,6 +279,7 @@ struct nfsd4_open {
+ struct nfs4_clnt_odstate *op_odstate; /* used during processing */
+ struct nfs4_acl *op_acl;
+ struct xdr_netobj op_label;
++ struct svc_rqst *op_rqstp;
+ };
+
+ struct nfsd4_open_confirm {
+--
+2.43.0
+
--- /dev/null
+From 311b115ce39429a073e0a757fde4992df6ebd90e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:25:04 -0400
+Subject: NFSD: WARN when freeing an item still linked via nf_lru
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 668ed92e651d3c25f9b6e8cb7ceca54d00daa96d ]
+
+Add a guardrail to prevent freeing memory that is still on a list.
+This includes either a dispose list or the LRU list.
+
+This is the sign of a bug, but this class of bugs can be detected
+so that they don't endanger system stability, especially while
+debugging.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index 60c51a4d8e0d7..d9b5f1e183976 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -213,6 +213,14 @@ nfsd_file_free(struct nfsd_file *nf)
+ fput(nf->nf_file);
+ flush = true;
+ }
++
++ /*
++ * If this item is still linked via nf_lru, that's a bug.
++ * WARN and leak it to preserve system stability.
++ */
++ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
++ return flush;
++
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+ return flush;
+ }
+@@ -342,7 +350,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+- list_del(&nf->nf_lru);
++ list_del_init(&nf->nf_lru);
+ nfsd_file_flush(nf);
+ nfsd_file_put_noref(nf);
+ }
+@@ -356,7 +364,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+- list_del(&nf->nf_lru);
++ list_del_init(&nf->nf_lru);
+ nfsd_file_flush(nf);
+ if (!refcount_dec_and_test(&nf->nf_ref))
+ continue;
+--
+2.43.0
+
--- /dev/null
+From 084d8c2036e448da89c4a7eceb2205d22973260c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Dec 2021 10:26:18 -0500
+Subject: NFSD: Write verifier might go backwards
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit cdc556600c0133575487cc69fb3128440b3c3e92 ]
+
+When vfs_iter_write() starts to fail because a file system is full,
+a bunch of writes can fail at once with ENOSPC. These writes
+repeatedly invoke nfsd_reset_boot_verifier() in quick succession.
+
+Ensure that the time it grabs doesn't go backwards due to an ntp
+adjustment going on at the same time.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 8554bc7ff4322..4d1d8aa6d7f9d 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -363,7 +363,7 @@ void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn)
+
+ static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn)
+ {
+- ktime_get_real_ts64(&nn->nfssvc_boot);
++ ktime_get_raw_ts64(&nn->nfssvc_boot);
+ }
+
+ void nfsd_reset_boot_verifier(struct nfsd_net *nn)
+--
+2.43.0
+
--- /dev/null
+From fd356ea408e0a502c723308cc7a7e875778563c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Jul 2022 14:24:51 -0400
+Subject: NFSD: Zero counters when the filecache is re-initialized
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 8b330f78040cbe16cf8029df70391b2a491f17e2 ]
+
+If nfsd_file_cache_init() is called after a shutdown, be sure the
+stat counters are reset.
+
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/filecache.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
+index b9941d4ef20d6..60c51a4d8e0d7 100644
+--- a/fs/nfsd/filecache.c
++++ b/fs/nfsd/filecache.c
+@@ -823,6 +823,8 @@ nfsd_file_cache_shutdown_net(struct net *net)
+ void
+ nfsd_file_cache_shutdown(void)
+ {
++ int i;
++
+ set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
+
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+@@ -846,6 +848,15 @@ nfsd_file_cache_shutdown(void)
+ nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
++
++ for_each_possible_cpu(i) {
++ per_cpu(nfsd_file_cache_hits, i) = 0;
++ per_cpu(nfsd_file_acquisitions, i) = 0;
++ per_cpu(nfsd_file_releases, i) = 0;
++ per_cpu(nfsd_file_total_age, i) = 0;
++ per_cpu(nfsd_file_pages_flushed, i) = 0;
++ per_cpu(nfsd_file_evictions, i) = 0;
++ }
+ }
+
+ static bool
+--
+2.43.0
+
--- /dev/null
+From 05ffb4374be9e441ef1e7fda35ff9f1e4a3c8e10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Dec 2021 09:49:39 +0300
+Subject: nfsd4: add refcount for nfsd4_blocked_lock
+
+From: Vasily Averin <vvs@virtuozzo.com>
+
+[ Upstream commit 47446d74f1707049067fee038507cdffda805631 ]
+
+nbl allocated in nfsd4_lock can be released in several ways:
+directly in nfsd4_lock(), via nfs4_laundromat(), via another nfs
+command RELEASE_LOCKOWNER or via nfsd4_callback.
+This structure should be refcounted to be used and released correctly
+in all these cases.
+
+Refcount is initialized to 1 during allocation and is incremented
+when nbl is added into nbl_list/nbl_lru lists.
+
+Usually nbl is linked into both lists together, so only one refcount
+is used for both lists.
+
+However nfsd4_lock() should keep in mind that nbl can be present
+in only one of the lists. This can happen if nbl was handled already
+by nfs4_laundromat/nfsd4_callback/etc.
+
+Refcount is decremented if vfs_lock_file() returns FILE_LOCK_DEFERRED,
+because nbl can be handled already by nfs4_laundromat/nfsd4_callback/etc.
+
+Refcount is not changed in find_blocked_lock() because it reuses the counter
+released after removing nbl from lists.
+
+Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4state.c | 25 ++++++++++++++++++++++---
+ fs/nfsd/state.h | 1 +
+ 2 files changed, 23 insertions(+), 3 deletions(-)
+
+diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
+index 5ee11f0e24d3b..fc0d7fbe5d4a6 100644
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -246,6 +246,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+ list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
+ if (fh_match(fh, &cur->nbl_fh)) {
+ list_del_init(&cur->nbl_list);
++ WARN_ON(list_empty(&cur->nbl_lru));
+ list_del_init(&cur->nbl_lru);
+ found = cur;
+ break;
+@@ -271,6 +272,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+ INIT_LIST_HEAD(&nbl->nbl_lru);
+ fh_copy_shallow(&nbl->nbl_fh, fh);
+ locks_init_lock(&nbl->nbl_lock);
++ kref_init(&nbl->nbl_kref);
+ nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
+ &nfsd4_cb_notify_lock_ops,
+ NFSPROC4_CLNT_CB_NOTIFY_LOCK);
+@@ -279,12 +281,21 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+ return nbl;
+ }
+
++static void
++free_nbl(struct kref *kref)
++{
++ struct nfsd4_blocked_lock *nbl;
++
++ nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
++ kfree(nbl);
++}
++
+ static void
+ free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+ {
+ locks_delete_block(&nbl->nbl_lock);
+ locks_release_private(&nbl->nbl_lock);
+- kfree(nbl);
++ kref_put(&nbl->nbl_kref, free_nbl);
+ }
+
+ static void
+@@ -302,6 +313,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
+ struct nfsd4_blocked_lock,
+ nbl_list);
+ list_del_init(&nbl->nbl_list);
++ WARN_ON(list_empty(&nbl->nbl_lru));
+ list_move(&nbl->nbl_lru, &reaplist);
+ }
+ spin_unlock(&nn->blocked_locks_lock);
+@@ -7019,6 +7031,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ spin_lock(&nn->blocked_locks_lock);
+ list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
+ list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
++ kref_get(&nbl->nbl_kref);
+ spin_unlock(&nn->blocked_locks_lock);
+ }
+
+@@ -7031,6 +7044,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ nn->somebody_reclaimed = true;
+ break;
+ case FILE_LOCK_DEFERRED:
++ kref_put(&nbl->nbl_kref, free_nbl);
+ nbl = NULL;
+ fallthrough;
+ case -EAGAIN: /* conflock holds conflicting lock */
+@@ -7051,8 +7065,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ /* dequeue it if we queued it before */
+ if (fl_flags & FL_SLEEP) {
+ spin_lock(&nn->blocked_locks_lock);
+- list_del_init(&nbl->nbl_list);
+- list_del_init(&nbl->nbl_lru);
++ if (!list_empty(&nbl->nbl_list) &&
++ !list_empty(&nbl->nbl_lru)) {
++ list_del_init(&nbl->nbl_list);
++ list_del_init(&nbl->nbl_lru);
++ kref_put(&nbl->nbl_kref, free_nbl);
++ }
++ /* nbl can use one of lists to be linked to reaplist */
+ spin_unlock(&nn->blocked_locks_lock);
+ }
+ free_blocked_lock(nbl);
+diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
+index 6eb3c7157214b..95457cfd37fc0 100644
+--- a/fs/nfsd/state.h
++++ b/fs/nfsd/state.h
+@@ -633,6 +633,7 @@ struct nfsd4_blocked_lock {
+ struct file_lock nbl_lock;
+ struct knfsd_fh nbl_fh;
+ struct nfsd4_callback nbl_cb;
++ struct kref nbl_kref;
+ };
+
+ struct nfsd4_compound_state;
+--
+2.43.0
+
--- /dev/null
+From 0c8dbd4cf80659d54b19a612cadb0ac7693d386a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Oct 2021 12:56:55 -0400
+Subject: nfsd4: remove obselete comment
+
+From: J. Bruce Fields <bfields@redhat.com>
+
+[ Upstream commit 80479eb862102f9513e93fcf726c78cc0be2e3b2 ]
+
+Mandatory locking has been removed. And the rest of this comment is
+redundant with the code.
+
+Reported-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index 59e30cff920ca..925aa08ca1075 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -744,9 +744,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ path.dentry = fhp->fh_dentry;
+ inode = d_inode(path.dentry);
+
+- /* Disallow write access to files with the append-only bit set
+- * or any access when mandatory locking enabled
+- */
+ err = nfserr_perm;
+ if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
+ goto out;
+--
+2.43.0
+
--- /dev/null
+From 39db3090791509dadb896ad8a1d1cebefed7c733 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 10 Sep 2022 22:14:02 +0100
+Subject: nfsd_splice_actor(): handle compound pages
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit bfbfb6182ad1d7d184b16f25165faad879147f79 ]
+
+pipe_buffer might refer to a compound page (and contain more than a PAGE_SIZE
+worth of data). Theoretically it had been possible since way back, but
+nfsd_splice_actor() hadn't run into that until copy_page_to_iter() change.
+Fortunately, the only thing that changes for compound pages is that we
+need to stuff each relevant subpage in and convert the offset into offset
+in the first subpage.
+
+Acked-by: Chuck Lever <chuck.lever@oracle.com>
+Tested-by: Benjamin Coddington <bcodding@redhat.com>
+Fixes: f0f6b614f83d "copy_page_to_iter(): don't split high-order page in case of ITER_PIPE"
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/vfs.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
+index ad689215b1f37..343af6341e5e1 100644
+--- a/fs/nfsd/vfs.c
++++ b/fs/nfsd/vfs.c
+@@ -846,10 +846,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+ {
+ struct svc_rqst *rqstp = sd->u.data;
+-
+- svc_rqst_replace_page(rqstp, buf->page);
+- if (rqstp->rq_res.page_len == 0)
+- rqstp->rq_res.page_base = buf->offset;
++ struct page *page = buf->page; // may be a compound one
++ unsigned offset = buf->offset;
++ int i;
++
++ page += offset / PAGE_SIZE;
++ for (i = sd->len; i > 0; i -= PAGE_SIZE)
++ svc_rqst_replace_page(rqstp, page++);
++ if (rqstp->rq_res.page_len == 0) // first call
++ rqstp->rq_res.page_base = offset % PAGE_SIZE;
+ rqstp->rq_res.page_len += sd->len;
+ return sd->len;
+ }
+--
+2.43.0
+
--- /dev/null
+From 2e278ff5605ce4406ac842570282f8b0738d6fbf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 09:40:06 -0400
+Subject: NLM: Defend against file_lock changes after vfs_test_lock()
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit 184cefbe62627730c30282df12bcff9aae4816ea ]
+
+Instead of trusting that struct file_lock returns completely unchanged
+after vfs_test_lock() when there's no conflicting lock, stash away our
+nlm_lockowner reference so we can properly release it for all cases.
+
+This defends against another file_lock implementation overwriting fl_owner
+when the return type is F_UNLCK.
+
+Reported-by: Roberto Bergantinos Corpas <rbergant@redhat.com>
+Tested-by: Roberto Bergantinos Corpas <rbergant@redhat.com>
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc4proc.c | 4 +++-
+ fs/lockd/svclock.c | 10 +---------
+ fs/lockd/svcproc.c | 5 ++++-
+ include/linux/lockd/lockd.h | 1 +
+ 4 files changed, 9 insertions(+), 11 deletions(-)
+
+diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
+index 1c9214801e69e..930e90f21b151 100644
+--- a/fs/lockd/svc4proc.c
++++ b/fs/lockd/svc4proc.c
+@@ -95,6 +95,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_host *host;
+ struct nlm_file *file;
++ struct nlm_lockowner *test_owner;
+ __be32 rc = rpc_success;
+
+ dprintk("lockd: TEST4 called\n");
+@@ -104,6 +105,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
+ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
+ return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+
++ test_owner = argp->lock.fl.fl_owner;
+ /* Now check for conflicting locks */
+ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
+ if (resp->status == nlm_drop_reply)
+@@ -111,7 +113,7 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
+ else
+ dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
+
+- nlmsvc_release_lockowner(&argp->lock);
++ nlmsvc_put_lockowner(test_owner);
+ nlmsvc_release_host(host);
+ nlm_release_file(file);
+ return rc;
+diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
+index cb3658ab9b7ae..9c1aa75441e1c 100644
+--- a/fs/lockd/svclock.c
++++ b/fs/lockd/svclock.c
+@@ -340,7 +340,7 @@ nlmsvc_get_lockowner(struct nlm_lockowner *lockowner)
+ return lockowner;
+ }
+
+-static void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
++void nlmsvc_put_lockowner(struct nlm_lockowner *lockowner)
+ {
+ if (!refcount_dec_and_lock(&lockowner->count, &lockowner->host->h_lock))
+ return;
+@@ -590,7 +590,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
+ int error;
+ int mode;
+ __be32 ret;
+- struct nlm_lockowner *test_owner;
+
+ dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
+ nlmsvc_file_inode(file)->i_sb->s_id,
+@@ -604,9 +603,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
+ goto out;
+ }
+
+- /* If there's a conflicting lock, remember to clean up the test lock */
+- test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
+-
+ mode = lock_to_openmode(&lock->fl);
+ error = vfs_test_lock(file->f_file[mode], &lock->fl);
+ if (error) {
+@@ -635,10 +631,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
+ conflock->fl.fl_end = lock->fl.fl_end;
+ locks_release_private(&lock->fl);
+
+- /* Clean up the test lock */
+- lock->fl.fl_owner = NULL;
+- nlmsvc_put_lockowner(test_owner);
+-
+ ret = nlm_lck_denied;
+ out:
+ return ret;
+diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
+index 99696d3f6dd66..c215a4599d5c8 100644
+--- a/fs/lockd/svcproc.c
++++ b/fs/lockd/svcproc.c
+@@ -116,6 +116,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_host *host;
+ struct nlm_file *file;
++ struct nlm_lockowner *test_owner;
+ __be32 rc = rpc_success;
+
+ dprintk("lockd: TEST called\n");
+@@ -125,6 +126,8 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
+ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
+ return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+
++ test_owner = argp->lock.fl.fl_owner;
++
+ /* Now check for conflicting locks */
+ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
+ if (resp->status == nlm_drop_reply)
+@@ -133,7 +136,7 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
+ dprintk("lockd: TEST status %d vers %d\n",
+ ntohl(resp->status), rqstp->rq_vers);
+
+- nlmsvc_release_lockowner(&argp->lock);
++ nlmsvc_put_lockowner(test_owner);
+ nlmsvc_release_host(host);
+ nlm_release_file(file);
+ return rc;
+diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
+index fcef192e5e45e..70ce419e27093 100644
+--- a/include/linux/lockd/lockd.h
++++ b/include/linux/lockd/lockd.h
+@@ -292,6 +292,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
+ __be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
+ struct nlm_lock *);
+ void nlm_release_file(struct nlm_file *);
++void nlmsvc_put_lockowner(struct nlm_lockowner *);
+ void nlmsvc_release_lockowner(struct nlm_lock *);
+ void nlmsvc_mark_resources(struct net *);
+ void nlmsvc_free_host_resources(struct nlm_host *);
+--
+2.43.0
+
--- /dev/null
+From 7d36054d1a25cffb29b354219bc71a6a83e2a7b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Feb 2024 18:08:58 -0500
+Subject: NFSD: De-duplicate hash bucket indexing
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+NFSD: De-duplicate hash bucket indexing
+
+[ Upstream commit 378a6109dd142a678f629b740f558365150f60f9 ]
+
+Clean up: The details of finding the right hash bucket are exactly
+the same in both nfsd_cache_lookup() and nfsd_cache_update().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfscache.c | 22 ++++++++++------------
+ 1 file changed, 10 insertions(+), 12 deletions(-)
+
+diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
+index a838909502907..7880a2f2d29f6 100644
+--- a/fs/nfsd/nfscache.c
++++ b/fs/nfsd/nfscache.c
+@@ -84,12 +84,6 @@ nfsd_hashsize(unsigned int limit)
+ return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
+ }
+
+-static u32
+-nfsd_cache_hash(__be32 xid, struct nfsd_net *nn)
+-{
+- return hash_32((__force u32)xid, nn->maskbits);
+-}
+-
+ static struct svc_cacherep *
+ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
+@@ -241,6 +235,14 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+ list_move_tail(&rp->c_lru, &b->lru_head);
+ }
+
++static noinline struct nfsd_drc_bucket *
++nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
++{
++ unsigned int hash = hash_32((__force u32)xid, nn->maskbits);
++
++ return &nn->drc_hashtbl[hash];
++}
++
+ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
+ unsigned int max)
+ {
+@@ -421,10 +423,8 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
+ {
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct svc_cacherep *rp, *found;
+- __be32 xid = rqstp->rq_xid;
+ __wsum csum;
+- u32 hash = nfsd_cache_hash(xid, nn);
+- struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash];
++ struct nfsd_drc_bucket *b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
+ int type = rqstp->rq_cachetype;
+ int rtn = RC_DOIT;
+
+@@ -528,7 +528,6 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct svc_cacherep *rp = rqstp->rq_cacherep;
+ struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
+- u32 hash;
+ struct nfsd_drc_bucket *b;
+ int len;
+ size_t bufsize = 0;
+@@ -536,8 +535,7 @@ void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+ if (!rp)
+ return;
+
+- hash = nfsd_cache_hash(rp->c_key.k_xid, nn);
+- b = &nn->drc_hashtbl[hash];
++ b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn);
+
+ len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
+ len >>= 2;
+--
+2.43.0
+
--- /dev/null
+From bac47e39b0bb56c4a15d8b93e6079da281ad85b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 Jan 2023 12:43:37 -0500
+Subject: Revert "SUNRPC: Use RMW bitops in single-threaded hot paths"
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 7827c81f0248e3c2f40d438b020f3d222f002171 ]
+
+The premise that "Once an svc thread is scheduled and executing an
+RPC, no other processes will touch svc_rqst::rq_flags" is false.
+svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd
+threads when determining which thread to wake up next.
+
+Found via KCSAN.
+
+Fixes: 28df0988815f ("SUNRPC: Use RMW bitops in single-threaded hot paths")
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 7 +++----
+ fs/nfsd/nfs4xdr.c | 2 +-
+ net/sunrpc/auth_gss/svcauth_gss.c | 4 ++--
+ net/sunrpc/svc.c | 6 +++---
+ net/sunrpc/svc_xprt.c | 2 +-
+ net/sunrpc/svcsock.c | 8 ++++----
+ net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
+ 7 files changed, 15 insertions(+), 16 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index eeff0ba0be558..5ea71af276c7b 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ * the client wants us to do more in this compound:
+ */
+ if (!nfsd4_last_compound_op(rqstp))
+- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
+ /* check stateid */
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+@@ -2610,12 +2610,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ cstate->minorversion = args->minorversion;
+ fh_init(current_fh, NFS4_FHSIZE);
+ fh_init(save_fh, NFS4_FHSIZE);
+-
+ /*
+ * Don't use the deferral mechanism for NFSv4; compounds make it
+ * too hard to avoid non-idempotency problems.
+ */
+- __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
++ clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+
+ /*
+ * According to RFC3010, this takes precedence over all other errors.
+@@ -2737,7 +2736,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ out:
+ cstate->status = status;
+ /* Reset deferral mechanism for RPC deferrals */
+- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
++ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 30e085f1e4797..9c9ff3bdc62a9 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2526,7 +2526,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
+
+ if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
+- __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
++ clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+
+ return true;
+ }
+diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
+index 2381c5d1b0710..48b608cb5f5ec 100644
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
+ * rejecting the server-computed MIC in this somewhat rare case,
+ * do not use splice with the GSS integrity service.
+ */
+- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
+ /* Did we already verify the signature on the original pass through? */
+ if (rqstp->rq_deferred)
+@@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
+ int pad, remaining_len, offset;
+ u32 rseqno;
+
+- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
+ priv_len = svc_getnl(&buf->head[0]);
+ if (rqstp->rq_deferred) {
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 86f00019d0ebb..9177b243a949d 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -1281,10 +1281,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+ goto err_short_len;
+
+ /* Will be turned off by GSS integrity and privacy services */
+- __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ /* Will be turned off only when NFSv4 Sessions are used */
+- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+- __clear_bit(RQ_DROPME, &rqstp->rq_flags);
++ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
++ clear_bit(RQ_DROPME, &rqstp->rq_flags);
+
+ svc_putu32(resv, rqstp->rq_xid);
+
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 39acef5134f5c..67ccf1a6459ae 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -1251,7 +1251,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
+ trace_svc_defer(rqstp);
+ svc_xprt_get(rqstp->rq_xprt);
+ dr->xprt = rqstp->rq_xprt;
+- __set_bit(RQ_DROPME, &rqstp->rq_flags);
++ set_bit(RQ_DROPME, &rqstp->rq_flags);
+
+ dr->handle.revisit = svc_revisit;
+ return &dr->handle;
+diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
+index 46cea0e413aeb..be7081284a098 100644
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
+ static void svc_sock_secure_port(struct svc_rqst *rqstp)
+ {
+ if (svc_port_is_privileged(svc_addr(rqstp)))
+- __set_bit(RQ_SECURE, &rqstp->rq_flags);
++ set_bit(RQ_SECURE, &rqstp->rq_flags);
+ else
+- __clear_bit(RQ_SECURE, &rqstp->rq_flags);
++ clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ }
+
+ /*
+@@ -1005,9 +1005,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
+ rqstp->rq_xprt_ctxt = NULL;
+ rqstp->rq_prot = IPPROTO_TCP;
+ if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
+- __set_bit(RQ_LOCAL, &rqstp->rq_flags);
++ set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ else
+- __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
++ clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+
+ p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
+ calldir = p[1];
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+index ac147304fb0e9..f776f0cb471f0 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
+
+ static void svc_rdma_secure_port(struct svc_rqst *rqstp)
+ {
+- __set_bit(RQ_SECURE, &rqstp->rq_flags);
++ set_bit(RQ_SECURE, &rqstp->rq_flags);
+ }
+
+ static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
+--
+2.43.0
+
x86-alternatives-introduce-int3_emulate_jcc.patch
x86-alternatives-teach-text_poke_bp-to-patch-jcc.d32-instructions.patch
x86-static_call-add-support-for-jcc-tail-calls.patch
+fsnotify-pass-data_type-to-fsnotify_name.patch
+fsnotify-pass-dentry-instead-of-inode-data.patch
+fsnotify-clarify-contract-for-create-event-hooks.patch
+fsnotify-don-t-insert-unmergeable-events-in-hashtabl.patch
+fanotify-fold-event-size-calculation-to-its-own-func.patch
+fanotify-split-fsid-check-from-other-fid-mode-checks.patch
+inotify-don-t-force-fs_in_ignored.patch
+fsnotify-add-helper-to-detect-overflow_event.patch
+fsnotify-add-wrapper-around-fsnotify_add_event.patch
+fsnotify-retrieve-super-block-from-the-data-field.patch
+fsnotify-protect-fsnotify_handle_inode_event-from-no.patch
+fsnotify-pass-group-argument-to-free_event.patch
+fanotify-support-null-inode-event-in-fanotify_dfid_i.patch
+fanotify-allow-file-handle-encoding-for-unhashed-eve.patch
+fanotify-encode-empty-file-handle-when-no-inode-is-p.patch
+fanotify-require-fid_mode-for-any-non-fd-event.patch
+fsnotify-support-fs_error-event-type.patch
+fanotify-reserve-uapi-bits-for-fan_fs_error.patch
+fanotify-pre-allocate-pool-of-error-events.patch
+fanotify-support-enqueueing-of-error-events.patch
+fanotify-support-merging-of-error-events.patch
+fanotify-wrap-object_fh-inline-space-in-a-creator-ma.patch
+fanotify-add-helpers-to-decide-whether-to-report-fid.patch
+fanotify-warn_on-against-too-large-file-handles.patch
+fanotify-report-fid-info-for-file-related-file-syste.patch
+fanotify-emit-generic-error-info-for-error-event.patch
+fanotify-allow-users-to-request-fan_fs_error-events.patch
+ext4-send-notifications-on-error.patch
+docs-document-the-fan_fs_error-event.patch
+nfs-remove-unnecessary-trace_define_enum-s.patch
+sunrpc-tracepoints-should-display-tk_pid-and-cl_clid.patch
+nfs-move-generic-fs-show-macros-to-global-header.patch
+nfs-move-nfs-protocol-display-macros-to-global-heade.patch
+nfsd-optimize-drc-bucket-pruning.patch
+nfsd-move-filehandle-format-declarations-out-of-uapi.patch
+nfsd-drop-support-for-ancient-filehandles.patch
+nfsd-simplify-struct-nfsfh.patch
+nfsd-initialize-pointer-ni-with-null-and-not-plain-i.patch
+sunrpc-replace-the-__be32-p-parameter-to-.pc_decode.patch
+sunrpc-change-return-value-type-of-.pc_decode.patch
+nfsd-save-location-of-nfsv4-compound-status.patch
+sunrpc-replace-the-__be32-p-parameter-to-.pc_encode.patch
+sunrpc-change-return-value-type-of-.pc_encode.patch
+nfsd-update-create-verifier-comment.patch
+nfsd-fix-boolreturn.cocci-warning.patch
+nfsd4-remove-obselete-comment.patch
+ext4-fix-error-code-saved-on-super-block-during-file.patch
+fsnotify-clarify-object-type-argument.patch
+fsnotify-separate-mark-iterator-type-from-object-typ.patch
+fanotify-introduce-group-flag-fan_report_target_fid.patch
+fsnotify-generate-fs_rename-event-with-rich-informat.patch
+fanotify-use-macros-to-get-the-offset-to-fanotify_in.patch
+fanotify-use-helpers-to-parcel-fanotify_info-buffer.patch
+fanotify-support-secondary-dir-fh-and-name-in-fanoti.patch
+fanotify-record-old-and-new-parent-and-name-in-fan_r.patch
+fanotify-record-either-old-name-new-name-or-both-for.patch
+fanotify-report-old-and-or-new-parent-name-in-fan_re.patch
+fanotify-wire-up-fan_rename-event.patch
+exit-implement-kthread_exit.patch
+exit-rename-module_put_and_exit-to-module_put_and_kt.patch
+nfsd-handle-errors-better-in-write_ports_addfd.patch
+sunrpc-change-svc_get-to-return-the-svc.patch
+sunrpc-nfsd-clean-up-get-put-functions.patch
+sunrpc-stop-using-sv_nrthreads-as-a-refcount.patch
+nfsd-make-nfsd_stats.th_cnt-atomic_t.patch
+sunrpc-use-sv_lock-to-protect-updates-to-sv_nrthread.patch
+nfsd-narrow-nfsd_mutex-protection-in-nfsd-thread.patch
+nfsd-make-it-possible-to-use-svc_set_num_threads_syn.patch
+sunrpc-discard-svo_setup-and-rename-svc_set_num_thre.patch
+nfsd-simplify-locking-for-network-notifier.patch
+lockd-introduce-nlmsvc_serv.patch
+lockd-simplify-management-of-network-status-notifier.patch
+lockd-move-lockd_start_svc-call-into-lockd_create_sv.patch
+lockd-move-svc_exit_thread-into-the-thread.patch
+lockd-introduce-lockd_put.patch
+lockd-rename-lockd_create_svc-to-lockd_get.patch
+sunrpc-move-the-pool_map-definitions-back-into-svc.c.patch
+sunrpc-always-treat-sv_nrpools-1-as-not-pooled.patch
+lockd-use-svc_set_num_threads-for-thread-start-and-s.patch
+nfs-switch-the-callback-service-back-to-non-pooled.patch
+nfsd-remove-be32_to_cpu-from-drc-hash-function.patch
+nfsd-fix-inconsistent-indenting.patch
+nfsd-simplify-per-net-file-cache-management.patch
+nfsd-combine-xdr-error-tracepoints.patch
+nfsd-improve-stateid-access-bitmask-documentation.patch
+nfsd-de-duplicate-nfsd4_decode_bitmap4.patch
+nfs-block-notification-on-fs-with-its-own-lock.patch
+nfsd4-add-refcount-for-nfsd4_blocked_lock.patch
+nfsd-map-ebadf.patch
+nfsd-add-errno-mapping-for-eremoteio.patch
+nfsd-retry-once-in-nfsd_open-on-an-eopenstale-return.patch
+nfsd-clean-up-nfsd_vfs_write.patch
+nfsd-de-duplicate-net_generic-svc_net-rqstp-nfsd_net.patch
+nfsd-add-a-tracepoint-for-errors-in-nfsd4_clone_file.patch
+nfsd-write-verifier-might-go-backwards.patch
+nfsd-clean-up-the-nfsd_net-nfssvc_boot-field.patch
+nfsd-rename-boot-verifier-functions.patch
+nfsd-trace-boot-verifier-resets.patch
+nfsd-move-fill_pre_wcc-and-fill_post_wcc.patch
+fsnotify-invalidate-dcache-before-in_delete-event.patch
+nfsd-deprecate-nfs_offset_max.patch
+nfsd-add-support-for-the-birth-time-attribute.patch
+ordate-thu-sep-30-19-19-57-2021-0400.patch
+nfsd-skip-extra-computation-for-rc_nocache-case.patch
+nfsd-streamline-the-rare-found-case.patch
+nfsd-remove-nfsd_proc_args_-macros.patch
+sunrpc-remove-the-.svo_enqueue_xprt-method.patch
+sunrpc-merge-svc_do_enqueue_xprt-into-svc_enqueue_xp.patch
+sunrpc-remove-svo_shutdown-method.patch
+sunrpc-rename-svc_create_xprt.patch
+sunrpc-rename-svc_close_xprt.patch
+sunrpc-remove-svc_shutdown_net.patch
+nfsd-remove-svc_serv_ops-svo_module.patch
+nfsd-move-svc_serv_ops-svo_function-into-struct-svc_.patch
+nfsd-remove-config_nfsd_v3.patch
+nfsd-clean-up-_lm_-operation-names.patch
+nfsd-fix-using-the-correct-variable-for-sizeof.patch
+fsnotify-fix-merge-with-parent-s-ignored-mask.patch
+fsnotify-optimize-fs_modify-events-with-no-ignored-m.patch
+fsnotify-remove-redundant-parameter-judgment.patch
+nfsd-fix-a-write-performance-regression.patch
+nfsd-clean-up-nfsd_file_put.patch
+fanotify-do-not-allow-setting-dirent-events-in-mask-.patch
+fs-lock-documentation-cleanup.-replace-inode-i_lock-.patch
+inotify-move-control-flags-from-mask-to-mark-flags.patch
+fsnotify-pass-flags-argument-to-fsnotify_alloc_group.patch
+fsnotify-make-allow_dups-a-property-of-the-group.patch
+fsnotify-create-helpers-for-group-mark_mutex-lock.patch
+inotify-use-fsnotify-group-lock-helpers.patch
+nfsd-use-fsnotify-group-lock-helpers.patch
+dnotify-use-fsnotify-group-lock-helpers.patch
+fsnotify-allow-adding-an-inode-mark-without-pinning-.patch
+fanotify-create-helper-fanotify_mark_user_flags.patch
+fanotify-factor-out-helper-fanotify_mark_update_flag.patch
+fanotify-implement-evictable-inode-marks.patch
+fanotify-use-fsnotify-group-lock-helpers.patch
+fanotify-enable-evictable-inode-marks.patch
+fsnotify-introduce-mark-type-iterator.patch
+fsnotify-consistent-behavior-for-parent-not-watching.patch
+fanotify-fix-incorrect-fmode_t-casts.patch
+nfsd-clean-up-nfsd_splice_actor.patch
+nfsd-add-courteous-server-support-for-thread-with-on.patch
+nfsd-add-support-for-share-reservation-conflict-to-c.patch
+nfsd-move-create-destroy-of-laundry_wq-to-init_nfsd-.patch
+fs-lock-add-helper-locks_owner_has_blockers-to-check.patch
+fs-lock-add-2-callbacks-to-lock_manager_operations-t.patch
+nfsd-add-support-for-lock-conflict-to-courteous-serv.patch
+nfsd-show-state-of-courtesy-client-in-client-info.patch
+nfsd-clean-up-nfsd3_proc_create.patch
+nfsd-avoid-calling-fh_drop_write-twice-in-do_nfsd_cr.patch
+nfsd-refactor-nfsd_create_setattr.patch
+nfsd-refactor-nfsv3-create.patch
+nfsd-refactor-nfsv4-open-create.patch
+nfsd-remove-do_nfsd_create.patch
+nfsd-clean-up-nfsd_open_verified.patch
+nfsd-instantiate-a-struct-file-when-creating-a-regul.patch
+nfsd-remove-dprintk-call-sites-from-tail-of-nfsd4_op.patch
+nfsd-fix-whitespace.patch
+nfsd-move-documenting-comment-for-nfsd4_process_open.patch
+nfsd-trace-filecache-opens.patch
+sunrpc-use-rmw-bitops-in-single-threaded-hot-paths.patch
+nfsd-unregister-the-cld-notifier-when-laundry_wq-cre.patch
+nfsd-fix-null-ptr-deref-in-nfsd_fill_super.patch
+nfsd-modernize-nfsd4_release_lockowner.patch
+nfsd-add-documenting-comment-for-nfsd4_release_locko.patch
+nfsd-nfsd_file_put-can-sleep.patch
+nfsd-fix-potential-use-after-free-in-nfsd_file_put.patch
+nfs-restore-module-put-when-manager-exits.patch
+fanotify-refine-the-validation-checks-on-non-dir-ino.patch
+nfsd-decode-nfsv4-birth-time-attribute.patch
+fs-inotify-fix-typo-in-inotify-comment.patch
+fanotify-prepare-for-setting-event-flags-in-ignore-m.patch
+fanotify-cleanups-for-fanotify_mark-input-validation.patch
+fanotify-introduce-fan_mark_ignore.patch
+fsnotify-fix-comment-typo.patch
+nlm-defend-against-file_lock-changes-after-vfs_test_.patch
+nfsd-instrument-fh_verify.patch
+nfsd-fix-space-and-spelling-mistake.patch
+nfsd-remove-redundant-assignment-to-variable-len.patch
+nfsd-demote-a-warn-to-a-pr_warn.patch
+nfsd-report-filecache-lru-size.patch
+nfsd-report-count-of-calls-to-nfsd_file_acquire.patch
+nfsd-report-count-of-freed-filecache-items.patch
+nfsd-report-average-age-of-filecache-items.patch
+nfsd-add-nfsd_file_lru_dispose_list-helper.patch
+nfsd-refactor-nfsd_file_gc.patch
+nfsd-refactor-nfsd_file_lru_scan.patch
+nfsd-report-the-number-of-items-evicted-by-the-lru-w.patch
+nfsd-record-number-of-flush-calls.patch
+nfsd-zero-counters-when-the-filecache-is-re-initiali.patch
+nfsd-hook-up-the-filecache-stat-file.patch
+nfsd-warn-when-freeing-an-item-still-linked-via-nf_l.patch
+nfsd-trace-filecache-lru-activity.patch
+nfsd-leave-open-files-out-of-the-filecache-lru.patch
+nfsd-fix-the-filecache-lru-shrinker.patch
+nfsd-never-call-nfsd_file_gc-in-foreground-paths.patch
+nfsd-no-longer-record-nf_hashval-in-the-trace-log.patch
+nfsd-remove-lockdep-assertion-from-unhash_and_releas.patch
+nfsd-nfsd_file_unhash-can-compute-hashval-from-nf-nf.patch
+nfsd-refactor-__nfsd_file_close_inode.patch
+nfsd-nfsd_file_hash_remove-can-compute-hashval.patch
+nfsd-remove-nfsd_file-nf_hashval.patch
+nfsd-replace-the-init-once-mechanism.patch
+nfsd-set-up-an-rhashtable-for-the-filecache.patch
+nfsd-convert-the-filecache-to-use-rhashtable.patch
+nfsd-clean-up-unused-code-after-rhashtable-conversio.patch
+nfsd-separate-tracepoints-for-acquire-and-create.patch
+nfsd-move-nfsd_file_trace_alloc-tracepoint.patch
+nfsd-nfsv4-close-should-release-an-nfsd_file-immedia.patch
+nfsd-ensure-nf_inode-is-never-dereferenced.patch
+nfsd-refactoring-v4-specific-code-to-a-helper-in-nfs.patch
+nfsd-keep-track-of-the-number-of-v4-clients-in-the-s.patch
+nfsd-limit-the-number-of-v4-clients-to-1024-per-1gb-.patch
+nfsd-silence-extraneous-printk-on-nfsd.ko-insertion.patch
+nfsd-optimize-nfsd4_encode_operation.patch
+nfsd-optimize-nfsd4_encode_fattr.patch
+nfsd-clean-up-splice_ok-in-nfsd4_encode_read.patch
+nfsd-add-an-nfsd4_read-rd_eof-field.patch
+nfsd-optimize-nfsd4_encode_readv.patch
+nfsd-simplify-starting_len.patch
+nfsd-use-xdr_pad_size.patch
+nfsd-clean-up-nfsd4_encode_readlink.patch
+nfsd-fix-strncpy-fortify-warning.patch
+nfsd-nfserrno-enomem-is-nfserr_jukebox.patch
+nfsd-shrink-size-of-struct-nfsd4_copy_notify.patch
+nfsd-shrink-size-of-struct-nfsd4_copy.patch
+nfsd-reorder-the-fields-in-struct-nfsd4_op.patch
+nfsd-make-nfs4_put_copy-static.patch
+nfsd-replace-boolean-fields-in-struct-nfsd4_copy.patch
+nfsd-refactor-nfsd4_cleanup_inter_ssc-1-2.patch
+nfsd-refactor-nfsd4_cleanup_inter_ssc-2-2.patch
+nfsd-refactor-nfsd4_do_copy.patch
+nfsd-remove-kmalloc-from-nfsd4_do_async_copy.patch
+nfsd-add-nfsd4_send_cb_offload.patch
+nfsd-move-copy-offload-callback-arguments-into-a-sep.patch
+nfsd-drop-fh-argument-from-alloc_init_deleg.patch
+nfsd-verify-the-opened-dentry-after-setting-a-delega.patch
+nfsd-introduce-struct-nfsd_attrs.patch
+nfsd-set-attributes-when-creating-symlinks.patch
+nfsd-add-security-label-to-struct-nfsd_attrs.patch
+nfsd-add-posix-acls-to-struct-nfsd_attrs.patch
+nfsd-change-nfsd_create-nfsd_symlink-to-unlock-direc.patch
+nfsd-always-drop-directory-lock-in-nfsd_unlink.patch
+nfsd-only-call-fh_unlock-once-in-nfsd_link.patch
+nfsd-reduce-locking-in-nfsd_lookup.patch
+nfsd-use-explicit-lock-unlock-for-directory-ops.patch
+nfsd-use-un-lock_inode-instead-of-fh_-un-lock-for-fi.patch
+nfsd-discard-fh_locked-flag-and-fh_lock-fh_unlock.patch
+nfsd-fix-regression-with-setting-acls.patch
+nfsd_splice_actor-handle-compound-pages.patch
+nfsd-move-from-strlcpy-with-unused-retval-to-strscpy.patch
+lockd-move-from-strlcpy-with-unused-retval-to-strscp.patch
+nfsd-enforce-filehandle-check-for-source-file-in-cop.patch
+nfsd-remove-redundant-variable-status.patch
+nfsd-avoid-some-useless-tests.patch
+nfsd-propagate-some-error-code-returned-by-memdup_us.patch
+nfsd-increase-nfsd_max_ops_per_compound.patch
+nfsd-drop-fname-and-flen-args-from-nfsd_create_locke.patch
+nfsd-clean-up-mounted_on_fileid-handling.patch
+nfsd-remove-nfsd4_prepare_cb_recall-declaration.patch
+nfsd-replace-dprintk-call-site-in-fh_verify.patch
+nfsd-trace-nfsv4-compound-tags.patch
+nfsd-add-tracepoints-to-report-nfsv4-callback-comple.patch
+nfsd-add-a-mechanism-to-wait-for-a-delegreturn.patch
+nfsd-refactor-nfsd_setattr.patch
+nfsd-make-nfsd4_setattr-wait-before-returning-nfs4er.patch
+nfsd-make-nfsd4_rename-wait-before-returning-nfs4err.patch
+nfsd-make-nfsd4_remove-wait-before-returning-nfs4err.patch
+nfsd-keep-track-of-the-number-of-courtesy-clients-in.patch
+nfsd-add-shrinker-to-reap-courtesy-clients-on-low-me.patch
+sunrpc-parametrize-how-much-of-argsize-should-be-zer.patch
+nfsd-reduce-amount-of-struct-nfsd4_compoundargs-that.patch
+nfsd-refactor-common-code-out-of-dirlist-helpers.patch
+nfsd-use-xdr_inline_decode-to-decode-nfsv3-symlinks.patch
+nfsd-clean-up-write-arg-decoders.patch
+nfsd-clean-up-nfs4svc_encode_compoundres.patch
+nfsd-remove-inline-directives-on-op_rsize_bop-helper.patch
+nfsd-remove-unused-nfsd4_compoundargs-cachetype-fiel.patch
+nfsd-pack-struct-nfsd4_compoundres.patch
+nfsd-use-define_proc_show_attribute-to-define-nfsd_p.patch
+nfsd-use-define_show_attribute-to-define-export_feat.patch
+nfsd-use-define_show_attribute-to-define-client_info.patch
+nfsd-use-define_show_attribute-to-define-nfsd_reply_.patch
+nfsd-use-define_show_attribute-to-define-nfsd_file_c.patch
+nfsd-rename-the-fields-in-copy_stateid_t.patch
+nfsd-cap-rsize_bop-result-based-on-send-buffer-size.patch
+nfsd-only-fill-out-return-pointer-on-success-in-nfsd.patch
+nfsd-fix-comments-about-spinlock-handling-with-deleg.patch
+nfsd-make-nfsd4_run_cb-a-bool-return-function.patch
+nfsd-extra-checks-when-freeing-delegation-stateids.patch
+fs-notify-constify-path.patch
+fsnotify-remove-unused-declaration.patch
+fanotify-remove-obsoleted-fanotify_event_has_path.patch
+nfsd-fix-nfsd_file_unhash_and_dispose.patch
+nfsd-rework-hashtable-handling-in-nfsd_do_file_acqui.patch
+nfsd-unregister-shrinker-when-nfsd_init_net-fails.patch
+nfsd-ensure-we-always-call-fh_verify_error-tracepoin.patch
+nfsd-fix-net-namespace-logic-in-__nfsd_file_cache_pu.patch
+nfsd-fix-use-after-free-in-nfsd_file_do_acquire-trac.patch
+nfsd-put-the-export-reference-in-nfsd4_verify_deleg_.patch
+nfsd-fix-trace_nfsd_fh_verify_err-crasher.patch
+nfsd-fix-reads-with-a-non-zero-offset-that-don-t-end.patch
+lockd-use-locks_inode_context-helper.patch
+nfsd-use-locks_inode_context-helper.patch
+nfsd-simplify-read_plus.patch
+nfsd-remove-redundant-assignment-to-variable-host_er.patch
+nfsd-finish-converting-the-nfsv3-getacl-result-encod.patch
+nfsd-ignore-requests-to-disable-unsupported-versions.patch
+nfsd-move-nfserrno-to-vfs.c.patch
+nfsd-allow-disabling-nfsv2-at-compile-time.patch
+exportfs-use-pr_debug-for-unreachable-debug-statemen.patch
+nfsd-pass-the-target-nfsd_file-to-nfsd_commit.patch
+nfsd-revert-nfsd-nfsv4-close-should-release-an-nfsd_.patch
+nfsd-add-an-nfsd_file_gc-flag-to-enable-nfsd_file-ga.patch
+nfsd-flesh-out-a-documenting-comment-for-filecache.c.patch
+nfsd-clean-up-nfs4_preprocess_stateid_op-call-sites.patch
+nfsd-trace-stateids-returned-via-delegreturn.patch
+nfsd-trace-delegation-revocations.patch
+nfsd-use-const-pointers-as-parameters-to-fh_-helpers.patch
+nfsd-update-file_hashtbl-helpers.patch
+nfsd-clean-up-nfsd4_init_file.patch
+nfsd-add-a-nfsd4_file_hash_remove-helper.patch
+nfsd-clean-up-find_or_add_file.patch
+nfsd-refactor-find_file.patch
+nfsd-use-rhashtable-for-managing-nfs4_file-objects.patch
+nfsd-fix-licensing-header-in-filecache.c.patch
+nfsd-remove-the-pages_flushed-statistic-from-filecac.patch
+nfsd-reorganize-filecache.c.patch
+filelock-add-a-new-locks_inode_context-accessor-func.patch
+nfsd-fix-up-the-filecache-laundrette-scheduling.patch
+nfsd-add-an-nfsd_file_fsync-tracepoint.patch
+nfsd-return-error-if-nfs4_setacl-fails.patch
+nfsd-use-struct_size-helper-in-alloc_session.patch
+lockd-set-missing-fl_flags-field-when-retrieving-arg.patch
+lockd-ensure-we-use-the-correct-file-descriptor-when.patch
+lockd-fix-file-selection-in-nlmsvc_cancel_blocked.patch
+trace-relocate-event-helper-files.patch
+nfsd-refactoring-courtesy_client_reaper-to-a-generic.patch
+nfsd-add-support-for-sending-cb_recall_any.patch
+nfsd-add-delegation-reaper-to-react-to-low-memory-co.patch
+nfsd-add-cb_recall_any-tracepoints.patch
+nfsd-use-only-rq_dropme-to-signal-the-need-to-drop-a.patch
+nfsd-avoid-clashing-function-prototypes.patch
+nfsd-rework-refcounting-in-filecache.patch
+nfsd-fix-handling-of-cached-open-files-in-nfsd4_open.patch
+revert-sunrpc-use-rmw-bitops-in-single-threaded-hot-.patch
+nfsd-use-set_bit-rq_dropme.patch
+nfsd-register-unregister-of-nfsd-client-shrinker-at-.patch
+nfsd-replace-delayed_work-with-work_struct-for-nfsd_.patch
+nfsd-don-t-free-files-unconditionally-in-__nfsd_file.patch
+nfsd-don-t-destroy-global-nfs4_file-table-in-per-net.patch
+nfsd-enhance-inter-server-copy-cleanup.patch
+nfsd-allow-nfsd_file_get-to-sanely-handle-a-null-poi.patch
+nfsd-clean-up-potential-nfsd_file-refcount-leaks-in-.patch
+nfsd-fix-leaked-reference-count-of-nfsd4_ssc_umount_.patch
+nfsd-don-t-hand-out-delegation-on-setuid-files-being.patch
+nfsd-fix-problems-with-cleanup-on-errors-in-nfsd4_co.patch
+nfsd-fix-courtesy-client-with-deny-mode-handling-in-.patch
+nfsd-don-t-fsync-nfsd_files-on-last-close.patch
+nfsd-copy-the-whole-verifier-in-nfsd_copy_write_veri.patch
+nfsd-protect-against-filesystem-freezing.patch
+nfsd-don-t-replace-page-in-rq_pages-if-it-s-a-contin.patch
+nfsd-call-op_release-even-when-op_func-returns-an-er.patch
+nfsd-don-t-open-code-clear_and_wake_up_bit.patch
+nfsd-nfsd_file_key_inode-only-needs-to-find-gc-ed-en.patch
+nfsd-simplify-test_bit-return-in-nfsd_file_key_full-.patch
+nfsd-don-t-kill-nfsd_files-because-of-lease-break-er.patch
+nfsd-add-some-comments-to-nfsd_file_do_acquire.patch
+nfsd-don-t-take-put-an-extra-reference-when-putting-.patch
+nfsd-update-comment-over-__nfsd_file_cache_purge.patch
+nfsd-allow-reaping-files-still-under-writeback.patch
+nfsd-convert-filecache-to-rhltable.patch
+nfsd-simplify-the-delayed-disposal-list-code.patch
+nfsd-fix-problem-of-commit-and-nfs4err_delay-in-infi.patch
+nfsd-make-a-copy-of-struct-iattr-before-calling-noti.patch
+nfsd-fix-double-fget-bug-in-__write_ports_addfd.patch
+lockd-drop-inappropriate-svc_get-from-locked_get.patch
+nfsd-add-an-nfsd4_encode_nfstime4-helper.patch
+nfsd-fix-creation-time-serialization-order.patch
+nfsd-don-t-allow-nfsd-threads-to-be-signalled.patch
+nfsd-simplify-code-around-svc_exit_thread-call-in-nf.patch
+nfsd-separate-nfsd_last_thread-from-nfsd_put.patch
+documentation-add-missing-documentation-for-export_o.patch
+nfsd-fix-possible-oops-when-nfsd-pool_stats-is-close.patch
+lockd-introduce-safe-async-lock-op.patch
+nfsd-call-nfsd_last_thread-before-final-nfsd_put.patch
+nfsd-drop-the-nfsd_put-helper.patch
+nfsd-fix-release_lockowner.patch
+nfsd-don-t-take-fi_lock-in-nfsd_break_deleg_cb.patch
+nfsd-don-t-call-locks_release_private-twice-concurre.patch
+nfsd-fix-a-regression-in-nfsd_setattr.patch
--- /dev/null
+From 09497160e5a2af55cefe2a0903fff5ac803cc63a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC: always treat sv_nrpools==1 as "not pooled"
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 93aa619eb0b42eec2f3a9b4d9db41f5095390aec ]
+
+Currently 'pooled' services hold a reference on the pool_map, and
+'unpooled' services do not.
+svc_destroy() uses the presence of ->svo_function (via
+svc_serv_is_pooled()) to determine if the reference should be dropped.
+There is no direct correlation between being pooled and the use of
+svo_function, though in practice, lockd is the only non-pooled service,
+and the only one not to use svo_function.
+
+This is untidy and would cause problems if we changed lockd to use
+svc_set_num_threads(), which requires the use of ->svo_function.
+
+So change the test for "is the service pooled" to "is sv_nrpools > 1".
+
+This means that when svc_pool_map_get() returns 1, it must NOT take a
+reference to the pool.
+
+We discard svc_serv_is_pooled(), and test sv_nrpools directly.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ net/sunrpc/svc.c | 54 ++++++++++++++++++++++++++----------------------
+ 1 file changed, 29 insertions(+), 25 deletions(-)
+
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 8fbfea9f2a04c..fee7a22578b64 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -37,8 +37,6 @@
+
+ static void svc_unregister(const struct svc_serv *serv, struct net *net);
+
+-#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function)
+-
+ #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
+
+ /*
+@@ -240,8 +238,10 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
+
+ /*
+ * Add a reference to the global map of cpus to pools (and
+- * vice versa). Initialise the map if we're the first user.
+- * Returns the number of pools.
++ * vice versa) if pools are in use.
++ * Initialise the map if we're the first user.
++ * Returns the number of pools. If this is '1', no reference
++ * was taken.
+ */
+ static unsigned int
+ svc_pool_map_get(void)
+@@ -253,6 +253,7 @@ svc_pool_map_get(void)
+
+ if (m->count++) {
+ mutex_unlock(&svc_pool_map_mutex);
++ WARN_ON_ONCE(m->npools <= 1);
+ return m->npools;
+ }
+
+@@ -268,29 +269,36 @@ svc_pool_map_get(void)
+ break;
+ }
+
+- if (npools < 0) {
++ if (npools <= 0) {
+ /* default, or memory allocation failure */
+ npools = 1;
+ m->mode = SVC_POOL_GLOBAL;
+ }
+ m->npools = npools;
+
++ if (npools == 1)
++ /* service is unpooled, so doesn't hold a reference */
++ m->count--;
++
+ mutex_unlock(&svc_pool_map_mutex);
+- return m->npools;
++ return npools;
+ }
+
+ /*
+- * Drop a reference to the global map of cpus to pools.
++ * Drop a reference to the global map of cpus to pools, if
++ * pools were in use, i.e. if npools > 1.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+ static void
+-svc_pool_map_put(void)
++svc_pool_map_put(int npools)
+ {
+ struct svc_pool_map *m = &svc_pool_map;
+
++ if (npools <= 1)
++ return;
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (!--m->count) {
+@@ -359,21 +367,18 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+ struct svc_pool_map *m = &svc_pool_map;
+ unsigned int pidx = 0;
+
+- /*
+- * An uninitialised map happens in a pure client when
+- * lockd is brought up, so silently treat it the
+- * same as SVC_POOL_GLOBAL.
+- */
+- if (svc_serv_is_pooled(serv)) {
+- switch (m->mode) {
+- case SVC_POOL_PERCPU:
+- pidx = m->to_pool[cpu];
+- break;
+- case SVC_POOL_PERNODE:
+- pidx = m->to_pool[cpu_to_node(cpu)];
+- break;
+- }
++ if (serv->sv_nrpools <= 1)
++ return serv->sv_pools;
++
++ switch (m->mode) {
++ case SVC_POOL_PERCPU:
++ pidx = m->to_pool[cpu];
++ break;
++ case SVC_POOL_PERNODE:
++ pidx = m->to_pool[cpu_to_node(cpu)];
++ break;
+ }
++
+ return &serv->sv_pools[pidx % serv->sv_nrpools];
+ }
+
+@@ -526,7 +531,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
+ goto out_err;
+ return serv;
+ out_err:
+- svc_pool_map_put();
++ svc_pool_map_put(npools);
+ return NULL;
+ }
+ EXPORT_SYMBOL_GPL(svc_create_pooled);
+@@ -561,8 +566,7 @@ svc_destroy(struct kref *ref)
+
+ cache_clean_deferred(serv);
+
+- if (svc_serv_is_pooled(serv))
+- svc_pool_map_put();
++ svc_pool_map_put(serv->sv_nrpools);
+
+ kfree(serv->sv_pools);
+ kfree(serv);
+--
+2.43.0
+
--- /dev/null
+From b03dc17d85e7622d0e601267e574443f97b8ea93 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Oct 2021 11:57:28 -0400
+Subject: SUNRPC: Change return value type of .pc_decode
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c44b31c263798ec34614dd394c31ef1a2e7e716e ]
+
+Returning an undecorated integer is an age-old trope, but it's
+not clear (even to previous experts in this code) that the only
+valid return values are 1 and 0. These functions do not return
+a negative errno, rpc_stat value, or a positive length.
+
+Document there are only two valid return values by having
+.pc_decode return only true or false.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/xdr.c | 96 +++++++++++++++---------------
+ fs/lockd/xdr4.c | 97 +++++++++++++++---------------
+ fs/nfsd/nfs2acl.c | 30 +++++-----
+ fs/nfsd/nfs3acl.c | 22 +++----
+ fs/nfsd/nfs3xdr.c | 118 ++++++++++++++++++-------------------
+ fs/nfsd/nfs4xdr.c | 22 +++----
+ fs/nfsd/nfsd.h | 2 +-
+ fs/nfsd/nfssvc.c | 6 +-
+ fs/nfsd/nfsxdr.c | 62 +++++++++----------
+ fs/nfsd/xdr.h | 20 +++----
+ fs/nfsd/xdr3.h | 30 +++++-----
+ fs/nfsd/xdr4.h | 2 +-
+ include/linux/lockd/xdr.h | 18 +++---
+ include/linux/lockd/xdr4.h | 18 +++---
+ include/linux/sunrpc/svc.h | 2 +-
+ 15 files changed, 273 insertions(+), 272 deletions(-)
+
+diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
+index 895f152221048..622c2ca37dbfd 100644
+--- a/fs/lockd/xdr.c
++++ b/fs/lockd/xdr.c
+@@ -145,103 +145,103 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
+ * Decode Call arguments
+ */
+
+-int
++bool
+ nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ if (exclusive)
+ argp->lock.fl.fl_type = F_WRLCK;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ if (exclusive)
+ argp->lock.fl.fl_type = F_WRLCK;
+ if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
+- return 0;
++ return false;
+ argp->monitor = 1; /* monitor client by default */
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ if (exclusive)
+ argp->lock.fl.fl_type = F_WRLCK;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ argp->lock.fl.fl_type = F_UNLCK;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &resp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_decode_stats(xdr, &resp->status))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_reboot *argp = rqstp->rq_argp;
+@@ -249,25 +249,25 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ u32 len;
+
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+- return 0;
++ return false;
+ if (len > SM_MAXSTRLEN)
+- return 0;
++ return false;
+ p = xdr_inline_decode(xdr, len);
+ if (!p)
+- return 0;
++ return false;
+ argp->len = len;
+ argp->mon = (char *)p;
+ if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
+- return 0;
++ return false;
+ p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
+ if (!p)
+- return 0;
++ return false;
+ memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+@@ -278,34 +278,34 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ lock->svid = ~(u32)0;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
+- return 0;
++ return false;
+ if (!svcxdr_decode_fhandle(xdr, &lock->fh))
+- return 0;
++ return false;
+ if (!svcxdr_decode_owner(xdr, &lock->oh))
+- return 0;
++ return false;
+ /* XXX: Range checks are missing in the original code */
+ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_lock *lock = &argp->lock;
+
+ if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+
+diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
+index 5e6885d1b92de..11d93e9de85b9 100644
+--- a/fs/lockd/xdr4.c
++++ b/fs/lockd/xdr4.c
+@@ -140,102 +140,103 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
+ * Decode Call arguments
+ */
+
+-int
++bool
+ nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ if (exclusive)
+ argp->lock.fl.fl_type = F_WRLCK;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ if (exclusive)
+ argp->lock.fl.fl_type = F_WRLCK;
+ if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
+- return 0;
++ return false;
+ argp->monitor = 1; /* monitor client by default */
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ if (exclusive)
+ argp->lock.fl.fl_type = F_WRLCK;
+- return 1;
++
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_decode_lock(xdr, &argp->lock))
+- return 0;
++ return false;
+ argp->lock.fl.fl_type = F_UNLCK;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &resp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_decode_stats(xdr, &resp->status))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_reboot *argp = rqstp->rq_argp;
+@@ -243,25 +244,25 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ u32 len;
+
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+- return 0;
++ return false;
+ if (len > SM_MAXSTRLEN)
+- return 0;
++ return false;
+ p = xdr_inline_decode(xdr, len);
+ if (!p)
+- return 0;
++ return false;
+ argp->len = len;
+ argp->mon = (char *)p;
+ if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
+- return 0;
++ return false;
+ p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
+ if (!p)
+- return 0;
++ return false;
+ memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+@@ -272,34 +273,34 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ lock->svid = ~(u32)0;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
+- return 0;
++ return false;
+ if (!svcxdr_decode_fhandle(xdr, &lock->fh))
+- return 0;
++ return false;
+ if (!svcxdr_decode_owner(xdr, &lock->oh))
+- return 0;
++ return false;
+ /* XXX: Range checks are missing in the original code */
+ if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_lock *lock = &argp->lock;
+
+ if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 9bd0899455903..7b1df500e8f41 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -188,51 +188,51 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
+ * XDR decode functions
+ */
+
+-static int
++static bool
+ nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &argp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-static int
++static bool
+ nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &argp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
+- return 0;
++ return false;
+ if (argp->mask & ~NFS_ACL_MASK)
+- return 0;
++ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
+ &argp->acl_access : NULL))
+- return 0;
++ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
+ &argp->acl_default : NULL))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-static int
++static bool
+ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_accessargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->access) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index b1e352ed2436e..9e9f6afb2e00b 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -127,38 +127,38 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
+ * XDR decode functions
+ */
+
+-static int
++static bool
+ nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->mask) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-static int
++static bool
+ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
+- return 0;
++ return false;
+ if (argp->mask & ~NFS_ACL_MASK)
+- return 0;
++ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
+ &argp->acl_access : NULL))
+- return 0;
++ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
+ &argp->acl_default : NULL))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index b4a36989b3e24..a1395049db9f8 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -546,7 +546,7 @@ void fill_post_wcc(struct svc_fh *fhp)
+ * XDR decode functions
+ */
+
+-int
++bool
+ nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_fhandle *args = rqstp->rq_argp;
+@@ -554,7 +554,7 @@ nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return svcxdr_decode_nfs_fh3(xdr, &args->fh);
+ }
+
+-int
++bool
+ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_sattrargs *args = rqstp->rq_argp;
+@@ -564,7 +564,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_decode_sattrguard3(xdr, args);
+ }
+
+-int
++bool
+ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_diropargs *args = rqstp->rq_argp;
+@@ -572,75 +572,75 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len);
+ }
+
+-int
++bool
+ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_accessargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->access) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_readargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_writeargs *args = rqstp->rq_argp;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->stable) < 0)
+- return 0;
++ return false;
+
+ /* opaque data */
+ if (xdr_stream_decode_u32(xdr, &args->len) < 0)
+- return 0;
++ return false;
+
+ /* request sanity */
+ if (args->count != args->len)
+- return 0;
++ return false;
+ if (args->count > max_blocksize) {
+ args->count = max_blocksize;
+ args->len = max_blocksize;
+ }
+ if (!xdr_stream_subsegment(xdr, &args->payload, args->count))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_createargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->createmode) < 0)
+- return 0;
++ return false;
+ switch (args->createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ case NFS3_CREATE_GUARDED:
+@@ -648,15 +648,15 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ case NFS3_CREATE_EXCLUSIVE:
+ args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE);
+ if (!args->verf)
+- return 0;
++ return false;
+ break;
+ default:
+- return 0;
++ return false;
+ }
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_createargs *args = rqstp->rq_argp;
+@@ -666,7 +666,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
+ }
+
+-int
++bool
+ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_symlinkargs *args = rqstp->rq_argp;
+@@ -675,33 +675,33 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ size_t remaining;
+
+ if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen))
+- return 0;
++ return false;
+ if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
+- return 0;
++ return false;
+
+ /* request sanity */
+ remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
+ remaining -= xdr_stream_pos(xdr);
+ if (remaining < xdr_align_size(args->tlen))
+- return 0;
++ return false;
+
+ args->first.iov_base = xdr->p;
+ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->ftype) < 0)
+- return 0;
++ return false;
+ switch (args->ftype) {
+ case NF3CHR:
+ case NF3BLK:
+@@ -715,13 +715,13 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ /* Valid XDR but illegal file types */
+ break;
+ default:
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_renameargs *args = rqstp->rq_argp;
+@@ -732,7 +732,7 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ &args->tname, &args->tlen);
+ }
+
+-int
++bool
+ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_linkargs *args = rqstp->rq_argp;
+@@ -742,59 +742,59 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ &args->tname, &args->tlen);
+ }
+
+-int
++bool
+ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_readdirargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
+- return 0;
++ return false;
+ args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+ if (!args->verf)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_readdirargs *args = rqstp->rq_argp;
+ u32 dircount;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
+- return 0;
++ return false;
+ args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+ if (!args->verf)
+- return 0;
++ return false;
+ /* dircount is ignored */
+ if (xdr_stream_decode_u32(xdr, &dircount) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_commitargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index ec052e88d9008..9fcaf5f93f75d 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2319,7 +2319,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
+ return true;
+ }
+
+-static int
++static bool
+ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ {
+ struct nfsd4_op *op;
+@@ -2332,25 +2332,25 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ int i;
+
+ if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0)
+- return 0;
++ return false;
+ max_reply += XDR_UNIT;
+ argp->tag = NULL;
+ if (unlikely(argp->taglen)) {
+ if (argp->taglen > NFSD4_MAX_TAGLEN)
+- return 0;
++ return false;
+ p = xdr_inline_decode(argp->xdr, argp->taglen);
+ if (!p)
+- return 0;
++ return false;
+ argp->tag = svcxdr_savemem(argp, p, argp->taglen);
+ if (!argp->tag)
+- return 0;
++ return false;
+ max_reply += xdr_align_size(argp->taglen);
+ }
+
+ if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
+- return 0;
++ return false;
+
+ argp->opcnt = min_t(u32, argp->client_opcnt,
+ NFSD_MAX_OPS_PER_COMPOUND);
+@@ -2360,7 +2360,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ if (!argp->ops) {
+ argp->ops = argp->iops;
+ dprintk("nfsd: couldn't allocate room for COMPOUND\n");
+- return 0;
++ return false;
+ }
+ }
+
+@@ -2373,7 +2373,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ op->opdesc = NULL;
+
+ if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
+- return 0;
++ return false;
+ if (nfsd4_opnum_in_range(argp, op)) {
+ op->opdesc = OPDESC(op);
+ op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
+@@ -2421,7 +2421,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
+ clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+
+- return 1;
++ return true;
+ }
+
+ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+@@ -5412,7 +5412,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
+ }
+ }
+
+-int
++bool
+ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 6e8ad5f9757c8..bfcddd4c75345 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -78,7 +78,7 @@ extern const struct seq_operations nfs_exports_op;
+ */
+ struct nfsd_voidargs { };
+ struct nfsd_voidres { };
+-int nfssvc_decode_voidarg(struct svc_rqst *rqstp,
++bool nfssvc_decode_voidarg(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+ int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index be1d656548cfe..00aadc2635032 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1067,10 +1067,10 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ * @xdr: XDR stream positioned at arguments to decode
+ *
+ * Return values:
+- * %0: Arguments were not valid
+- * %1: Decoding was successful
++ * %false: Arguments were not valid
++ * %true: Decoding was successful
+ */
+-int nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
++bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index 08e899180ee43..b5817a41b3de6 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -272,7 +272,7 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ * XDR decode functions
+ */
+
+-int
++bool
+ nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_fhandle *args = rqstp->rq_argp;
+@@ -280,7 +280,7 @@ nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return svcxdr_decode_fhandle(xdr, &args->fh);
+ }
+
+-int
++bool
+ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_sattrargs *args = rqstp->rq_argp;
+@@ -289,7 +289,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
+ }
+
+-int
++bool
+ nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_diropargs *args = rqstp->rq_argp;
+@@ -297,54 +297,54 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len);
+ }
+
+-int
++bool
+ nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_readargs *args = rqstp->rq_argp;
+ u32 totalcount;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+ /* totalcount is ignored */
+ if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_writeargs *args = rqstp->rq_argp;
+ u32 beginoffset, totalcount;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+- return 0;
++ return false;
+ /* beginoffset is ignored */
+ if (xdr_stream_decode_u32(xdr, &beginoffset) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
+- return 0;
++ return false;
+ /* totalcount is ignored */
+ if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
+- return 0;
++ return false;
+
+ /* opaque data */
+ if (xdr_stream_decode_u32(xdr, &args->len) < 0)
+- return 0;
++ return false;
+ if (args->len > NFSSVC_MAXBLKSIZE_V2)
+- return 0;
++ return false;
+ if (!xdr_stream_subsegment(xdr, &args->payload, args->len))
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_createargs *args = rqstp->rq_argp;
+@@ -354,7 +354,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
+ }
+
+-int
++bool
+ nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_renameargs *args = rqstp->rq_argp;
+@@ -365,7 +365,7 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ &args->tname, &args->tlen);
+ }
+
+-int
++bool
+ nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_linkargs *args = rqstp->rq_argp;
+@@ -375,39 +375,39 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ &args->tname, &args->tlen);
+ }
+
+-int
++bool
+ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_symlinkargs *args = rqstp->rq_argp;
+ struct kvec *head = rqstp->rq_arg.head;
+
+ if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
+- return 0;
++ return false;
+ if (args->tlen == 0)
+- return 0;
++ return false;
+
+ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+ args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
+ if (!args->first.iov_base)
+- return 0;
++ return false;
+ return svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
+ }
+
+-int
++bool
+ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_readdirargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->cookie) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+
+ /*
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index 19e281382bb98..d897c198c9126 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -141,16 +141,16 @@ union nfsd_xdrstore {
+ #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
+
+
+-int nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
+ int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index 60a8909205e5a..ef72bc4868da6 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -265,21 +265,21 @@ union nfsd3_xdrstore {
+
+ #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
+
+-int nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
+ int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 4c22eecd65de0..50242d8cd09e8 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -757,7 +757,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+
+
+ bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
+-int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
+ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
+ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
+index 931bd0b064e6f..a3d0bc4fd2109 100644
+--- a/include/linux/lockd/xdr.h
++++ b/include/linux/lockd/xdr.h
+@@ -98,15 +98,15 @@ struct nlm_reboot {
+ */
+ #define NLMSVC_XDRSIZE sizeof(struct nlm_args)
+
+-int nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ int nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
+ int nlmsvc_encode_res(struct svc_rqst *, __be32 *);
+diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
+index 44c9d03d261b9..6eec19629cd69 100644
+--- a/include/linux/lockd/xdr4.h
++++ b/include/linux/lockd/xdr4.h
+@@ -23,15 +23,15 @@
+ #define nlm4_failed cpu_to_be32(NLM_FAILED)
+
+ void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
+-int nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ int nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+ int nlm4svc_encode_res(struct svc_rqst *, __be32 *);
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index f74ac0fdd5f32..2bb68625bc76c 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -456,7 +456,7 @@ struct svc_procedure {
+ /* process the request: */
+ __be32 (*pc_func)(struct svc_rqst *);
+ /* XDR decode args: */
+- int (*pc_decode)(struct svc_rqst *rqstp,
++ bool (*pc_decode)(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+ /* XDR encode result: */
+ int (*pc_encode)(struct svc_rqst *, __be32 *data);
+--
+2.43.0
+
--- /dev/null
+From 3854b079a99b4992184d2ed7f044b2c03e4178d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Oct 2021 10:41:13 -0400
+Subject: SUNRPC: Change return value type of .pc_encode
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 130e2054d4a652a2bd79fb1557ddcd19c053cb37 ]
+
+Returning an undecorated integer is an age-old trope, but it's
+not clear (even to previous experts in this code) that the only
+valid return values are 1 and 0. These functions do not return
+a negative errno, rpc_stat value, or a positive length.
+
+Document there are only two valid return values by having
+.pc_encode return only true or false.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/xdr.c | 18 ++--
+ fs/lockd/xdr4.c | 18 ++--
+ fs/nfs/callback_xdr.c | 4 +-
+ fs/nfsd/nfs2acl.c | 4 +-
+ fs/nfsd/nfs3acl.c | 18 ++--
+ fs/nfsd/nfs3xdr.c | 166 ++++++++++++++++++-------------------
+ fs/nfsd/nfs4xdr.c | 4 +-
+ fs/nfsd/nfsd.h | 2 +-
+ fs/nfsd/nfssvc.c | 8 +-
+ fs/nfsd/nfsxdr.c | 60 +++++++-------
+ fs/nfsd/xdr.h | 14 ++--
+ fs/nfsd/xdr3.h | 30 +++----
+ fs/nfsd/xdr4.h | 2 +-
+ include/linux/lockd/xdr.h | 8 +-
+ include/linux/lockd/xdr4.h | 8 +-
+ include/linux/sunrpc/svc.h | 2 +-
+ 16 files changed, 183 insertions(+), 183 deletions(-)
+
+diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
+index 2595b4d14cd44..2fb5748dae0c8 100644
+--- a/fs/lockd/xdr.c
++++ b/fs/lockd/xdr.c
+@@ -313,13 +313,13 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ * Encode Reply results
+ */
+
+-int
++bool
+ nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_resp;
+@@ -328,7 +328,7 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_encode_testrply(xdr, resp);
+ }
+
+-int
++bool
+ nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_resp;
+@@ -337,18 +337,18 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_encode_stats(xdr, resp->status);
+ }
+
+-int
++bool
+ nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_cookie(xdr, &resp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_encode_stats(xdr, resp->status))
+- return 0;
++ return false;
+ /* sequence */
+ if (xdr_stream_encode_u32(xdr, 0) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
+index 4c04b1e2bd9d8..5fcbf30cd2759 100644
+--- a/fs/lockd/xdr4.c
++++ b/fs/lockd/xdr4.c
+@@ -308,13 +308,13 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ * Encode Reply results
+ */
+
+-int
++bool
+ nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_resp;
+@@ -323,7 +323,7 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_encode_testrply(xdr, resp);
+ }
+
+-int
++bool
+ nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_resp;
+@@ -332,18 +332,18 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ svcxdr_encode_stats(xdr, resp->status);
+ }
+
+-int
++bool
+ nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_cookie(xdr, &resp->cookie))
+- return 0;
++ return false;
+ if (!svcxdr_encode_stats(xdr, resp->status))
+- return 0;
++ return false;
+ /* sequence */
+ if (xdr_stream_encode_u32(xdr, 0) < 0)
+- return 0;
++ return false;
+
+- return 1;
++ return true;
+ }
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index 688d58c036de7..8dcb08e1a885d 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
+ * svc_process_common() looks for an XDR encoder to know when
+ * not to drop a Reply.
+ */
+-static int nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
++static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+ static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index cbd042fbe0f39..efcd429b0f28e 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -240,7 +240,7 @@ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ /* GETACL */
+-static int
++static bool
+ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_getaclres *resp = rqstp->rq_resp;
+@@ -270,7 +270,7 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ }
+
+ /* ACCESS */
+-static int
++static bool
+ nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_accessres *resp = rqstp->rq_resp;
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index e186467b63ecb..35b2ebda14dac 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -166,7 +166,7 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ /* GETACL */
+-static int
++static bool
+ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_getaclres *resp = rqstp->rq_resp;
+@@ -178,14 +178,14 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ int w;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ inode = d_inode(dentry);
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
+- return 0;
++ return false;
+
+ base = (char *)xdr->p - (char *)head->iov_base;
+
+@@ -194,7 +194,7 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
+ while (w > 0) {
+ if (!*(rqstp->rq_next_page++))
+- return 0;
++ return false;
+ w -= PAGE_SIZE;
+ }
+
+@@ -207,18 +207,18 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ resp->mask & NFS_DFACL,
+ NFS_ACL_DEFAULT);
+ if (n <= 0)
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* SETACL */
+-static int
++static bool
+ nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index dd87076a8b0d7..48e8a02ebc83b 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -802,26 +802,26 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ /* GETATTR */
+-int
++bool
+ nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime);
+ if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat))
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* SETATTR, REMOVE, RMDIR */
+-int
++bool
+ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
+@@ -831,166 +831,166 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ }
+
+ /* LOOKUP */
+-int
++bool
+ nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* ACCESS */
+-int
++bool
+ nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->access) < 0)
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* READLINK */
+-int
++bool
+ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->len) < 0)
+- return 0;
++ return false;
+ xdr_write_pages(xdr, resp->pages, 0, resp->len);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* READ */
+-int
++bool
+ nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_encode_bool(xdr, resp->eof) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+- return 0;
++ return false;
+ xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
+ resp->count);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* WRITE */
+-int
++bool
+ nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_writeres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->committed) < 0)
+- return 0;
++ return false;
+ if (!svcxdr_encode_writeverf3(xdr, resp->verf))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* CREATE, MKDIR, SYMLINK, MKNOD */
+-int
++bool
+ nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* RENAME */
+-int
++bool
+ nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_renameres *resp = rqstp->rq_resp;
+@@ -1001,7 +1001,7 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ }
+
+ /* LINK */
+-int
++bool
+ nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_linkres *resp = rqstp->rq_resp;
+@@ -1012,33 +1012,33 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ }
+
+ /* READDIR */
+-int
++bool
+ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_readdirres *resp = rqstp->rq_resp;
+ struct xdr_buf *dirlist = &resp->dirlist;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
+- return 0;
++ return false;
+ xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+ /* no more entries */
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ static __be32
+@@ -1265,26 +1265,26 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr,
+ }
+
+ /* FSSTAT */
+-int
++bool
+ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_fsstatres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_fsstat3resok(xdr, resp))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ static bool
+@@ -1311,26 +1311,26 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr,
+ }
+
+ /* FSINFO */
+-int
++bool
+ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_fsinfo3resok(xdr, resp))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ static bool
+@@ -1353,49 +1353,49 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr,
+ }
+
+ /* PATHCONF */
+-int
++bool
+ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_pathconf3resok(xdr, resp))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /* COMMIT */
+-int
++bool
+ nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd3_commitres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_writeverf3(xdr, resp->verf))
+- return 0;
++ return false;
+ break;
+ default:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+- return 0;
++ return false;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /*
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index cc2367a6922a6..1483cd1b5eed7 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5427,7 +5427,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return nfsd4_decode_compound(args);
+ }
+
+-int
++bool
+ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+@@ -5453,5 +5453,5 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ *p++ = htonl(resp->opcnt);
+
+ nfsd4_sequence_done(resp);
+- return 1;
++ return true;
+ }
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 345f8247d5da9..498e5a4898260 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -80,7 +80,7 @@ struct nfsd_voidargs { };
+ struct nfsd_voidres { };
+ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+-int nfssvc_encode_voidres(struct svc_rqst *rqstp,
++bool nfssvc_encode_voidres(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+
+ /*
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 195f2bcc65384..7df1505425edc 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1078,12 +1078,12 @@ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ * @xdr: XDR stream into which to encode results
+ *
+ * Return values:
+- * %0: Local error while encoding
+- * %1: Encoding was successful
++ * %false: Local error while encoding
++ * %true: Encoding was successful
+ */
+-int nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
++bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return 1;
++ return true;
+ }
+
+ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index 6aa8138ae2f7d..aba8520b4b8b6 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -414,7 +414,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ * XDR encode functions
+ */
+
+-int
++bool
+ nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_stat *resp = rqstp->rq_resp;
+@@ -422,110 +422,110 @@ nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ return svcxdr_encode_stat(xdr, resp->status);
+ }
+
+-int
++bool
+ nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_attrstat *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_diropres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fhandle(xdr, &resp->fh))
+- return 0;
++ return false;
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (xdr_stream_encode_u32(xdr, resp->len) < 0)
+- return 0;
++ return false;
+ xdr_write_pages(xdr, &resp->page, 0, resp->len);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+- return 0;
++ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+- return 0;
++ return false;
+ xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
+ resp->count);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_readdirres *resp = rqstp->rq_resp;
+ struct xdr_buf *dirlist = &resp->dirlist;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
+ /* no more entries */
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+- return 0;
++ return false;
+ if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
+- return 0;
++ return false;
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+-int
++bool
+ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd_statfsres *resp = rqstp->rq_resp;
+@@ -533,12 +533,12 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ __be32 *p;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+- return 0;
++ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ p = xdr_reserve_space(xdr, XDR_UNIT * 5);
+ if (!p)
+- return 0;
++ return false;
+ *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2);
+ *p++ = cpu_to_be32(stat->f_bsize);
+ *p++ = cpu_to_be32(stat->f_blocks);
+@@ -547,7 +547,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ break;
+ }
+
+- return 1;
++ return true;
+ }
+
+ /**
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index bff7258041fc4..852f71580bd06 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -152,13 +152,13 @@ bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
+ int nfssvc_encode_entry(void *data, const char *name, int namlen,
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index bb017fc7cba19..03fe4e21306cb 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -281,21 +281,21 @@ bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ void nfs3svc_release_fhandle(struct svc_rqst *);
+ void nfs3svc_release_fhandle2(struct svc_rqst *);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 9921915b4c163..4f1090c32c29b 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -759,7 +759,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+
+ bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
+ bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
+ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+ void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
+diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
+index 94f1ca900ca3a..67e4a2c5500bd 100644
+--- a/include/linux/lockd/xdr.h
++++ b/include/linux/lockd/xdr.h
+@@ -108,9 +108,9 @@ bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ #endif /* LOCKD_XDR_H */
+diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
+index ee44d7357a7f7..72831e35dca32 100644
+--- a/include/linux/lockd/xdr4.h
++++ b/include/linux/lockd/xdr4.h
+@@ -33,10 +33,10 @@ bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ extern const struct rpc_version nlm_version4;
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index dc6fc8940261f..4813cc5613f27 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -459,7 +459,7 @@ struct svc_procedure {
+ bool (*pc_decode)(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+ /* XDR encode result: */
+- int (*pc_encode)(struct svc_rqst *rqstp,
++ bool (*pc_encode)(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+ /* XDR free result: */
+ void (*pc_release)(struct svc_rqst *);
+--
+2.43.0
+
--- /dev/null
+From 70a8338a69360790dace367439171fa8efb2c7dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC: change svc_get() to return the svc.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit df5e49c880ea0776806b8a9f8ab95e035272cf6f ]
+
+It is common for 'get' functions to return the object that was 'got',
+and there are a couple of places where users of svc_get() would be a
+little simpler if svc_get() did that.
+
+Make it so.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 6 ++----
+ fs/nfs/callback.c | 6 ++----
+ include/linux/sunrpc/svc.h | 3 ++-
+ 3 files changed, 6 insertions(+), 9 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index b220e1b917268..2f50d5b2a8a42 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -430,14 +430,12 @@ static struct svc_serv *lockd_create_svc(void)
+ /*
+ * Check whether we're already up and running.
+ */
+- if (nlmsvc_rqst) {
++ if (nlmsvc_rqst)
+ /*
+ * Note: increase service usage, because later in case of error
+ * svc_destroy() will be called.
+ */
+- svc_get(nlmsvc_rqst->rq_server);
+- return nlmsvc_rqst->rq_server;
+- }
++ return svc_get(nlmsvc_rqst->rq_server);
+
+ /*
+ * Sanity check: if there's no pid,
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 3c86a559a321a..674198e0eb5e1 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -266,14 +266,12 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
+ /*
+ * Check whether we're already up and running.
+ */
+- if (cb_info->serv) {
++ if (cb_info->serv)
+ /*
+ * Note: increase service usage, because later in case of error
+ * svc_destroy() will be called.
+ */
+- svc_get(cb_info->serv);
+- return cb_info->serv;
+- }
++ return svc_get(cb_info->serv);
+
+ switch (minorversion) {
+ case 0:
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 4813cc5613f27..80d44df8663db 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -120,9 +120,10 @@ struct svc_serv {
+ * change the number of threads. Horrible, but there it is.
+ * Should be called with the "service mutex" held.
+ */
+-static inline void svc_get(struct svc_serv *serv)
++static inline struct svc_serv *svc_get(struct svc_serv *serv)
+ {
+ serv->sv_nrthreads++;
++ return serv;
+ }
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From 99e231c95cc5679640b4edb0f7c4ab166b527b84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC: discard svo_setup and rename svc_set_num_threads_sync()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 3ebdbe5203a874614819700d3f470724cb803709 ]
+
+The ->svo_setup callback serves no purpose. It is always called from
+within the same module that chooses which callback is needed. So
+discard it and call the relevant function directly.
+
+Now that svc_set_num_threads() is no longer used remove it and rename
+svc_set_num_threads_sync() to remove the "_sync" suffix.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/callback.c | 8 +++----
+ fs/nfsd/nfssvc.c | 11 ++++-----
+ include/linux/sunrpc/svc.h | 4 ----
+ net/sunrpc/svc.c | 49 ++------------------------------------
+ 4 files changed, 10 insertions(+), 62 deletions(-)
+
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 09ec60b99f65e..422055a1092f0 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -172,9 +172,9 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
+ if (serv->sv_nrthreads == nrservs)
+ return 0;
+
+- ret = serv->sv_ops->svo_setup(serv, NULL, nrservs);
++ ret = svc_set_num_threads(serv, NULL, nrservs);
+ if (ret) {
+- serv->sv_ops->svo_setup(serv, NULL, 0);
++ svc_set_num_threads(serv, NULL, 0);
+ return ret;
+ }
+ dprintk("nfs_callback_up: service started\n");
+@@ -235,14 +235,12 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+ static const struct svc_serv_ops nfs40_cb_sv_ops = {
+ .svo_function = nfs4_callback_svc,
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
+- .svo_setup = svc_set_num_threads_sync,
+ .svo_module = THIS_MODULE,
+ };
+ #if defined(CONFIG_NFS_V4_1)
+ static const struct svc_serv_ops nfs41_cb_sv_ops = {
+ .svo_function = nfs41_callback_svc,
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
+- .svo_setup = svc_set_num_threads_sync,
+ .svo_module = THIS_MODULE,
+ };
+
+@@ -357,7 +355,7 @@ void nfs_callback_down(int minorversion, struct net *net)
+ cb_info->users--;
+ if (cb_info->users == 0) {
+ svc_get(serv);
+- serv->sv_ops->svo_setup(serv, NULL, 0);
++ svc_set_num_threads(serv, NULL, 0);
+ svc_put(serv);
+ dprintk("nfs_callback_down: service destroyed\n");
+ cb_info->serv = NULL;
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 6b10415e4006b..8d49dfbe03f85 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -593,7 +593,6 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = {
+ .svo_shutdown = nfsd_last_thread,
+ .svo_function = nfsd,
+ .svo_enqueue_xprt = svc_xprt_do_enqueue,
+- .svo_setup = svc_set_num_threads_sync,
+ .svo_module = THIS_MODULE,
+ };
+
+@@ -611,7 +610,7 @@ void nfsd_shutdown_threads(struct net *net)
+
+ svc_get(serv);
+ /* Kill outstanding nfsd threads */
+- serv->sv_ops->svo_setup(serv, NULL, 0);
++ svc_set_num_threads(serv, NULL, 0);
+ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ }
+@@ -750,8 +749,9 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
+ /* apply the new numbers */
+ svc_get(nn->nfsd_serv);
+ for (i = 0; i < n; i++) {
+- err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+- &nn->nfsd_serv->sv_pools[i], nthreads[i]);
++ err = svc_set_num_threads(nn->nfsd_serv,
++ &nn->nfsd_serv->sv_pools[i],
++ nthreads[i]);
+ if (err)
+ break;
+ }
+@@ -793,8 +793,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+ error = nfsd_startup_net(net, cred);
+ if (error)
+ goto out_put;
+- error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+- NULL, nrservs);
++ error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
+ if (error)
+ goto out_shutdown;
+ error = nn->nfsd_serv->sv_nrthreads;
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index d22bd62093146..71b3a9e3fc4a8 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -64,9 +64,6 @@ struct svc_serv_ops {
+ /* queue up a transport for servicing */
+ void (*svo_enqueue_xprt)(struct svc_xprt *);
+
+- /* set up thread (or whatever) execution context */
+- int (*svo_setup)(struct svc_serv *, struct svc_pool *, int);
+-
+ /* optional module to count when adding threads (pooled svcs only) */
+ struct module *svo_module;
+ };
+@@ -543,7 +540,6 @@ void svc_pool_map_put(void);
+ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+ const struct svc_serv_ops *);
+ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+-int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+ void svc_shutdown_net(struct svc_serv *, struct net *);
+ int svc_process(struct svc_rqst *);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index c8a0649e5cdf1..07443f7e2d870 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -743,58 +743,13 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ return 0;
+ }
+
+-
+-/* destroy old threads */
+-static int
+-svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+-{
+- struct task_struct *task;
+- unsigned int state = serv->sv_nrthreads-1;
+-
+- /* destroy old threads */
+- do {
+- task = choose_victim(serv, pool, &state);
+- if (task == NULL)
+- break;
+- send_sig(SIGINT, task, 1);
+- nrservs++;
+- } while (nrservs < 0);
+-
+- return 0;
+-}
+-
+ /*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number. If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools. Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
+- *
+- * Destroying threads relies on the service threads filling in
+- * rqstp->rq_task, which only the nfs ones do. Assumes the serv
+- * has been created using svc_create_pooled().
+- *
+- * Based on code that used to be in nfsd_svc() but tweaked
+- * to be pool-aware.
+ */
+-int
+-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+-{
+- if (pool == NULL) {
+- nrservs -= serv->sv_nrthreads;
+- } else {
+- spin_lock_bh(&pool->sp_lock);
+- nrservs -= pool->sp_nrthreads;
+- spin_unlock_bh(&pool->sp_lock);
+- }
+-
+- if (nrservs > 0)
+- return svc_start_kthreads(serv, pool, nrservs);
+- if (nrservs < 0)
+- return svc_signal_kthreads(serv, pool, nrservs);
+- return 0;
+-}
+-EXPORT_SYMBOL_GPL(svc_set_num_threads);
+
+ /* destroy old threads */
+ static int
+@@ -819,7 +774,7 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ }
+
+ int
+-svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
++svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ {
+ if (pool == NULL) {
+ nrservs -= serv->sv_nrthreads;
+@@ -835,7 +790,7 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
+ return svc_stop_kthreads(serv, pool, nrservs);
+ return 0;
+ }
+-EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
++EXPORT_SYMBOL_GPL(svc_set_num_threads);
+
+ /**
+ * svc_rqst_replace_page - Replace one page in rq_pages[]
+--
+2.43.0
+
--- /dev/null
+From 437cf173f2a97852ca724371a2442a4d347e9ac9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jan 2022 17:57:23 -0500
+Subject: SUNRPC: Merge svc_do_enqueue_xprt() into svc_enqueue_xprt()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c0219c499799c1e92bd570c15a47e6257a27bb15 ]
+
+Neil says:
+"These functions were separated in commit 0971374e2818 ("SUNRPC:
+Reduce contention in svc_xprt_enqueue()") so that the XPT_BUSY check
+happened before taking any spinlocks.
+
+We have since moved or removed the spinlocks so the extra test is
+fairly pointless."
+
+I've made this a separate patch in case the XPT_BUSY change has
+unexpected consequences and needs to be reverted.
+
+Suggested-by: Neil Brown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ net/sunrpc/svc_xprt.c | 26 ++++++++++----------------
+ 1 file changed, 10 insertions(+), 16 deletions(-)
+
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 795dc8eb2e1d5..5a6d4ccb4a607 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -31,7 +31,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
+ static struct cache_deferred_req *svc_defer(struct cache_req *req);
+ static void svc_age_temp_xprts(struct timer_list *t);
+ static void svc_delete_xprt(struct svc_xprt *xprt);
+-static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
+
+ /* apparently the "standard" is that clients close
+ * idle connections after 5 minutes, servers after
+@@ -268,12 +267,12 @@ void svc_xprt_received(struct svc_xprt *xprt)
+ trace_svc_xprt_received(xprt);
+
+ /* As soon as we clear busy, the xprt could be closed and
+- * 'put', so we need a reference to call svc_xprt_do_enqueue with:
++ * 'put', so we need a reference to call svc_xprt_enqueue with:
+ */
+ svc_xprt_get(xprt);
+ smp_mb__before_atomic();
+ clear_bit(XPT_BUSY, &xprt->xpt_flags);
+- svc_xprt_do_enqueue(xprt);
++ svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
+ }
+ EXPORT_SYMBOL_GPL(svc_xprt_received);
+@@ -413,6 +412,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
+ smp_rmb();
+ xpt_flags = READ_ONCE(xprt->xpt_flags);
+
++ if (xpt_flags & BIT(XPT_BUSY))
++ return false;
+ if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
+ return true;
+ if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
+@@ -425,7 +426,12 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
+ return false;
+ }
+
+-static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
++/**
++ * svc_xprt_enqueue - Queue a transport on an idle nfsd thread
++ * @xprt: transport with data pending
++ *
++ */
++void svc_xprt_enqueue(struct svc_xprt *xprt)
+ {
+ struct svc_pool *pool;
+ struct svc_rqst *rqstp = NULL;
+@@ -469,18 +475,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+ put_cpu();
+ trace_svc_xprt_do_enqueue(xprt, rqstp);
+ }
+-
+-/*
+- * Queue up a transport with data pending. If there are idle nfsd
+- * processes, wake 'em up.
+- *
+- */
+-void svc_xprt_enqueue(struct svc_xprt *xprt)
+-{
+- if (test_bit(XPT_BUSY, &xprt->xpt_flags))
+- return;
+- svc_xprt_do_enqueue(xprt);
+-}
+ EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
+
+ /*
+--
+2.43.0
+
--- /dev/null
+From a77c6e23cf04915009c01786021902ab885d7ff6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC: move the pool_map definitions (back) into svc.c
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit cf0e124e0a489944d08fcc3c694d2b234d2cc658 ]
+
+These definitions are not used outside of svc.c, and there is no
+evidence that they ever have been. So move them into svc.c
+and make the declarations 'static'.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ include/linux/sunrpc/svc.h | 25 -------------------------
+ net/sunrpc/svc.c | 31 +++++++++++++++++++++++++------
+ 2 files changed, 25 insertions(+), 31 deletions(-)
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 71b3a9e3fc4a8..35bb1c4393400 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -496,29 +496,6 @@ struct svc_procedure {
+ const char * pc_name; /* for display */
+ };
+
+-/*
+- * Mode for mapping cpus to pools.
+- */
+-enum {
+- SVC_POOL_AUTO = -1, /* choose one of the others */
+- SVC_POOL_GLOBAL, /* no mapping, just a single global pool
+- * (legacy & UP mode) */
+- SVC_POOL_PERCPU, /* one pool per cpu */
+- SVC_POOL_PERNODE /* one pool per numa node */
+-};
+-
+-struct svc_pool_map {
+- int count; /* How many svc_servs use us */
+- int mode; /* Note: int not enum to avoid
+- * warnings about "enumeration value
+- * not handled in switch" */
+- unsigned int npools;
+- unsigned int *pool_to; /* maps pool id to cpu or node */
+- unsigned int *to_pool; /* maps cpu or node to pool id */
+-};
+-
+-extern struct svc_pool_map svc_pool_map;
+-
+ /*
+ * Function prototypes.
+ */
+@@ -535,8 +512,6 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp,
+ struct page *page);
+ void svc_rqst_free(struct svc_rqst *);
+ void svc_exit_thread(struct svc_rqst *);
+-unsigned int svc_pool_map_get(void);
+-void svc_pool_map_put(void);
+ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+ const struct svc_serv_ops *);
+ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 07443f7e2d870..8fbfea9f2a04c 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -41,14 +41,35 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net);
+
+ #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
+
++/*
++ * Mode for mapping cpus to pools.
++ */
++enum {
++ SVC_POOL_AUTO = -1, /* choose one of the others */
++ SVC_POOL_GLOBAL, /* no mapping, just a single global pool
++ * (legacy & UP mode) */
++ SVC_POOL_PERCPU, /* one pool per cpu */
++ SVC_POOL_PERNODE /* one pool per numa node */
++};
++
+ /*
+ * Structure for mapping cpus to pools and vice versa.
+ * Setup once during sunrpc initialisation.
+ */
+-struct svc_pool_map svc_pool_map = {
++
++struct svc_pool_map {
++ int count; /* How many svc_servs use us */
++ int mode; /* Note: int not enum to avoid
++ * warnings about "enumeration value
++ * not handled in switch" */
++ unsigned int npools;
++ unsigned int *pool_to; /* maps pool id to cpu or node */
++ unsigned int *to_pool; /* maps cpu or node to pool id */
++};
++
++static struct svc_pool_map svc_pool_map = {
+ .mode = SVC_POOL_DEFAULT
+ };
+-EXPORT_SYMBOL_GPL(svc_pool_map);
+
+ static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
+
+@@ -222,7 +243,7 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
+ * vice versa). Initialise the map if we're the first user.
+ * Returns the number of pools.
+ */
+-unsigned int
++static unsigned int
+ svc_pool_map_get(void)
+ {
+ struct svc_pool_map *m = &svc_pool_map;
+@@ -257,7 +278,6 @@ svc_pool_map_get(void)
+ mutex_unlock(&svc_pool_map_mutex);
+ return m->npools;
+ }
+-EXPORT_SYMBOL_GPL(svc_pool_map_get);
+
+ /*
+ * Drop a reference to the global map of cpus to pools.
+@@ -266,7 +286,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map_get);
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+-void
++static void
+ svc_pool_map_put(void)
+ {
+ struct svc_pool_map *m = &svc_pool_map;
+@@ -283,7 +303,6 @@ svc_pool_map_put(void)
+
+ mutex_unlock(&svc_pool_map_mutex);
+ }
+-EXPORT_SYMBOL_GPL(svc_pool_map_put);
+
+ static int svc_pool_map_get_node(unsigned int pidx)
+ {
+--
+2.43.0
+
--- /dev/null
+From 7ab64fad79c32767b95fbc6db9b636d16097ffd4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC/NFSD: clean up get/put functions.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 8c62d12740a1450d2e8456d5747f440e10db281a ]
+
+svc_destroy() is poorly named - it doesn't necessarily destroy the svc,
+it might just reduce the ref count.
+nfsd_destroy() is poorly named for the same reason.
+
+This patch:
+ - removes the refcount functionality from svc_destroy(), moving it to
+ a new svc_put(). Almost all previous callers of svc_destroy() now
+ call svc_put().
+ - renames nfsd_destroy() to nfsd_put() and improves the code, using
+ the new svc_destroy() rather than svc_put()
+ - removes a few comments that explain the importance of balanced
+ get/put calls. This should be obvious.
+
+The only non-trivial part of this is that svc_destroy() would call
+svc_sock_update() on a non-final decrement. It can no longer do that,
+and svc_put() isn't really a good place for it. This call is now made
+from svc_exit_thread() which seems like a good place. This makes the
+call *before* sv_nrthreads is decremented rather than after. This
+is not particularly important as the call just sets a flag which
+causes sv_nrthreads to be checked later. A subsequent patch will
+improve the ordering.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 6 +-----
+ fs/nfs/callback.c | 14 ++------------
+ fs/nfsd/nfsctl.c | 4 ++--
+ fs/nfsd/nfsd.h | 2 +-
+ fs/nfsd/nfssvc.c | 30 ++++++++++++++++--------------
+ include/linux/sunrpc/svc.h | 26 +++++++++++++++++++++++---
+ net/sunrpc/svc.c | 19 +++++--------------
+ 7 files changed, 50 insertions(+), 51 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 2f50d5b2a8a42..135bd86ed3adb 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -431,10 +431,6 @@ static struct svc_serv *lockd_create_svc(void)
+ * Check whether we're already up and running.
+ */
+ if (nlmsvc_rqst)
+- /*
+- * Note: increase service usage, because later in case of error
+- * svc_destroy() will be called.
+- */
+ return svc_get(nlmsvc_rqst->rq_server);
+
+ /*
+@@ -495,7 +491,7 @@ int lockd_up(struct net *net, const struct cred *cred)
+ * so we exit through here on both success and failure.
+ */
+ err_put:
+- svc_destroy(serv);
++ svc_put(serv);
+ err_create:
+ mutex_unlock(&nlmsvc_mutex);
+ return error;
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 674198e0eb5e1..dddd66749a881 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -267,10 +267,6 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
+ * Check whether we're already up and running.
+ */
+ if (cb_info->serv)
+- /*
+- * Note: increase service usage, because later in case of error
+- * svc_destroy() will be called.
+- */
+ return svc_get(cb_info->serv);
+
+ switch (minorversion) {
+@@ -333,16 +329,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
+ goto err_start;
+
+ cb_info->users++;
+- /*
+- * svc_create creates the svc_serv with sv_nrthreads == 1, and then
+- * svc_prepare_thread increments that. So we need to call svc_destroy
+- * on both success and failure so that the refcount is 1 when the
+- * thread exits.
+- */
+ err_net:
+ if (!cb_info->users)
+ cb_info->serv = NULL;
+- svc_destroy(serv);
++ svc_put(serv);
+ err_create:
+ mutex_unlock(&nfs_callback_mutex);
+ return ret;
+@@ -368,7 +358,7 @@ void nfs_callback_down(int minorversion, struct net *net)
+ if (cb_info->users == 0) {
+ svc_get(serv);
+ serv->sv_ops->svo_setup(serv, NULL, 0);
+- svc_destroy(serv);
++ svc_put(serv);
+ dprintk("nfs_callback_down: service destroyed\n");
+ cb_info->serv = NULL;
+ }
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 162866cfe83a2..5c8d985acf5fb 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -743,7 +743,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+
+ err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+ if (err < 0 && list_empty(&nn->nfsd_serv->sv_permsocks)) {
+- nfsd_destroy(net);
++ nfsd_put(net);
+ return err;
+ }
+
+@@ -796,7 +796,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+ nn->nfsd_serv->sv_nrthreads--;
+ else
+- nfsd_destroy(net);
++ nfsd_put(net);
+ return err;
+ }
+
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 498e5a4898260..3e5008b475ff0 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -97,7 +97,7 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
+ int nfsd_pool_stats_release(struct inode *, struct file *);
+ void nfsd_shutdown_threads(struct net *net);
+
+-void nfsd_destroy(struct net *net);
++void nfsd_put(struct net *net);
+
+ bool i_am_nfsd(void);
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 0f84151011088..4aee1cfe0d1bb 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -623,7 +623,7 @@ void nfsd_shutdown_threads(struct net *net)
+ svc_get(serv);
+ /* Kill outstanding nfsd threads */
+ serv->sv_ops->svo_setup(serv, NULL, 0);
+- nfsd_destroy(net);
++ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ /* Wait for shutdown of nfsd_serv to complete */
+ wait_for_completion(&nn->nfsd_shutdown_complete);
+@@ -656,7 +656,10 @@ int nfsd_create_serv(struct net *net)
+ nn->nfsd_serv->sv_maxconn = nn->max_connections;
+ error = svc_bind(nn->nfsd_serv, net);
+ if (error < 0) {
+- svc_destroy(nn->nfsd_serv);
++ /* NOT nfsd_put() as notifiers (see below) haven't
++ * been set up yet.
++ */
++ svc_put(nn->nfsd_serv);
+ nfsd_complete_shutdown(net);
+ return error;
+ }
+@@ -697,16 +700,16 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
+ return 0;
+ }
+
+-void nfsd_destroy(struct net *net)
++void nfsd_put(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+- int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
+
+- if (destroy)
++ nn->nfsd_serv->sv_nrthreads -= 1;
++ if (nn->nfsd_serv->sv_nrthreads == 0) {
+ svc_shutdown_net(nn->nfsd_serv, net);
+- svc_destroy(nn->nfsd_serv);
+- if (destroy)
++ svc_destroy(nn->nfsd_serv);
+ nfsd_complete_shutdown(net);
++ }
+ }
+
+ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
+@@ -758,7 +761,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
+ if (err)
+ break;
+ }
+- nfsd_destroy(net);
++ nfsd_put(net);
+ return err;
+ }
+
+@@ -795,7 +798,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+
+ error = nfsd_startup_net(net, cred);
+ if (error)
+- goto out_destroy;
++ goto out_put;
+ error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
+ NULL, nrservs);
+ if (error)
+@@ -808,8 +811,8 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+ out_shutdown:
+ if (error < 0 && !nfsd_up_before)
+ nfsd_shutdown_net(net);
+-out_destroy:
+- nfsd_destroy(net); /* Release server */
++out_put:
++ nfsd_put(net);
+ out:
+ mutex_unlock(&nfsd_mutex);
+ return error;
+@@ -982,7 +985,7 @@ nfsd(void *vrqstp)
+ /* Release the thread */
+ svc_exit_thread(rqstp);
+
+- nfsd_destroy(net);
++ nfsd_put(net);
+
+ /* Release module */
+ mutex_unlock(&nfsd_mutex);
+@@ -1109,8 +1112,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
+ struct net *net = inode->i_sb->s_fs_info;
+
+ mutex_lock(&nfsd_mutex);
+- /* this function really, really should have been called svc_put() */
+- nfsd_destroy(net);
++ nfsd_put(net);
+ mutex_unlock(&nfsd_mutex);
+ return ret;
+ }
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 80d44df8663db..b378e9ad141b4 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -114,8 +114,13 @@ struct svc_serv {
+ #endif /* CONFIG_SUNRPC_BACKCHANNEL */
+ };
+
+-/*
+- * We use sv_nrthreads as a reference count. svc_destroy() drops
++/**
++ * svc_get() - increment reference count on a SUNRPC serv
++ * @serv: the svc_serv to have count incremented
++ *
++ * Returns: the svc_serv that was passed in.
++ *
++ * We use sv_nrthreads as a reference count. svc_put() drops
+ * this refcount, so we need to bump it up around operations that
+ * change the number of threads. Horrible, but there it is.
+ * Should be called with the "service mutex" held.
+@@ -126,6 +131,22 @@ static inline struct svc_serv *svc_get(struct svc_serv *serv)
+ return serv;
+ }
+
++void svc_destroy(struct svc_serv *serv);
++
++/**
++ * svc_put - decrement reference count on a SUNRPC serv
++ * @serv: the svc_serv to have count decremented
++ *
++ * When the reference count reaches zero, svc_destroy()
++ * is called to clean up and free the serv.
++ */
++static inline void svc_put(struct svc_serv *serv)
++{
++ serv->sv_nrthreads -= 1;
++ if (serv->sv_nrthreads == 0)
++ svc_destroy(serv);
++}
++
+ /*
+ * Maximum payload size supported by a kernel RPC server.
+ * This is use to determine the max number of pages nfsd is
+@@ -517,7 +538,6 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+ int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+-void svc_destroy(struct svc_serv *);
+ void svc_shutdown_net(struct svc_serv *, struct net *);
+ int svc_process(struct svc_rqst *);
+ int bc_svc_process(struct svc_serv *, struct rpc_rqst *,
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 36a3ad9336d6f..85f324418d175 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -528,17 +528,7 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net);
+ void
+ svc_destroy(struct svc_serv *serv)
+ {
+- dprintk("svc: svc_destroy(%s, %d)\n",
+- serv->sv_program->pg_name,
+- serv->sv_nrthreads);
+-
+- if (serv->sv_nrthreads) {
+- if (--(serv->sv_nrthreads) != 0) {
+- svc_sock_update_bufs(serv);
+- return;
+- }
+- } else
+- printk("svc_destroy: no threads for serv=%p!\n", serv);
++ dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
+
+ del_timer_sync(&serv->sv_temptimer);
+
+@@ -896,9 +886,10 @@ svc_exit_thread(struct svc_rqst *rqstp)
+
+ svc_rqst_free(rqstp);
+
+- /* Release the server */
+- if (serv)
+- svc_destroy(serv);
++ if (!serv)
++ return;
++ svc_sock_update_bufs(serv);
++ svc_put(serv);
+ }
+ EXPORT_SYMBOL_GPL(svc_exit_thread);
+
+--
+2.43.0
+
--- /dev/null
+From b280bd02bf7d4c99c0a8c6c7b0382aedfc33bdcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Sep 2022 17:22:38 -0400
+Subject: SUNRPC: Parametrize how much of argsize should be zeroed
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 103cc1fafee48adb91fca0e19deb869fd23e46ab ]
+
+Currently, SUNRPC clears the whole of .pc_argsize before processing
+each incoming RPC transaction. Add an extra parameter to struct
+svc_procedure to enable upper layers to reduce the amount of each
+operation's argument structure that is zeroed by SUNRPC.
+
+The size of struct nfsd4_compoundargs, in particular, is a lot to
+clear on each incoming RPC Call. A subsequent patch will cut this
+down to something closer to what NFSv2 and NFSv3 use.
+
+This patch should cause no behavior changes.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc4proc.c | 24 ++++++++++++++++++++++++
+ fs/lockd/svcproc.c | 24 ++++++++++++++++++++++++
+ fs/nfs/callback_xdr.c | 1 +
+ fs/nfsd/nfs2acl.c | 5 +++++
+ fs/nfsd/nfs3acl.c | 3 +++
+ fs/nfsd/nfs3proc.c | 22 ++++++++++++++++++++++
+ fs/nfsd/nfs4proc.c | 2 ++
+ fs/nfsd/nfsproc.c | 18 ++++++++++++++++++
+ include/linux/sunrpc/svc.h | 1 +
+ net/sunrpc/svc.c | 2 +-
+ 10 files changed, 101 insertions(+), 1 deletion(-)
+
+diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
+index 930e90f21b151..742b8d31d2fad 100644
+--- a/fs/lockd/svc4proc.c
++++ b/fs/lockd/svc4proc.c
+@@ -523,6 +523,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "NULL",
+@@ -532,6 +533,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_testargs,
+ .pc_encode = nlm4svc_encode_testres,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St+2+No+Rg,
+ .pc_name = "TEST",
+@@ -541,6 +543,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_lockargs,
+ .pc_encode = nlm4svc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "LOCK",
+@@ -550,6 +553,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_cancargs,
+ .pc_encode = nlm4svc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "CANCEL",
+@@ -559,6 +563,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_unlockargs,
+ .pc_encode = nlm4svc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "UNLOCK",
+@@ -568,6 +573,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_testargs,
+ .pc_encode = nlm4svc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "GRANTED",
+@@ -577,6 +583,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_testargs,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "TEST_MSG",
+@@ -586,6 +593,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_lockargs,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "LOCK_MSG",
+@@ -595,6 +603,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_cancargs,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "CANCEL_MSG",
+@@ -604,6 +613,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_unlockargs,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNLOCK_MSG",
+@@ -613,6 +623,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_testargs,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "GRANTED_MSG",
+@@ -622,6 +633,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "TEST_RES",
+@@ -631,6 +643,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "LOCK_RES",
+@@ -640,6 +653,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "CANCEL_RES",
+@@ -649,6 +663,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNLOCK_RES",
+@@ -658,6 +673,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_res,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "GRANTED_RES",
+@@ -667,6 +683,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_reboot,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_reboot),
++ .pc_argzero = sizeof(struct nlm_reboot),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "SM_NOTIFY",
+@@ -676,6 +693,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = 0,
+ .pc_name = "UNUSED",
+@@ -685,6 +703,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = 0,
+ .pc_name = "UNUSED",
+@@ -694,6 +713,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_void,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = 0,
+ .pc_name = "UNUSED",
+@@ -703,6 +723,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_shareargs,
+ .pc_encode = nlm4svc_encode_shareres,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St+1,
+ .pc_name = "SHARE",
+@@ -712,6 +733,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_shareargs,
+ .pc_encode = nlm4svc_encode_shareres,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St+1,
+ .pc_name = "UNSHARE",
+@@ -721,6 +743,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_lockargs,
+ .pc_encode = nlm4svc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "NM_LOCK",
+@@ -730,6 +753,7 @@ const struct svc_procedure nlmsvc_procedures4[24] = {
+ .pc_decode = nlm4svc_decode_notify,
+ .pc_encode = nlm4svc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "FREE_ALL",
+diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
+index c215a4599d5c8..7e25d5583c7d0 100644
+--- a/fs/lockd/svcproc.c
++++ b/fs/lockd/svcproc.c
+@@ -557,6 +557,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "NULL",
+@@ -566,6 +567,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_testargs,
+ .pc_encode = nlmsvc_encode_testres,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St+2+No+Rg,
+ .pc_name = "TEST",
+@@ -575,6 +577,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_lockargs,
+ .pc_encode = nlmsvc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "LOCK",
+@@ -584,6 +587,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_cancargs,
+ .pc_encode = nlmsvc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "CANCEL",
+@@ -593,6 +597,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_unlockargs,
+ .pc_encode = nlmsvc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "UNLOCK",
+@@ -602,6 +607,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_testargs,
+ .pc_encode = nlmsvc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "GRANTED",
+@@ -611,6 +617,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_testargs,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "TEST_MSG",
+@@ -620,6 +627,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_lockargs,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "LOCK_MSG",
+@@ -629,6 +637,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_cancargs,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "CANCEL_MSG",
+@@ -638,6 +647,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_unlockargs,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNLOCK_MSG",
+@@ -647,6 +657,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_testargs,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "GRANTED_MSG",
+@@ -656,6 +667,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "TEST_RES",
+@@ -665,6 +677,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "LOCK_RES",
+@@ -674,6 +687,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "CANCEL_RES",
+@@ -683,6 +697,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNLOCK_RES",
+@@ -692,6 +707,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_res,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_res),
++ .pc_argzero = sizeof(struct nlm_res),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "GRANTED_RES",
+@@ -701,6 +717,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_reboot,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_reboot),
++ .pc_argzero = sizeof(struct nlm_reboot),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "SM_NOTIFY",
+@@ -710,6 +727,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNUSED",
+@@ -719,6 +737,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNUSED",
+@@ -728,6 +747,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_void,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_void),
++ .pc_argzero = sizeof(struct nlm_void),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = St,
+ .pc_name = "UNUSED",
+@@ -737,6 +757,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_shareargs,
+ .pc_encode = nlmsvc_encode_shareres,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St+1,
+ .pc_name = "SHARE",
+@@ -746,6 +767,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_shareargs,
+ .pc_encode = nlmsvc_encode_shareres,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St+1,
+ .pc_name = "UNSHARE",
+@@ -755,6 +777,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_lockargs,
+ .pc_encode = nlmsvc_encode_res,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_res),
+ .pc_xdrressize = Ck+St,
+ .pc_name = "NM_LOCK",
+@@ -764,6 +787,7 @@ const struct svc_procedure nlmsvc_procedures[24] = {
+ .pc_decode = nlmsvc_decode_notify,
+ .pc_encode = nlmsvc_encode_void,
+ .pc_argsize = sizeof(struct nlm_args),
++ .pc_argzero = sizeof(struct nlm_args),
+ .pc_ressize = sizeof(struct nlm_void),
+ .pc_xdrressize = 0,
+ .pc_name = "FREE_ALL",
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index 8dcb08e1a885d..d0cccddb7d088 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -1065,6 +1065,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
+ .pc_func = nfs4_callback_compound,
+ .pc_encode = nfs4_encode_void,
+ .pc_argsize = 256,
++ .pc_argzero = 256,
+ .pc_ressize = 256,
+ .pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
+ .pc_name = "COMPOUND",
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 87f224cd30a85..65d4511b7af08 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -321,6 +321,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+@@ -332,6 +333,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ .pc_encode = nfsaclsvc_encode_getaclres,
+ .pc_release = nfsaclsvc_release_getacl,
+ .pc_argsize = sizeof(struct nfsd3_getaclargs),
++ .pc_argzero = sizeof(struct nfsd3_getaclargs),
+ .pc_ressize = sizeof(struct nfsd3_getaclres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+1+2*(1+ACL),
+@@ -343,6 +345,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd3_setaclargs),
++ .pc_argzero = sizeof(struct nfsd3_setaclargs),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+@@ -354,6 +357,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
++ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+@@ -365,6 +369,7 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ .pc_encode = nfsaclsvc_encode_accessres,
+ .pc_release = nfsaclsvc_release_access,
+ .pc_argsize = sizeof(struct nfsd3_accessargs),
++ .pc_argzero = sizeof(struct nfsd3_accessargs),
+ .pc_ressize = sizeof(struct nfsd3_accessres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT+1,
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 9446c67436649..2fb9ee3564558 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -252,6 +252,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+@@ -263,6 +264,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
+ .pc_encode = nfs3svc_encode_getaclres,
+ .pc_release = nfs3svc_release_getacl,
+ .pc_argsize = sizeof(struct nfsd3_getaclargs),
++ .pc_argzero = sizeof(struct nfsd3_getaclargs),
+ .pc_ressize = sizeof(struct nfsd3_getaclres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+1+2*(1+ACL),
+@@ -274,6 +276,7 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = {
+ .pc_encode = nfs3svc_encode_setaclres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_setaclargs),
++ .pc_argzero = sizeof(struct nfsd3_setaclargs),
+ .pc_ressize = sizeof(struct nfsd3_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT,
+diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
+index 5b1e771238b35..58695e4e18b46 100644
+--- a/fs/nfsd/nfs3proc.c
++++ b/fs/nfsd/nfs3proc.c
+@@ -809,6 +809,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+@@ -820,6 +821,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_getattrres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
++ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd3_attrstatres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+@@ -831,6 +833,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_wccstatres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_sattrargs),
++ .pc_argzero = sizeof(struct nfsd3_sattrargs),
+ .pc_ressize = sizeof(struct nfsd3_wccstatres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC,
+@@ -842,6 +845,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_lookupres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_diropargs),
++ .pc_argzero = sizeof(struct nfsd3_diropargs),
+ .pc_ressize = sizeof(struct nfsd3_diropres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+FH+pAT+pAT,
+@@ -853,6 +857,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_accessres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_accessargs),
++ .pc_argzero = sizeof(struct nfsd3_accessargs),
+ .pc_ressize = sizeof(struct nfsd3_accessres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+1,
+@@ -864,6 +869,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_readlinkres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
++ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd3_readlinkres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
+@@ -875,6 +881,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_readres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readargs),
++ .pc_argzero = sizeof(struct nfsd3_readargs),
+ .pc_ressize = sizeof(struct nfsd3_readres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
+@@ -886,6 +893,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_writeres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_writeargs),
++ .pc_argzero = sizeof(struct nfsd3_writeargs),
+ .pc_ressize = sizeof(struct nfsd3_writeres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC+4,
+@@ -897,6 +905,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_createres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_createargs),
++ .pc_argzero = sizeof(struct nfsd3_createargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+@@ -908,6 +917,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_createres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_mkdirargs),
++ .pc_argzero = sizeof(struct nfsd3_mkdirargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+@@ -919,6 +929,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_createres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_symlinkargs),
++ .pc_argzero = sizeof(struct nfsd3_symlinkargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+@@ -930,6 +941,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_createres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_mknodargs),
++ .pc_argzero = sizeof(struct nfsd3_mknodargs),
+ .pc_ressize = sizeof(struct nfsd3_createres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+(1+FH+pAT)+WC,
+@@ -941,6 +953,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_wccstatres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_diropargs),
++ .pc_argzero = sizeof(struct nfsd3_diropargs),
+ .pc_ressize = sizeof(struct nfsd3_wccstatres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC,
+@@ -952,6 +965,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_wccstatres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_diropargs),
++ .pc_argzero = sizeof(struct nfsd3_diropargs),
+ .pc_ressize = sizeof(struct nfsd3_wccstatres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC,
+@@ -963,6 +977,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_renameres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_renameargs),
++ .pc_argzero = sizeof(struct nfsd3_renameargs),
+ .pc_ressize = sizeof(struct nfsd3_renameres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+WC+WC,
+@@ -974,6 +989,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_linkres,
+ .pc_release = nfs3svc_release_fhandle2,
+ .pc_argsize = sizeof(struct nfsd3_linkargs),
++ .pc_argzero = sizeof(struct nfsd3_linkargs),
+ .pc_ressize = sizeof(struct nfsd3_linkres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+pAT+WC,
+@@ -985,6 +1001,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_readdirres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readdirargs),
++ .pc_argzero = sizeof(struct nfsd3_readdirargs),
+ .pc_ressize = sizeof(struct nfsd3_readdirres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_name = "READDIR",
+@@ -995,6 +1012,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_readdirres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_readdirplusargs),
++ .pc_argzero = sizeof(struct nfsd3_readdirplusargs),
+ .pc_ressize = sizeof(struct nfsd3_readdirres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_name = "READDIRPLUS",
+@@ -1004,6 +1022,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_decode = nfs3svc_decode_fhandleargs,
+ .pc_encode = nfs3svc_encode_fsstatres,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
++ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_fsstatres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+2*6+1,
+@@ -1014,6 +1033,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_decode = nfs3svc_decode_fhandleargs,
+ .pc_encode = nfs3svc_encode_fsinfores,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
++ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_fsinfores),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+12,
+@@ -1024,6 +1044,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_decode = nfs3svc_decode_fhandleargs,
+ .pc_encode = nfs3svc_encode_pathconfres,
+ .pc_argsize = sizeof(struct nfsd3_fhandleargs),
++ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
+ .pc_ressize = sizeof(struct nfsd3_pathconfres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT+6,
+@@ -1035,6 +1056,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
+ .pc_encode = nfs3svc_encode_commitres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_commitargs),
++ .pc_argzero = sizeof(struct nfsd3_commitargs),
+ .pc_ressize = sizeof(struct nfsd3_commitres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+WC+2,
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index ce8062c959315..8aae0fb4846bc 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -3579,6 +3579,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 1,
+@@ -3589,6 +3590,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
+ .pc_decode = nfs4svc_decode_compoundargs,
+ .pc_encode = nfs4svc_encode_compoundres,
+ .pc_argsize = sizeof(struct nfsd4_compoundargs),
++ .pc_argzero = sizeof(struct nfsd4_compoundargs),
+ .pc_ressize = sizeof(struct nfsd4_compoundres),
+ .pc_release = nfsd4_release_compoundargs,
+ .pc_cachetype = RC_NOCACHE,
+diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
+index ee02ede74bf53..49778ff410e32 100644
+--- a/fs/nfsd/nfsproc.c
++++ b/fs/nfsd/nfsproc.c
+@@ -645,6 +645,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+@@ -656,6 +657,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
++ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+@@ -667,6 +669,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd_sattrargs),
++ .pc_argzero = sizeof(struct nfsd_sattrargs),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+AT,
+@@ -677,6 +680,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+@@ -688,6 +692,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_diropres,
+ .pc_release = nfssvc_release_diropres,
+ .pc_argsize = sizeof(struct nfsd_diropargs),
++ .pc_argzero = sizeof(struct nfsd_diropargs),
+ .pc_ressize = sizeof(struct nfsd_diropres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+FH+AT,
+@@ -698,6 +703,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_fhandleargs,
+ .pc_encode = nfssvc_encode_readlinkres,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
++ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd_readlinkres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
+@@ -709,6 +715,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_readres,
+ .pc_release = nfssvc_release_readres,
+ .pc_argsize = sizeof(struct nfsd_readargs),
++ .pc_argzero = sizeof(struct nfsd_readargs),
+ .pc_ressize = sizeof(struct nfsd_readres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
+@@ -719,6 +726,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
++ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+@@ -730,6 +738,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd_writeargs),
++ .pc_argzero = sizeof(struct nfsd_writeargs),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+AT,
+@@ -741,6 +750,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_diropres,
+ .pc_release = nfssvc_release_diropres,
+ .pc_argsize = sizeof(struct nfsd_createargs),
++ .pc_argzero = sizeof(struct nfsd_createargs),
+ .pc_ressize = sizeof(struct nfsd_diropres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+FH+AT,
+@@ -751,6 +761,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_diropargs,
+ .pc_encode = nfssvc_encode_statres,
+ .pc_argsize = sizeof(struct nfsd_diropargs),
++ .pc_argzero = sizeof(struct nfsd_diropargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
+ .pc_cachetype = RC_REPLSTAT,
+ .pc_xdrressize = ST,
+@@ -761,6 +772,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_renameargs,
+ .pc_encode = nfssvc_encode_statres,
+ .pc_argsize = sizeof(struct nfsd_renameargs),
++ .pc_argzero = sizeof(struct nfsd_renameargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
+ .pc_cachetype = RC_REPLSTAT,
+ .pc_xdrressize = ST,
+@@ -771,6 +783,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_linkargs,
+ .pc_encode = nfssvc_encode_statres,
+ .pc_argsize = sizeof(struct nfsd_linkargs),
++ .pc_argzero = sizeof(struct nfsd_linkargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
+ .pc_cachetype = RC_REPLSTAT,
+ .pc_xdrressize = ST,
+@@ -781,6 +794,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_symlinkargs,
+ .pc_encode = nfssvc_encode_statres,
+ .pc_argsize = sizeof(struct nfsd_symlinkargs),
++ .pc_argzero = sizeof(struct nfsd_symlinkargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
+ .pc_cachetype = RC_REPLSTAT,
+ .pc_xdrressize = ST,
+@@ -792,6 +806,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_encode = nfssvc_encode_diropres,
+ .pc_release = nfssvc_release_diropres,
+ .pc_argsize = sizeof(struct nfsd_createargs),
++ .pc_argzero = sizeof(struct nfsd_createargs),
+ .pc_ressize = sizeof(struct nfsd_diropres),
+ .pc_cachetype = RC_REPLBUFF,
+ .pc_xdrressize = ST+FH+AT,
+@@ -802,6 +817,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_diropargs,
+ .pc_encode = nfssvc_encode_statres,
+ .pc_argsize = sizeof(struct nfsd_diropargs),
++ .pc_argzero = sizeof(struct nfsd_diropargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
+ .pc_cachetype = RC_REPLSTAT,
+ .pc_xdrressize = ST,
+@@ -812,6 +828,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_readdirargs,
+ .pc_encode = nfssvc_encode_readdirres,
+ .pc_argsize = sizeof(struct nfsd_readdirargs),
++ .pc_argzero = sizeof(struct nfsd_readdirargs),
+ .pc_ressize = sizeof(struct nfsd_readdirres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_name = "READDIR",
+@@ -821,6 +838,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
+ .pc_decode = nfssvc_decode_fhandleargs,
+ .pc_encode = nfssvc_encode_statfsres,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
++ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd_statfsres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+5,
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 1d9a81bab3fa2..53405c282209f 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -475,6 +475,7 @@ struct svc_procedure {
+ /* XDR free result: */
+ void (*pc_release)(struct svc_rqst *);
+ unsigned int pc_argsize; /* argument struct size */
++ unsigned int pc_argzero; /* how much of argument to clear */
+ unsigned int pc_ressize; /* result struct size */
+ unsigned int pc_cachetype; /* cache info (NFS) */
+ unsigned int pc_xdrressize; /* maximum size of XDR reply */
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index f2a8c1ee8530e..86f00019d0ebb 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -1242,7 +1242,7 @@ svc_generic_init_request(struct svc_rqst *rqstp,
+ goto err_bad_proc;
+
+ /* Initialize storage for argp and resp */
+- memset(rqstp->rq_argp, 0, procp->pc_argsize);
++ memset(rqstp->rq_argp, 0, procp->pc_argzero);
+ memset(rqstp->rq_resp, 0, procp->pc_ressize);
+
+ /* Bump per-procedure stats counter */
+--
+2.43.0
+
--- /dev/null
+From 34766b5cd47c50504d6283020c37ba77952a78b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jan 2022 11:30:55 -0500
+Subject: SUNRPC: Remove svc_shutdown_net()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit c7d7ec8f043e53ad16e30f5ebb8b9df415ec0f2b ]
+
+Clean up: svc_shutdown_net() now does nothing but call
+svc_close_net(). Replace all external call sites.
+
+svc_close_net() is renamed to be the inverse of svc_xprt_create().
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 4 ++--
+ fs/nfs/callback.c | 2 +-
+ fs/nfsd/nfssvc.c | 2 +-
+ include/linux/sunrpc/svc.h | 1 -
+ include/linux/sunrpc/svc_xprt.h | 1 +
+ net/sunrpc/svc.c | 6 ------
+ net/sunrpc/svc_xprt.c | 9 +++++++--
+ 7 files changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index bba6f2b45b64a..c83ec4a375bc1 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -248,7 +248,7 @@ static int make_socks(struct svc_serv *serv, struct net *net,
+ if (warned++ == 0)
+ printk(KERN_WARNING
+ "lockd_up: makesock failed, error=%d\n", err);
+- svc_shutdown_net(serv, net);
++ svc_xprt_destroy_all(serv, net);
+ svc_rpcb_cleanup(serv, net);
+ return err;
+ }
+@@ -287,7 +287,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
+ nlm_shutdown_hosts_net(net);
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ locks_end_grace(&ln->lockd_manager);
+- svc_shutdown_net(serv, net);
++ svc_xprt_destroy_all(serv, net);
+ svc_rpcb_cleanup(serv, net);
+ }
+ } else {
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index c1a8767100ae9..c98c68513590f 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -189,7 +189,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
+ return;
+
+ dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
+- svc_shutdown_net(serv, net);
++ svc_xprt_destroy_all(serv, net);
+ }
+
+ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 5790b1eaff72b..38895372ec393 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -722,7 +722,7 @@ void nfsd_put(struct net *net)
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
+- svc_shutdown_net(nn->nfsd_serv, net);
++ svc_xprt_destroy_all(nn->nfsd_serv, net);
+ nfsd_last_thread(nn->nfsd_serv, net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+ spin_lock(&nfsd_notifier_lock);
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 6ea779b66199f..fd7ccba415f51 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -510,7 +510,6 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+ const struct svc_serv_ops *);
+ int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
+ int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
+-void svc_shutdown_net(struct svc_serv *, struct net *);
+ int svc_process(struct svc_rqst *);
+ int bc_svc_process(struct svc_serv *, struct rpc_rqst *,
+ struct svc_rqst *);
+diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
+index 30c645583cd06..1f7368f5b4e72 100644
+--- a/include/linux/sunrpc/svc_xprt.h
++++ b/include/linux/sunrpc/svc_xprt.h
+@@ -131,6 +131,7 @@ int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags,
+ const struct cred *cred);
++void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net);
+ void svc_xprt_received(struct svc_xprt *xprt);
+ void svc_xprt_enqueue(struct svc_xprt *xprt);
+ void svc_xprt_put(struct svc_xprt *xprt);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 6a52942c501c5..6f45f3f45514c 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -536,12 +536,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
+ }
+ EXPORT_SYMBOL_GPL(svc_create_pooled);
+
+-void svc_shutdown_net(struct svc_serv *serv, struct net *net)
+-{
+- svc_close_net(serv, net);
+-}
+-EXPORT_SYMBOL_GPL(svc_shutdown_net);
+-
+ /*
+ * Destroy an RPC service. Should be called with appropriate locking to
+ * protect sv_permsocks and sv_tempsocks.
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 19c11b3253f8d..67ccf1a6459ae 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -1150,7 +1150,11 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
+ }
+ }
+
+-/*
++/**
++ * svc_xprt_destroy_all - Destroy transports associated with @serv
++ * @serv: RPC service to be shut down
++ * @net: target network namespace
++ *
+ * Server threads may still be running (especially in the case where the
+ * service is still running in other network namespaces).
+ *
+@@ -1162,7 +1166,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
+ * threads, we may need to wait a little while and then check again to
+ * see if they're done.
+ */
+-void svc_close_net(struct svc_serv *serv, struct net *net)
++void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
+ {
+ int delay = 0;
+
+@@ -1173,6 +1177,7 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
+ msleep(delay++);
+ }
+ }
++EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
+
+ /*
+ * Handle defer and revisit of requests
+--
+2.43.0
+
--- /dev/null
+From be150e33644b8762261e50872efc96e3e82423e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jan 2022 13:49:29 -0500
+Subject: SUNRPC: Remove svo_shutdown method
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 87cdd8641c8a1ec6afd2468265e20840a57fd888 ]
+
+Clean up. Neil observed that "any code that calls svc_shutdown_net()
+knows what the shutdown function should be, and so can call it
+directly."
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Reviewed-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 5 ++---
+ fs/nfsd/nfssvc.c | 2 +-
+ include/linux/sunrpc/svc.h | 3 ---
+ net/sunrpc/svc.c | 3 ---
+ 4 files changed, 3 insertions(+), 10 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 3a05af8736259..f5b688a844aa5 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -249,6 +249,7 @@ static int make_socks(struct svc_serv *serv, struct net *net,
+ printk(KERN_WARNING
+ "lockd_up: makesock failed, error=%d\n", err);
+ svc_shutdown_net(serv, net);
++ svc_rpcb_cleanup(serv, net);
+ return err;
+ }
+
+@@ -287,8 +288,7 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ locks_end_grace(&ln->lockd_manager);
+ svc_shutdown_net(serv, net);
+- dprintk("%s: per-net data destroyed; net=%x\n",
+- __func__, net->ns.inum);
++ svc_rpcb_cleanup(serv, net);
+ }
+ } else {
+ pr_err("%s: no users! net=%x\n",
+@@ -351,7 +351,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
+ #endif
+
+ static const struct svc_serv_ops lockd_sv_ops = {
+- .svo_shutdown = svc_rpcb_cleanup,
+ .svo_function = lockd,
+ .svo_module = THIS_MODULE,
+ };
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 3b79b97f2715d..a1765e751b739 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -613,7 +613,6 @@ static int nfsd_get_default_max_blksize(void)
+ }
+
+ static const struct svc_serv_ops nfsd_thread_sv_ops = {
+- .svo_shutdown = nfsd_last_thread,
+ .svo_function = nfsd,
+ .svo_module = THIS_MODULE,
+ };
+@@ -724,6 +723,7 @@ void nfsd_put(struct net *net)
+
+ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
+ svc_shutdown_net(nn->nfsd_serv, net);
++ nfsd_last_thread(nn->nfsd_serv, net);
+ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 2c1a80ec7c3dc..6ea779b66199f 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -55,9 +55,6 @@ struct svc_pool {
+ struct svc_serv;
+
+ struct svc_serv_ops {
+- /* Callback to use when last thread exits. */
+- void (*svo_shutdown)(struct svc_serv *, struct net *);
+-
+ /* function for service threads to run */
+ int (*svo_function)(void *);
+
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index f53ff8f2602f2..7c7cf0d4ffcb0 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -539,9 +539,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
+ void svc_shutdown_net(struct svc_serv *serv, struct net *net)
+ {
+ svc_close_net(serv, net);
+-
+- if (serv->sv_ops->svo_shutdown)
+- serv->sv_ops->svo_shutdown(serv, net);
+ }
+ EXPORT_SYMBOL_GPL(svc_shutdown_net);
+
+--
+2.43.0
+
--- /dev/null
+From c5c46bfe288975f54ef2d3ec53322c243dc35814 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jan 2022 10:17:59 -0500
+Subject: SUNRPC: Remove the .svo_enqueue_xprt method
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit a9ff2e99e9fa501ec965da03c18a5422b37a2f44 ]
+
+We have never been able to track down and address the underlying
+cause of the performance issues with workqueue-based service
+support. svo_enqueue_xprt is called multiple times per RPC, so
+it adds instruction path length, but always ends up at the same
+function: svc_xprt_do_enqueue(). We do not anticipate needing
+this flexibility for dynamic nfsd thread management support.
+
+As a micro-optimization, remove .svo_enqueue_xprt because
+Spectre/Meltdown makes virtual function calls more costly.
+
+This change essentially reverts commit b9e13cdfac70 ("nfsd/sunrpc:
+turn enqueueing a svc_xprt into a svc_serv operation").
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 1 -
+ fs/nfs/callback.c | 2 --
+ fs/nfsd/nfssvc.c | 1 -
+ include/linux/sunrpc/svc.h | 3 ---
+ include/linux/sunrpc/svc_xprt.h | 1 -
+ net/sunrpc/svc_xprt.c | 10 +++++-----
+ 6 files changed, 5 insertions(+), 13 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 0475c5a5d061e..3a05af8736259 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -353,7 +353,6 @@ static struct notifier_block lockd_inet6addr_notifier = {
+ static const struct svc_serv_ops lockd_sv_ops = {
+ .svo_shutdown = svc_rpcb_cleanup,
+ .svo_function = lockd,
+- .svo_enqueue_xprt = svc_xprt_do_enqueue,
+ .svo_module = THIS_MODULE,
+ };
+
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 054cc1255fac6..7a810f8850632 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -234,13 +234,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
+
+ static const struct svc_serv_ops nfs40_cb_sv_ops = {
+ .svo_function = nfs4_callback_svc,
+- .svo_enqueue_xprt = svc_xprt_do_enqueue,
+ .svo_module = THIS_MODULE,
+ };
+ #if defined(CONFIG_NFS_V4_1)
+ static const struct svc_serv_ops nfs41_cb_sv_ops = {
+ .svo_function = nfs41_callback_svc,
+- .svo_enqueue_xprt = svc_xprt_do_enqueue,
+ .svo_module = THIS_MODULE,
+ };
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 2efe9d33a2827..3b79b97f2715d 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -615,7 +615,6 @@ static int nfsd_get_default_max_blksize(void)
+ static const struct svc_serv_ops nfsd_thread_sv_ops = {
+ .svo_shutdown = nfsd_last_thread,
+ .svo_function = nfsd,
+- .svo_enqueue_xprt = svc_xprt_do_enqueue,
+ .svo_module = THIS_MODULE,
+ };
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index be535cc4fea07..2c1a80ec7c3dc 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -61,9 +61,6 @@ struct svc_serv_ops {
+ /* function for service threads to run */
+ int (*svo_function)(void *);
+
+- /* queue up a transport for servicing */
+- void (*svo_enqueue_xprt)(struct svc_xprt *);
+-
+ /* optional module to count when adding threads.
+ * Thread function must call module_put_and_kthread_exit() to exit.
+ */
+diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
+index 154eee6bc6a01..05a3ccd837f57 100644
+--- a/include/linux/sunrpc/svc_xprt.h
++++ b/include/linux/sunrpc/svc_xprt.h
+@@ -131,7 +131,6 @@ int svc_create_xprt(struct svc_serv *, const char *, struct net *,
+ const int, const unsigned short, int,
+ const struct cred *);
+ void svc_xprt_received(struct svc_xprt *xprt);
+-void svc_xprt_do_enqueue(struct svc_xprt *xprt);
+ void svc_xprt_enqueue(struct svc_xprt *xprt);
+ void svc_xprt_put(struct svc_xprt *xprt);
+ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 5ff8f902f14d2..795dc8eb2e1d5 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -31,6 +31,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
+ static struct cache_deferred_req *svc_defer(struct cache_req *req);
+ static void svc_age_temp_xprts(struct timer_list *t);
+ static void svc_delete_xprt(struct svc_xprt *xprt);
++static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
+
+ /* apparently the "standard" is that clients close
+ * idle connections after 5 minutes, servers after
+@@ -267,12 +268,12 @@ void svc_xprt_received(struct svc_xprt *xprt)
+ trace_svc_xprt_received(xprt);
+
+ /* As soon as we clear busy, the xprt could be closed and
+- * 'put', so we need a reference to call svc_enqueue_xprt with:
++ * 'put', so we need a reference to call svc_xprt_do_enqueue with:
+ */
+ svc_xprt_get(xprt);
+ smp_mb__before_atomic();
+ clear_bit(XPT_BUSY, &xprt->xpt_flags);
+- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
++ svc_xprt_do_enqueue(xprt);
+ svc_xprt_put(xprt);
+ }
+ EXPORT_SYMBOL_GPL(svc_xprt_received);
+@@ -424,7 +425,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
+ return false;
+ }
+
+-void svc_xprt_do_enqueue(struct svc_xprt *xprt)
++static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+ {
+ struct svc_pool *pool;
+ struct svc_rqst *rqstp = NULL;
+@@ -468,7 +469,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+ put_cpu();
+ trace_svc_xprt_do_enqueue(xprt, rqstp);
+ }
+-EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
+
+ /*
+ * Queue up a transport with data pending. If there are idle nfsd
+@@ -479,7 +479,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
+ {
+ if (test_bit(XPT_BUSY, &xprt->xpt_flags))
+ return;
+- xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt);
++ svc_xprt_do_enqueue(xprt);
+ }
+ EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
+
+--
+2.43.0
+
--- /dev/null
+From 83d447d716a941d80c50cef1bb6326fb3f223445 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jan 2022 13:34:29 -0500
+Subject: SUNRPC: Rename svc_close_xprt()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 4355d767a21b9445958fc11bce9a9701f76529d3 ]
+
+Clean up: Use the "svc_xprt_<task>" function naming convention as
+is used for other external APIs.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfsctl.c | 2 +-
+ include/linux/sunrpc/svc_xprt.h | 2 +-
+ net/sunrpc/svc.c | 2 +-
+ net/sunrpc/svc_xprt.c | 9 +++++++--
+ net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 2 +-
+ 5 files changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 8fec779994f7b..16920e4512bde 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -790,7 +790,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ out_close:
+ xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
+ if (xprt != NULL) {
+- svc_close_xprt(xprt);
++ svc_xprt_close(xprt);
+ svc_xprt_put(xprt);
+ }
+ out_err:
+diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
+index 8bcb6ccf32839..30c645583cd06 100644
+--- a/include/linux/sunrpc/svc_xprt.h
++++ b/include/linux/sunrpc/svc_xprt.h
+@@ -135,7 +135,7 @@ void svc_xprt_received(struct svc_xprt *xprt);
+ void svc_xprt_enqueue(struct svc_xprt *xprt);
+ void svc_xprt_put(struct svc_xprt *xprt);
+ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
+-void svc_close_xprt(struct svc_xprt *xprt);
++void svc_xprt_close(struct svc_xprt *xprt);
+ int svc_port_is_privileged(struct sockaddr *sin);
+ int svc_print_xprts(char *buf, int maxlen);
+ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 7c7cf0d4ffcb0..6a52942c501c5 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -1408,7 +1408,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+ svc_authorise(rqstp);
+ close_xprt:
+ if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+- svc_close_xprt(rqstp->rq_xprt);
++ svc_xprt_close(rqstp->rq_xprt);
+ dprintk("svc: svc_process close\n");
+ return 0;
+
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 87433ce9cb15a..19c11b3253f8d 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -1078,7 +1078,12 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
+ svc_xprt_put(xprt);
+ }
+
+-void svc_close_xprt(struct svc_xprt *xprt)
++/**
++ * svc_xprt_close - Close a client connection
++ * @xprt: transport to disconnect
++ *
++ */
++void svc_xprt_close(struct svc_xprt *xprt)
+ {
+ trace_svc_xprt_close(xprt);
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+@@ -1093,7 +1098,7 @@ void svc_close_xprt(struct svc_xprt *xprt)
+ */
+ svc_delete_xprt(xprt);
+ }
+-EXPORT_SYMBOL_GPL(svc_close_xprt);
++EXPORT_SYMBOL_GPL(svc_xprt_close);
+
+ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
+ {
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+index 16897fcb659c1..85c8cdda98b18 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+@@ -198,7 +198,7 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
+
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+- svc_close_xprt(sxprt);
++ svc_xprt_close(sxprt);
+ return ret;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 35d7e54ca0b55ffe3801a7176e0f4f9b3d1be916 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jan 2022 11:42:08 -0500
+Subject: SUNRPC: Rename svc_create_xprt()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 352ad31448fecc78a2e9b78da64eea5d63b8d0ce ]
+
+Clean up: Use the "svc_xprt_<task>" function naming convention as
+is used for other external APIs.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 4 ++--
+ fs/nfs/callback.c | 12 ++++++------
+ fs/nfsd/nfsctl.c | 8 ++++----
+ fs/nfsd/nfssvc.c | 8 ++++----
+ include/linux/sunrpc/svc_xprt.h | 7 ++++---
+ net/sunrpc/svc_xprt.c | 24 +++++++++++++++++++-----
+ 6 files changed, 39 insertions(+), 24 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index f5b688a844aa5..bba6f2b45b64a 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -197,8 +197,8 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
+
+ xprt = svc_find_xprt(serv, name, net, family, 0);
+ if (xprt == NULL)
+- return svc_create_xprt(serv, name, net, family, port,
+- SVC_SOCK_DEFAULTS, cred);
++ return svc_xprt_create(serv, name, net, family, port,
++ SVC_SOCK_DEFAULTS, cred);
+ svc_xprt_put(xprt);
+ return 0;
+ }
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index 7a810f8850632..c1a8767100ae9 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -45,18 +45,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
+ int ret;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+- ret = svc_create_xprt(serv, "tcp", net, PF_INET,
+- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+- cred);
++ ret = svc_xprt_create(serv, "tcp", net, PF_INET,
++ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
++ cred);
+ if (ret <= 0)
+ goto out_err;
+ nn->nfs_callback_tcpport = ret;
+ dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
+ nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
+
+- ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
+- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+- cred);
++ ret = svc_xprt_create(serv, "tcp", net, PF_INET6,
++ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
++ cred);
+ if (ret > 0) {
+ nn->nfs_callback_tcpport6 = ret;
+ dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 68b020f2002b7..8fec779994f7b 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -772,13 +772,13 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ if (err != 0)
+ return err;
+
+- err = svc_create_xprt(nn->nfsd_serv, transport, net,
+- PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
++ err = svc_xprt_create(nn->nfsd_serv, transport, net,
++ PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
+ if (err < 0)
+ goto out_err;
+
+- err = svc_create_xprt(nn->nfsd_serv, transport, net,
+- PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
++ err = svc_xprt_create(nn->nfsd_serv, transport, net,
++ PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
+ if (err < 0 && err != -EAFNOSUPPORT)
+ goto out_close;
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index a1765e751b739..5790b1eaff72b 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -293,13 +293,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred)
+ if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+ return 0;
+
+- error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
+- SVC_SOCK_DEFAULTS, cred);
++ error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
++ SVC_SOCK_DEFAULTS, cred);
+ if (error < 0)
+ return error;
+
+- error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
+- SVC_SOCK_DEFAULTS, cred);
++ error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
++ SVC_SOCK_DEFAULTS, cred);
+ if (error < 0)
+ return error;
+
+diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
+index 05a3ccd837f57..8bcb6ccf32839 100644
+--- a/include/linux/sunrpc/svc_xprt.h
++++ b/include/linux/sunrpc/svc_xprt.h
+@@ -127,9 +127,10 @@ int svc_reg_xprt_class(struct svc_xprt_class *);
+ void svc_unreg_xprt_class(struct svc_xprt_class *);
+ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
+ struct svc_serv *);
+-int svc_create_xprt(struct svc_serv *, const char *, struct net *,
+- const int, const unsigned short, int,
+- const struct cred *);
++int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
++ struct net *net, const int family,
++ const unsigned short port, int flags,
++ const struct cred *cred);
+ void svc_xprt_received(struct svc_xprt *xprt);
+ void svc_xprt_enqueue(struct svc_xprt *xprt);
+ void svc_xprt_put(struct svc_xprt *xprt);
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 5a6d4ccb4a607..87433ce9cb15a 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -286,7 +286,7 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
+ svc_xprt_received(new);
+ }
+
+-static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
++static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags,
+ const struct cred *cred)
+@@ -322,21 +322,35 @@ static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+ return -EPROTONOSUPPORT;
+ }
+
+-int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
++/**
++ * svc_xprt_create - Add a new listener to @serv
++ * @serv: target RPC service
++ * @xprt_name: transport class name
++ * @net: network namespace
++ * @family: network address family
++ * @port: listener port
++ * @flags: SVC_SOCK flags
++ * @cred: credential to bind to this transport
++ *
++ * Return values:
++ *   non-negative: New listener added successfully (callers treat a positive value as the listener's port)
++ * %-EPROTONOSUPPORT: Requested transport type not supported
++ */
++int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, const int family,
+ const unsigned short port, int flags,
+ const struct cred *cred)
+ {
+ int err;
+
+- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
++ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+ if (err == -EPROTONOSUPPORT) {
+ request_module("svc%s", xprt_name);
+- err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
++ err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+ }
+ return err;
+ }
+-EXPORT_SYMBOL_GPL(svc_create_xprt);
++EXPORT_SYMBOL_GPL(svc_xprt_create);
+
+ /*
+ * Copy the local and remote xprt addresses to the rqstp structure
+--
+2.43.0
+
--- /dev/null
+From f84fc2acfd9fc5fb7a4777c99559f5ea24f20861 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Oct 2021 11:57:22 -0400
+Subject: SUNRPC: Replace the "__be32 *p" parameter to .pc_decode
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 16c663642c7ec03cd4cee5fec520bb69e97babe4 ]
+
+The passed-in value of the "__be32 *p" parameter is now unused in
+every server-side XDR decoder, and can be removed.
+
+Note also that there is a line in each decoder that sets up a local
+pointer to a struct xdr_stream. Passing that pointer from the
+dispatcher instead saves one line per decoder function.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 3 +--
+ fs/lockd/xdr.c | 27 +++++++++--------------
+ fs/lockd/xdr4.c | 27 +++++++++--------------
+ fs/nfsd/nfs2acl.c | 12 +++++-----
+ fs/nfsd/nfs3acl.c | 8 +++----
+ fs/nfsd/nfs3xdr.c | 45 +++++++++++++-------------------------
+ fs/nfsd/nfs4xdr.c | 4 ++--
+ fs/nfsd/nfsd.h | 3 ++-
+ fs/nfsd/nfssvc.c | 7 +++---
+ fs/nfsd/nfsxdr.c | 30 +++++++++----------------
+ fs/nfsd/xdr.h | 21 +++++++++---------
+ fs/nfsd/xdr3.h | 31 +++++++++++++-------------
+ fs/nfsd/xdr4.h | 2 +-
+ include/linux/lockd/xdr.h | 19 ++++++++--------
+ include/linux/lockd/xdr4.h | 21 +++++++++---------
+ include/linux/sunrpc/svc.h | 3 ++-
+ 16 files changed, 113 insertions(+), 150 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index b632be3ad57b2..9a82471bda071 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -780,11 +780,10 @@ module_exit(exit_nlm);
+ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+ const struct svc_procedure *procp = rqstp->rq_procinfo;
+- struct kvec *argv = rqstp->rq_arg.head;
+ struct kvec *resv = rqstp->rq_res.head;
+
+ svcxdr_init_decode(rqstp);
+- if (!procp->pc_decode(rqstp, argv->iov_base))
++ if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream))
+ goto out_decode_err;
+
+ *statp = procp->pc_func(rqstp);
+diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
+index 9235e60b17694..895f152221048 100644
+--- a/fs/lockd/xdr.c
++++ b/fs/lockd/xdr.c
+@@ -146,15 +146,14 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
+ */
+
+ int
+-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+
+ int
+-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+@@ -171,9 +170,8 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+@@ -197,9 +195,8 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+@@ -218,9 +215,8 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+@@ -233,9 +229,8 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_res *resp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &resp->cookie))
+@@ -247,10 +242,10 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_reboot *argp = rqstp->rq_argp;
++ __be32 *p;
+ u32 len;
+
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+@@ -273,9 +268,8 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_lock *lock = &argp->lock;
+
+@@ -301,9 +295,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_lock *lock = &argp->lock;
+
+diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
+index b303ecd74f330..5e6885d1b92de 100644
+--- a/fs/lockd/xdr4.c
++++ b/fs/lockd/xdr4.c
+@@ -141,15 +141,14 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
+ */
+
+ int
+-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+
+ int
+-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+@@ -166,9 +165,8 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+@@ -192,9 +190,8 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ u32 exclusive;
+
+@@ -212,9 +209,8 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &argp->cookie))
+@@ -227,9 +223,8 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_res *resp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_cookie(xdr, &resp->cookie))
+@@ -241,10 +236,10 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_reboot *argp = rqstp->rq_argp;
++ __be32 *p;
+ u32 len;
+
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+@@ -267,9 +262,8 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_lock *lock = &argp->lock;
+
+@@ -295,9 +289,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm_lock *lock = &argp->lock;
+
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 30a1782a03f01..9bd0899455903 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -188,9 +188,9 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
+ * XDR decode functions
+ */
+
+-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_getaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &argp->fh))
+@@ -201,9 +201,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+ return 1;
+ }
+
+-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &argp->fh))
+@@ -222,9 +222,9 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
+ return 1;
+ }
+
+-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_accessargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 5dfe7644a5172..b1e352ed2436e 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -127,9 +127,9 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
+ * XDR decode functions
+ */
+
+-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_getaclargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+@@ -140,9 +140,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+ return 1;
+ }
+
+-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh))
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index c69d0dc50a669..b4a36989b3e24 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -547,18 +547,16 @@ void fill_post_wcc(struct svc_fh *fhp)
+ */
+
+ int
+-nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_fhandle *args = rqstp->rq_argp;
+
+ return svcxdr_decode_nfs_fh3(xdr, &args->fh);
+ }
+
+ int
+-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_sattrargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_nfs_fh3(xdr, &args->fh) &&
+@@ -567,18 +565,16 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_diropargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len);
+ }
+
+ int
+-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_accessargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+@@ -590,9 +586,8 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_readargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+@@ -606,9 +601,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_writeargs *args = rqstp->rq_argp;
+ u32 max_blocksize = svc_max_payload(rqstp);
+
+@@ -639,9 +633,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_createargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
+@@ -664,9 +657,8 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_createargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_diropargs3(xdr, &args->fh,
+@@ -675,9 +667,8 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_symlinkargs *args = rqstp->rq_argp;
+ struct kvec *head = rqstp->rq_arg.head;
+ struct kvec *tail = rqstp->rq_arg.tail;
+@@ -703,9 +694,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_mknodargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
+@@ -732,9 +722,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_renameargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_diropargs3(xdr, &args->ffh,
+@@ -744,9 +733,8 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_linkargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_nfs_fh3(xdr, &args->ffh) &&
+@@ -755,9 +743,8 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_readdirargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+@@ -774,9 +761,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_readdirargs *args = rqstp->rq_argp;
+ u32 dircount;
+
+@@ -797,9 +783,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd3_commitargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 1474af184368d..ec052e88d9008 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5413,14 +5413,14 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
+ }
+
+ int
+-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
++nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+
+ /* svcxdr_tmp_alloc */
+ args->to_free = NULL;
+
+- args->xdr = &rqstp->rq_arg_stream;
++ args->xdr = xdr;
+ args->ops = args->iops;
+ args->rqstp = rqstp;
+
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index 9664303afdaf3..6e8ad5f9757c8 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -78,7 +78,8 @@ extern const struct seq_operations nfs_exports_op;
+ */
+ struct nfsd_voidargs { };
+ struct nfsd_voidres { };
+-int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p);
++int nfssvc_decode_voidarg(struct svc_rqst *rqstp,
++ struct xdr_stream *xdr);
+ int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
+
+ /*
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 373695cc62a7a..be1d656548cfe 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1004,7 +1004,6 @@ nfsd(void *vrqstp)
+ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+ const struct svc_procedure *proc = rqstp->rq_procinfo;
+- struct kvec *argv = &rqstp->rq_arg.head[0];
+ struct kvec *resv = &rqstp->rq_res.head[0];
+ __be32 *p;
+
+@@ -1015,7 +1014,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ rqstp->rq_cachetype = proc->pc_cachetype;
+
+ svcxdr_init_decode(rqstp);
+- if (!proc->pc_decode(rqstp, argv->iov_base))
++ if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
+ goto out_decode_err;
+
+ switch (nfsd_cache_lookup(rqstp)) {
+@@ -1065,13 +1064,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ /**
+ * nfssvc_decode_voidarg - Decode void arguments
+ * @rqstp: Server RPC transaction context
+- * @p: buffer containing arguments to decode
++ * @xdr: XDR stream positioned at arguments to decode
+ *
+ * Return values:
+ * %0: Arguments were not valid
+ * %1: Decoding was successful
+ */
+-int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
++int nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index ddcc18adfeb1a..08e899180ee43 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -273,18 +273,16 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ */
+
+ int
+-nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_fhandle *args = rqstp->rq_argp;
+
+ return svcxdr_decode_fhandle(xdr, &args->fh);
+ }
+
+ int
+-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_sattrargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_fhandle(xdr, &args->fh) &&
+@@ -292,18 +290,16 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_diropargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len);
+ }
+
+ int
+-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_readargs *args = rqstp->rq_argp;
+ u32 totalcount;
+
+@@ -321,9 +317,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_writeargs *args = rqstp->rq_argp;
+ u32 beginoffset, totalcount;
+
+@@ -350,9 +345,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_createargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_diropargs(xdr, &args->fh,
+@@ -361,9 +355,8 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_renameargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_diropargs(xdr, &args->ffh,
+@@ -373,9 +366,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_linkargs *args = rqstp->rq_argp;
+
+ return svcxdr_decode_fhandle(xdr, &args->ffh) &&
+@@ -384,9 +376,8 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_symlinkargs *args = rqstp->rq_argp;
+ struct kvec *head = rqstp->rq_arg.head;
+
+@@ -405,9 +396,8 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct nfsd_readdirargs *args = rqstp->rq_argp;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index 863a35f24910a..19e281382bb98 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -141,16 +141,17 @@ union nfsd_xdrstore {
+ #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
+
+
+-int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
++int nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++
+ int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
+ int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
+ int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index 712c117300cb7..60a8909205e5a 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -265,21 +265,22 @@ union nfsd3_xdrstore {
+
+ #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
+
+-int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
+-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
++int nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++
+ int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
+ int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+ int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index 45257666a6888..4c22eecd65de0 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -757,7 +757,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+
+
+ bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
+-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
++int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
+ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
+ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
+index bed63156b0521..931bd0b064e6f 100644
+--- a/include/linux/lockd/xdr.h
++++ b/include/linux/lockd/xdr.h
+@@ -98,18 +98,19 @@ struct nlm_reboot {
+ */
+ #define NLMSVC_XDRSIZE sizeof(struct nlm_args)
+
+-int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
++int nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++
+ int nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
+ int nlmsvc_encode_res(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_res(struct svc_rqst *, __be32 *);
+ int nlmsvc_encode_void(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_void(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
+ int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
+-int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
+
+ #endif /* LOCKD_XDR_H */
+diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
+index 025250ade98e2..44c9d03d261b9 100644
+--- a/include/linux/lockd/xdr4.h
++++ b/include/linux/lockd/xdr4.h
+@@ -22,22 +22,21 @@
+ #define nlm4_fbig cpu_to_be32(NLM_FBIG)
+ #define nlm4_failed cpu_to_be32(NLM_FAILED)
+
+-
+-
+ void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
+-int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
++int nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++
+ int nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
+ int nlm4svc_encode_res(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_res(struct svc_rqst *, __be32 *);
+ int nlm4svc_encode_void(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_void(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
+ int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
+-int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
+
+ extern const struct rpc_version nlm_version4;
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 664a54e330af3..f74ac0fdd5f32 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -456,7 +456,8 @@ struct svc_procedure {
+ /* process the request: */
+ __be32 (*pc_func)(struct svc_rqst *);
+ /* XDR decode args: */
+- int (*pc_decode)(struct svc_rqst *, __be32 *data);
++ int (*pc_decode)(struct svc_rqst *rqstp,
++ struct xdr_stream *xdr);
+ /* XDR encode result: */
+ int (*pc_encode)(struct svc_rqst *, __be32 *data);
+ /* XDR free result: */
+--
+2.43.0
+
--- /dev/null
+From ac3590d7e8f37e4517b1720d459d3b4365fc1509 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Oct 2021 10:41:06 -0400
+Subject: SUNRPC: Replace the "__be32 *p" parameter to .pc_encode
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit fda494411485aff91768842c532f90fb8eb54943 ]
+
+The passed-in value of the "__be32 *p" parameter is now unused in
+every server-side XDR encoder, and can be removed.
+
+Note also that there is a line in each encoder that sets up a local
+pointer to a struct xdr_stream. Passing that pointer from the
+dispatcher instead saves one line per encoder function.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: J. Bruce Fields <bfields@redhat.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 3 +--
+ fs/lockd/xdr.c | 11 ++++-----
+ fs/lockd/xdr4.c | 11 ++++-----
+ fs/nfs/callback_xdr.c | 4 ++--
+ fs/nfsd/nfs2acl.c | 8 +++----
+ fs/nfsd/nfs3acl.c | 8 +++----
+ fs/nfsd/nfs3xdr.c | 46 +++++++++++++-------------------------
+ fs/nfsd/nfs4xdr.c | 7 +++---
+ fs/nfsd/nfsd.h | 3 ++-
+ fs/nfsd/nfssvc.c | 9 +++-----
+ fs/nfsd/nfsxdr.c | 22 +++++++-----------
+ fs/nfsd/xdr.h | 14 ++++++------
+ fs/nfsd/xdr3.h | 30 ++++++++++++-------------
+ fs/nfsd/xdr4.h | 2 +-
+ include/linux/lockd/xdr.h | 8 +++----
+ include/linux/lockd/xdr4.h | 8 +++----
+ include/linux/sunrpc/svc.h | 3 ++-
+ 17 files changed, 85 insertions(+), 112 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 9a82471bda071..b220e1b917268 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -780,7 +780,6 @@ module_exit(exit_nlm);
+ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+ const struct svc_procedure *procp = rqstp->rq_procinfo;
+- struct kvec *resv = rqstp->rq_res.head;
+
+ svcxdr_init_decode(rqstp);
+ if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream))
+@@ -793,7 +792,7 @@ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ return 1;
+
+ svcxdr_init_encode(rqstp);
+- if (!procp->pc_encode(rqstp, resv->iov_base + resv->iov_len))
++ if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream))
+ goto out_encode_err;
+
+ return 1;
+diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
+index 622c2ca37dbfd..2595b4d14cd44 100644
+--- a/fs/lockd/xdr.c
++++ b/fs/lockd/xdr.c
+@@ -314,15 +314,14 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ int
+-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+
+ int
+-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_cookie(xdr, &resp->cookie) &&
+@@ -330,9 +329,8 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_cookie(xdr, &resp->cookie) &&
+@@ -340,9 +338,8 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
++nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_cookie(xdr, &resp->cookie))
+diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
+index 11d93e9de85b9..4c04b1e2bd9d8 100644
+--- a/fs/lockd/xdr4.c
++++ b/fs/lockd/xdr4.c
+@@ -309,15 +309,14 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ int
+-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+
+ int
+-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_cookie(xdr, &resp->cookie) &&
+@@ -325,9 +324,8 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_cookie(xdr, &resp->cookie) &&
+@@ -335,9 +333,8 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
++nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nlm_res *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_cookie(xdr, &resp->cookie))
+diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
+index ea17085ef884b..688d58c036de7 100644
+--- a/fs/nfs/callback_xdr.c
++++ b/fs/nfs/callback_xdr.c
+@@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
+ * svc_process_common() looks for an XDR encoder to know when
+ * not to drop a Reply.
+ */
+-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
++static int nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- return xdr_ressize_check(rqstp, p);
++ return 1;
+ }
+
+ static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
+diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
+index 7b1df500e8f41..cbd042fbe0f39 100644
+--- a/fs/nfsd/nfs2acl.c
++++ b/fs/nfsd/nfs2acl.c
+@@ -240,9 +240,9 @@ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ /* GETACL */
+-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_getaclres *resp = rqstp->rq_resp;
+ struct dentry *dentry = resp->fh.fh_dentry;
+ struct inode *inode;
+@@ -270,9 +270,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ /* ACCESS */
+-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
+index 9e9f6afb2e00b..e186467b63ecb 100644
+--- a/fs/nfsd/nfs3acl.c
++++ b/fs/nfsd/nfs3acl.c
+@@ -166,9 +166,9 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ /* GETACL */
+-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_getaclres *resp = rqstp->rq_resp;
+ struct dentry *dentry = resp->fh.fh_dentry;
+ struct kvec *head = rqstp->rq_res.head;
+@@ -218,9 +218,9 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ /* SETACL */
+-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
++static int
++nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
+index a1395049db9f8..dd87076a8b0d7 100644
+--- a/fs/nfsd/nfs3xdr.c
++++ b/fs/nfsd/nfs3xdr.c
+@@ -803,9 +803,8 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+
+ /* GETATTR */
+ int
+-nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -823,9 +822,8 @@ nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* SETATTR, REMOVE, RMDIR */
+ int
+-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_attrstat *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+@@ -833,9 +831,9 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ /* LOOKUP */
+-int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p)
++int
++nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -859,9 +857,8 @@ int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* ACCESS */
+ int
+-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_accessres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -883,9 +880,8 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* READLINK */
+ int
+-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+@@ -911,9 +907,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* READ */
+ int
+-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+@@ -944,9 +939,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* WRITE */
+ int
+-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_writeres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -972,9 +966,8 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* CREATE, MKDIR, SYMLINK, MKNOD */
+ int
+-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_diropres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -998,9 +991,8 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* RENAME */
+ int
+-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_renameres *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+@@ -1010,9 +1002,8 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* LINK */
+ int
+-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_linkres *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+@@ -1022,9 +1013,8 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* READDIR */
+ int
+-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_readdirres *resp = rqstp->rq_resp;
+ struct xdr_buf *dirlist = &resp->dirlist;
+
+@@ -1276,9 +1266,8 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr,
+
+ /* FSSTAT */
+ int
+-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_fsstatres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -1323,9 +1312,8 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr,
+
+ /* FSINFO */
+ int
+-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_fsinfores *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -1366,9 +1354,8 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr,
+
+ /* PATHCONF */
+ int
+-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_pathconfres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+@@ -1390,9 +1377,8 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
+
+ /* COMMIT */
+ int
+-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
++nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd3_commitres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index e94f57f174f12..cc2367a6922a6 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -5428,10 +5428,11 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ }
+
+ int
+-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
++nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+- struct xdr_buf *buf = resp->xdr->buf;
++ struct xdr_buf *buf = xdr->buf;
++ __be32 *p;
+
+ WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
+ buf->tail[0].iov_len);
+@@ -5444,7 +5445,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
+
+ *p++ = resp->cstate.status;
+
+- rqstp->rq_next_page = resp->xdr->page_ptr + 1;
++ rqstp->rq_next_page = xdr->page_ptr + 1;
+
+ *p++ = htonl(resp->taglen);
+ memcpy(p, resp->tag, resp->taglen);
+diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
+index bfcddd4c75345..345f8247d5da9 100644
+--- a/fs/nfsd/nfsd.h
++++ b/fs/nfsd/nfsd.h
+@@ -80,7 +80,8 @@ struct nfsd_voidargs { };
+ struct nfsd_voidres { };
+ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+-int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
++int nfssvc_encode_voidres(struct svc_rqst *rqstp,
++ struct xdr_stream *xdr);
+
+ /*
+ * Function prototypes.
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 00aadc2635032..195f2bcc65384 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -1004,8 +1004,6 @@ nfsd(void *vrqstp)
+ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ {
+ const struct svc_procedure *proc = rqstp->rq_procinfo;
+- struct kvec *resv = &rqstp->rq_res.head[0];
+- __be32 *p;
+
+ /*
+ * Give the xdr decoder a chance to change this if it wants
+@@ -1030,14 +1028,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
+ * Need to grab the location to store the status, as
+ * NFSv4 does some encoding while processing
+ */
+- p = resv->iov_base + resv->iov_len;
+ svcxdr_init_encode(rqstp);
+
+ *statp = proc->pc_func(rqstp);
+ if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
+ goto out_update_drop;
+
+- if (!proc->pc_encode(rqstp, p))
++ if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
+ goto out_encode_err;
+
+ nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
+@@ -1078,13 +1075,13 @@ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ /**
+ * nfssvc_encode_voidres - Encode void results
+ * @rqstp: Server RPC transaction context
+- * @p: buffer in which to encode results
++ * @xdr: XDR stream into which to encode results
+ *
+ * Return values:
+ * %0: Local error while encoding
+ * %1: Encoding was successful
+ */
+-int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
++int nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+ return 1;
+ }
+diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
+index b5817a41b3de6..6aa8138ae2f7d 100644
+--- a/fs/nfsd/nfsxdr.c
++++ b/fs/nfsd/nfsxdr.c
+@@ -415,18 +415,16 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ */
+
+ int
+-nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_stat *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_stat(xdr, resp->status);
+ }
+
+ int
+-nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_attrstat *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+@@ -442,9 +440,8 @@ nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_diropres *resp = rqstp->rq_resp;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+@@ -462,9 +459,8 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+@@ -484,9 +480,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+@@ -509,9 +504,8 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_readdirres *resp = rqstp->rq_resp;
+ struct xdr_buf *dirlist = &resp->dirlist;
+
+@@ -532,11 +526,11 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
+ }
+
+ int
+-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
++nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+ {
+- struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct nfsd_statfsres *resp = rqstp->rq_resp;
+ struct kstatfs *stat = &resp->stats;
++ __be32 *p;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return 0;
+diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
+index d897c198c9126..bff7258041fc4 100644
+--- a/fs/nfsd/xdr.h
++++ b/fs/nfsd/xdr.h
+@@ -152,13 +152,13 @@ bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
+-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
++int nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
+ int nfssvc_encode_entry(void *data, const char *name, int namlen,
+diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
+index ef72bc4868da6..bb017fc7cba19 100644
+--- a/fs/nfsd/xdr3.h
++++ b/fs/nfsd/xdr3.h
+@@ -281,21 +281,21 @@ bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
+-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
++int nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ void nfs3svc_release_fhandle(struct svc_rqst *);
+ void nfs3svc_release_fhandle2(struct svc_rqst *);
+diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
+index f20c1ae97fec5..9921915b4c163 100644
+--- a/fs/nfsd/xdr4.h
++++ b/fs/nfsd/xdr4.h
+@@ -759,7 +759,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+
+ bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
+ bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
++int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
+ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+ void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
+diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
+index a3d0bc4fd2109..94f1ca900ca3a 100644
+--- a/include/linux/lockd/xdr.h
++++ b/include/linux/lockd/xdr.h
+@@ -108,9 +108,9 @@ bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
+-int nlmsvc_encode_res(struct svc_rqst *, __be32 *);
+-int nlmsvc_encode_void(struct svc_rqst *, __be32 *);
+-int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
++int nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ #endif /* LOCKD_XDR_H */
+diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
+index 6eec19629cd69..ee44d7357a7f7 100644
+--- a/include/linux/lockd/xdr4.h
++++ b/include/linux/lockd/xdr4.h
+@@ -33,10 +33,10 @@ bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+ bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+-int nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
+-int nlm4svc_encode_res(struct svc_rqst *, __be32 *);
+-int nlm4svc_encode_void(struct svc_rqst *, __be32 *);
+-int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
++int nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
++int nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+ extern const struct rpc_version nlm_version4;
+
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index 2bb68625bc76c..dc6fc8940261f 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -459,7 +459,8 @@ struct svc_procedure {
+ bool (*pc_decode)(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+ /* XDR encode result: */
+- int (*pc_encode)(struct svc_rqst *, __be32 *data);
++ int (*pc_encode)(struct svc_rqst *rqstp,
++ struct xdr_stream *xdr);
+ /* XDR free result: */
+ void (*pc_release)(struct svc_rqst *);
+ unsigned int pc_argsize; /* argument struct size */
+--
+2.43.0
+
--- /dev/null
+From 06a33fa908c4c477439eaeb7496cc2dc5f95e3a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC: stop using ->sv_nrthreads as a refcount
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit ec52361df99b490f6af412b046df9799b92c1050 ]
+
+The use of sv_nrthreads as a general refcount results in clumsy code, as
+is seen by various comments needed to explain the situation.
+
+This patch introduces a 'struct kref' and uses that for reference
+counting, leaving sv_nrthreads to be a pure count of threads. The kref
+is managed particularly in svc_get() and svc_put(), and also nfsd_put().
+
+svc_destroy() now takes a pointer to the embedded kref, rather than to
+the serv.
+
+nfsd allows the svc_serv to exist with ->sv_nrthreads being zero.  This
+happens when a transport is created before the first thread is started.
+To support this, a 'keep_active' flag is introduced which holds a ref on
+the svc_serv. This is set when any listening socket is successfully
+added (unless there are running threads), and cleared when the number of
+threads is set.  So when the last thread exits, the svc_serv will be
+destroyed.
+The use of 'keep_active' replaces previous code which checked if there
+were any permanent sockets.
+
+We no longer clear ->rq_server when nfsd() exits. This was done
+to prevent svc_exit_thread() from calling svc_destroy().
+Instead we take an extra reference to the svc_serv to prevent
+svc_destroy() from being called.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/lockd/svc.c | 4 ----
+ fs/nfs/callback.c | 2 +-
+ fs/nfsd/netns.h | 7 +++++++
+ fs/nfsd/nfsctl.c | 22 +++++++++-----------
+ fs/nfsd/nfssvc.c | 42 +++++++++++++++++++++++---------------
+ include/linux/sunrpc/svc.h | 14 ++++---------
+ net/sunrpc/svc.c | 22 ++++++++++----------
+ 7 files changed, 59 insertions(+), 54 deletions(-)
+
+diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
+index 135bd86ed3adb..a9669b106dbde 100644
+--- a/fs/lockd/svc.c
++++ b/fs/lockd/svc.c
+@@ -486,10 +486,6 @@ int lockd_up(struct net *net, const struct cred *cred)
+ goto err_put;
+ }
+ nlmsvc_users++;
+- /*
+- * Note: svc_serv structures have an initial use count of 1,
+- * so we exit through here on both success and failure.
+- */
+ err_put:
+ svc_put(serv);
+ err_create:
+diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
+index dddd66749a881..09ec60b99f65e 100644
+--- a/fs/nfs/callback.c
++++ b/fs/nfs/callback.c
+@@ -169,7 +169,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
+ if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS)
+ nrservs = NFS4_MIN_NR_CALLBACK_THREADS;
+
+- if (serv->sv_nrthreads-1 == nrservs)
++ if (serv->sv_nrthreads == nrservs)
+ return 0;
+
+ ret = serv->sv_ops->svo_setup(serv, NULL, nrservs);
+diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
+index 935c1028c2175..08bcd8f23b013 100644
+--- a/fs/nfsd/netns.h
++++ b/fs/nfsd/netns.h
+@@ -123,6 +123,13 @@ struct nfsd_net {
+ u32 clverifier_counter;
+
+ struct svc_serv *nfsd_serv;
++ /* When a listening socket is added to nfsd, keep_active is set
++ * and this justifies a reference on nfsd_serv. This stops
++ * nfsd_serv from being freed. When the number of threads is
++ * set, keep_active is cleared and the reference is dropped. So
++ * when the last thread exits, the service will be destroyed.
++ */
++ int keep_active;
+
+ wait_queue_head_t ntf_wq;
+ atomic_t ntf_refcnt;
+diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
+index 5c8d985acf5fb..53076c5afe62c 100644
+--- a/fs/nfsd/nfsctl.c
++++ b/fs/nfsd/nfsctl.c
+@@ -742,13 +742,12 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
+ return err;
+
+ err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+- if (err < 0 && list_empty(&nn->nfsd_serv->sv_permsocks)) {
+- nfsd_put(net);
+- return err;
+- }
+
+- /* Decrease the count, but don't shut down the service */
+- nn->nfsd_serv->sv_nrthreads--;
++ if (err >= 0 &&
++ !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
++ svc_get(nn->nfsd_serv);
++
++ nfsd_put(net);
+ return err;
+ }
+
+@@ -783,8 +782,10 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ if (err < 0 && err != -EAFNOSUPPORT)
+ goto out_close;
+
+- /* Decrease the count, but don't shut down the service */
+- nn->nfsd_serv->sv_nrthreads--;
++ if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
++ svc_get(nn->nfsd_serv);
++
++ nfsd_put(net);
+ return 0;
+ out_close:
+ xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
+@@ -793,10 +794,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
+ svc_xprt_put(xprt);
+ }
+ out_err:
+- if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+- nn->nfsd_serv->sv_nrthreads--;
+- else
+- nfsd_put(net);
++ nfsd_put(net);
+ return err;
+ }
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 4aee1cfe0d1bb..141d884fee4f4 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -60,13 +60,13 @@ static __be32 nfsd_init_request(struct svc_rqst *,
+ * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ *
+ * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
+- * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
+- * of nfsd threads must exist and each must listed in ->sp_all_threads in each
+- * entry of ->sv_pools[].
++ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
++ * nn->keep_active is set). That number of nfsd threads must
++ * exist and each must be listed in ->sp_all_threads in some entry of
++ * ->sv_pools[].
+ *
+- * Transitions of the thread count between zero and non-zero are of particular
+- * interest since the svc_serv needs to be created and initialized at that
+- * point, or freed.
++ * Each active thread holds a counted reference on nn->nfsd_serv, as does
++ * the nn->keep_active flag and various transient calls to svc_get().
+ *
+ * Finally, the nfsd_mutex also protects some of the global variables that are
+ * accessed when nfsd starts and that are settable via the write_* routines in
+@@ -700,14 +700,22 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
+ return 0;
+ }
+
++/* This is the callback for kref_put() below.
++ * There is no code here as the first thing to be done is
++ * call svc_shutdown_net(), but we cannot get the 'net' from
++ * the kref. So do all the work when kref_put returns true.
++ */
++static void nfsd_noop(struct kref *ref)
++{
++}
++
+ void nfsd_put(struct net *net)
+ {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+- nn->nfsd_serv->sv_nrthreads -= 1;
+- if (nn->nfsd_serv->sv_nrthreads == 0) {
++ if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
+ svc_shutdown_net(nn->nfsd_serv, net);
+- svc_destroy(nn->nfsd_serv);
++ svc_destroy(&nn->nfsd_serv->sv_refcnt);
+ nfsd_complete_shutdown(net);
+ }
+ }
+@@ -803,15 +811,14 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+ NULL, nrservs);
+ if (error)
+ goto out_shutdown;
+- /* We are holding a reference to nn->nfsd_serv which
+- * we don't want to count in the return value,
+- * so subtract 1
+- */
+- error = nn->nfsd_serv->sv_nrthreads - 1;
++ error = nn->nfsd_serv->sv_nrthreads;
+ out_shutdown:
+ if (error < 0 && !nfsd_up_before)
+ nfsd_shutdown_net(net);
+ out_put:
++ /* Threads now hold service active */
++ if (xchg(&nn->keep_active, 0))
++ nfsd_put(net);
+ nfsd_put(net);
+ out:
+ mutex_unlock(&nfsd_mutex);
+@@ -980,11 +987,15 @@ nfsd(void *vrqstp)
+ nfsdstats.th_cnt --;
+
+ out:
+- rqstp->rq_server = NULL;
++ /* Take an extra ref so that the svc_put in svc_exit_thread()
++ * doesn't call svc_destroy()
++ */
++ svc_get(nn->nfsd_serv);
+
+ /* Release the thread */
+ svc_exit_thread(rqstp);
+
++ /* Now if needed we call svc_destroy in appropriate context */
+ nfsd_put(net);
+
+ /* Release module */
+@@ -1099,7 +1110,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+ mutex_unlock(&nfsd_mutex);
+ return -ENODEV;
+ }
+- /* bump up the psudo refcount while traversing */
+ svc_get(nn->nfsd_serv);
+ ret = svc_pool_stats_open(nn->nfsd_serv, file);
+ mutex_unlock(&nfsd_mutex);
+diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
+index b378e9ad141b4..fdc32ffef0184 100644
+--- a/include/linux/sunrpc/svc.h
++++ b/include/linux/sunrpc/svc.h
+@@ -85,6 +85,7 @@ struct svc_serv {
+ struct svc_program * sv_program; /* RPC program */
+ struct svc_stat * sv_stats; /* RPC statistics */
+ spinlock_t sv_lock;
++ struct kref sv_refcnt;
+ unsigned int sv_nrthreads; /* # of server threads */
+ unsigned int sv_maxconn; /* max connections allowed or
+ * '0' causing max to be based
+@@ -119,19 +120,14 @@ struct svc_serv {
+ * @serv: the svc_serv to have count incremented
+ *
+ * Returns: the svc_serv that was passed in.
+- *
+- * We use sv_nrthreads as a reference count. svc_put() drops
+- * this refcount, so we need to bump it up around operations that
+- * change the number of threads. Horrible, but there it is.
+- * Should be called with the "service mutex" held.
+ */
+ static inline struct svc_serv *svc_get(struct svc_serv *serv)
+ {
+- serv->sv_nrthreads++;
++ kref_get(&serv->sv_refcnt);
+ return serv;
+ }
+
+-void svc_destroy(struct svc_serv *serv);
++void svc_destroy(struct kref *);
+
+ /**
+ * svc_put - decrement reference count on a SUNRPC serv
+@@ -142,9 +138,7 @@ void svc_destroy(struct svc_serv *serv);
+ */
+ static inline void svc_put(struct svc_serv *serv)
+ {
+- serv->sv_nrthreads -= 1;
+- if (serv->sv_nrthreads == 0)
+- svc_destroy(serv);
++ kref_put(&serv->sv_refcnt, svc_destroy);
+ }
+
+ /*
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 85f324418d175..6cde8c87e5733 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -435,7 +435,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+ return NULL;
+ serv->sv_name = prog->pg_name;
+ serv->sv_program = prog;
+- serv->sv_nrthreads = 1;
++ kref_init(&serv->sv_refcnt);
+ serv->sv_stats = prog->pg_stats;
+ if (bufsize > RPCSVC_MAXPAYLOAD)
+ bufsize = RPCSVC_MAXPAYLOAD;
+@@ -526,10 +526,11 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net);
+ * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
+ */
+ void
+-svc_destroy(struct svc_serv *serv)
++svc_destroy(struct kref *ref)
+ {
+- dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
++ struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+
++ dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
+ del_timer_sync(&serv->sv_temptimer);
+
+ /*
+@@ -637,6 +638,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
+ if (!rqstp)
+ return ERR_PTR(-ENOMEM);
+
++ svc_get(serv);
+ serv->sv_nrthreads++;
+ spin_lock_bh(&pool->sp_lock);
+ pool->sp_nrthreads++;
+@@ -776,8 +778,7 @@ int
+ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ {
+ if (pool == NULL) {
+- /* The -1 assumes caller has done a svc_get() */
+- nrservs -= (serv->sv_nrthreads-1);
++ nrservs -= serv->sv_nrthreads;
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+@@ -818,8 +819,7 @@ int
+ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ {
+ if (pool == NULL) {
+- /* The -1 assumes caller has done a svc_get() */
+- nrservs -= (serv->sv_nrthreads-1);
++ nrservs -= serv->sv_nrthreads;
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+@@ -884,12 +884,12 @@ svc_exit_thread(struct svc_rqst *rqstp)
+ list_del_rcu(&rqstp->rq_all);
+ spin_unlock_bh(&pool->sp_lock);
+
++ serv->sv_nrthreads -= 1;
++ svc_sock_update_bufs(serv);
++
+ svc_rqst_free(rqstp);
+
+- if (!serv)
+- return;
+- svc_sock_update_bufs(serv);
+- svc_destroy(serv);
++ svc_put(serv);
+ }
+ EXPORT_SYMBOL_GPL(svc_exit_thread);
+
+--
+2.43.0
+
--- /dev/null
+From 3b32616d70fa9bf6ff9a840a7cc93347ec7ae058 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Oct 2021 18:02:24 -0400
+Subject: SUNRPC: Tracepoints should display tk_pid and cl_clid as a fixed-size
+ field
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit b4776a341ec05e809d21e98db5ed49dbdc81d5d8 ]
+
+For certain special cases, RPC-related tracepoints record a -1 as
+the task ID or the client ID. It's ugly for a trace event to display
+4 billion in these cases.
+
+To help keep SUNRPC tracepoints consistent, create a macro that
+defines the print format specifiers for tk_pid and cl_clid. At some
+point in the future we might try tk_pid with a wider range of values
+than 0..64K so this makes it easier to make that change.
+
+RPC tracepoints now look like this:
+
+<...>-1276 [009] 149.720358: rpc_clnt_new: client=00000005 peer=[192.168.2.55]:20049 program=nfs server=klimt.ib
+
+<...>-1342 [004] 149.921234: rpc_xdr_recvfrom: task:0000001a@00000005 head=[0xff1242d9ab6dc01c,144] page=0 tail=[(nil),0] len=144
+<...>-1342 [004] 149.921235: xprt_release_cong: task:0000001a@00000005 snd_task:ffffffff cong=256 cwnd=16384
+<...>-1342 [004] 149.921235: xprt_put_cong: task:0000001a@00000005 snd_task:ffffffff cong=0 cwnd=16384
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfs/nfs4trace.h | 9 ++--
+ fs/nfs/nfstrace.h | 6 ++-
+ include/trace/events/rpcgss.h | 18 +++++---
+ include/trace/events/rpcrdma.h | 42 +++++++++--------
+ include/trace/events/sunrpc.h | 74 ++++++++++++++++++------------
+ include/trace/events/sunrpc_base.h | 18 ++++++++
+ 6 files changed, 108 insertions(+), 59 deletions(-)
+ create mode 100644 include/trace/events/sunrpc_base.h
+
+diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
+index bcd18e96b44fa..39a45bb7d4311 100644
+--- a/fs/nfs/nfs4trace.h
++++ b/fs/nfs/nfs4trace.h
+@@ -9,6 +9,7 @@
+ #define _TRACE_NFS4_H
+
+ #include <linux/tracepoint.h>
++#include <trace/events/sunrpc_base.h>
+
+ TRACE_DEFINE_ENUM(EPERM);
+ TRACE_DEFINE_ENUM(ENOENT);
+@@ -696,8 +697,8 @@ TRACE_EVENT(nfs4_xdr_bad_operation,
+ __entry->expected = expected;
+ ),
+
+- TP_printk(
+- "task:%u@%d xid=0x%08x operation=%u, expected=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x operation=%u, expected=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->op, __entry->expected
+ )
+@@ -731,8 +732,8 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event,
+ __entry->error = error;
+ ),
+
+- TP_printk(
+- "task:%u@%d xid=0x%08x error=%ld (%s) operation=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x error=%ld (%s) operation=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ -__entry->error, show_nfsv4_errors(__entry->error),
+ __entry->op
+diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
+index 589f32fdbe637..69fa637a4aba8 100644
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -11,6 +11,8 @@
+ #include <linux/tracepoint.h>
+ #include <linux/iversion.h>
+
++#include <trace/events/sunrpc_base.h>
++
+ #define nfs_show_file_type(ftype) \
+ __print_symbolic(ftype, \
+ { DT_UNKNOWN, "UNKNOWN" }, \
+@@ -1359,8 +1361,8 @@ DECLARE_EVENT_CLASS(nfs_xdr_event,
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+ ),
+
+- TP_printk(
+- "task:%u@%d xid=0x%08x %sv%d %s error=%ld (%s)",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x %sv%d %s error=%ld (%s)",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(program), __entry->version,
+ __get_str(procedure), -__entry->error,
+diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
+index b2a2672e66322..3ba63319af3cd 100644
+--- a/include/trace/events/rpcgss.h
++++ b/include/trace/events/rpcgss.h
+@@ -13,6 +13,8 @@
+
+ #include <linux/tracepoint.h>
+
++#include <trace/events/sunrpc_base.h>
++
+ /**
+ ** GSS-API related trace events
+ **/
+@@ -99,7 +101,7 @@ DECLARE_EVENT_CLASS(rpcgss_gssapi_event,
+ __entry->maj_stat = maj_stat;
+ ),
+
+- TP_printk("task:%u@%u maj_stat=%s",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " maj_stat=%s",
+ __entry->task_id, __entry->client_id,
+ __entry->maj_stat == 0 ?
+ "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat))
+@@ -332,7 +334,8 @@ TRACE_EVENT(rpcgss_unwrap_failed,
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+- TP_printk("task:%u@%u", __entry->task_id, __entry->client_id)
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER,
++ __entry->task_id, __entry->client_id)
+ );
+
+ TRACE_EVENT(rpcgss_bad_seqno,
+@@ -358,7 +361,8 @@ TRACE_EVENT(rpcgss_bad_seqno,
+ __entry->received = received;
+ ),
+
+- TP_printk("task:%u@%u expected seqno %u, received seqno %u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " expected seqno %u, received seqno %u",
+ __entry->task_id, __entry->client_id,
+ __entry->expected, __entry->received)
+ );
+@@ -386,7 +390,7 @@ TRACE_EVENT(rpcgss_seqno,
+ __entry->seqno = rqst->rq_seqno;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x seqno=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x seqno=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->xid, __entry->seqno)
+ );
+@@ -418,7 +422,8 @@ TRACE_EVENT(rpcgss_need_reencode,
+ __entry->ret = ret;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded",
+ __entry->task_id, __entry->client_id,
+ __entry->xid, __entry->seqno, __entry->seq_xmit,
+ __entry->ret ? "" : "un")
+@@ -452,7 +457,8 @@ TRACE_EVENT(rpcgss_update_slack,
+ __entry->verfsize = auth->au_verfsize;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x auth=%p rslack=%u ralign=%u verfsize=%u\n",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->auth, __entry->rslack, __entry->ralign,
+ __entry->verfsize)
+diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
+index de41954995926..28ea73bba4e78 100644
+--- a/include/trace/events/rpcrdma.h
++++ b/include/trace/events/rpcrdma.h
+@@ -14,7 +14,9 @@
+ #include <linux/sunrpc/rpc_rdma_cid.h>
+ #include <linux/tracepoint.h>
+ #include <rdma/ib_cm.h>
++
+ #include <trace/events/rdma.h>
++#include <trace/events/sunrpc_base.h>
+
+ /**
+ ** Event classes
+@@ -279,7 +281,8 @@ DECLARE_EVENT_CLASS(xprtrdma_rdch_event,
+ __entry->nsegs = nsegs;
+ ),
+
+- TP_printk("task:%u@%u pos=%u %u@0x%016llx:0x%08x (%s)",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " pos=%u %u@0x%016llx:0x%08x (%s)",
+ __entry->task_id, __entry->client_id,
+ __entry->pos, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle,
+@@ -326,7 +329,8 @@ DECLARE_EVENT_CLASS(xprtrdma_wrch_event,
+ __entry->nsegs = nsegs;
+ ),
+
+- TP_printk("task:%u@%u %u@0x%016llx:0x%08x (%s)",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " %u@0x%016llx:0x%08x (%s)",
+ __entry->task_id, __entry->client_id,
+ __entry->length, (unsigned long long)__entry->offset,
+ __entry->handle,
+@@ -387,7 +391,8 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class,
+ __entry->dir = mr->mr_dir;
+ ),
+
+- TP_printk("task:%u@%u mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)",
+ __entry->task_id, __entry->client_id,
+ __entry->mr_id, __entry->nents, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle,
+@@ -630,9 +635,9 @@ TRACE_EVENT(xprtrdma_nomrs_err,
+ __assign_str(port, rpcrdma_portstr(r_xprt));
+ ),
+
+- TP_printk("peer=[%s]:%s task:%u@%u",
+- __get_str(addr), __get_str(port),
+- __entry->task_id, __entry->client_id
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " peer=[%s]:%s",
++ __entry->task_id, __entry->client_id,
++ __get_str(addr), __get_str(port)
+ )
+ );
+
+@@ -693,7 +698,8 @@ TRACE_EVENT(xprtrdma_marshal,
+ __entry->wtype = wtype;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x hdr=%u xdr=%u/%u/%u %s/%s",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->hdrlen,
+ __entry->headlen, __entry->pagelen, __entry->taillen,
+@@ -723,7 +729,7 @@ TRACE_EVENT(xprtrdma_marshal_failed,
+ __entry->ret = ret;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x: ret=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->ret
+ )
+@@ -750,7 +756,7 @@ TRACE_EVENT(xprtrdma_prepsend_failed,
+ __entry->ret = ret;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x: ret=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x ret=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->ret
+ )
+@@ -785,7 +791,7 @@ TRACE_EVENT(xprtrdma_post_send,
+ __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED;
+ ),
+
+- TP_printk("task:%u@%u cq.id=%u cid=%d (%d SGE%s) %s",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u cid=%d (%d SGE%s) %s",
+ __entry->task_id, __entry->client_id,
+ __entry->cq_id, __entry->completion_id,
+ __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"),
+@@ -820,7 +826,7 @@ TRACE_EVENT(xprtrdma_post_send_err,
+ __entry->rc = rc;
+ ),
+
+- TP_printk("task:%u@%u cq.id=%u rc=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " cq.id=%u rc=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->cq_id, __entry->rc
+ )
+@@ -932,7 +938,7 @@ TRACE_EVENT(xprtrdma_post_linv_err,
+ __entry->status = status;
+ ),
+
+- TP_printk("task:%u@%u status=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d",
+ __entry->task_id, __entry->client_id, __entry->status
+ )
+ );
+@@ -1120,7 +1126,7 @@ TRACE_EVENT(xprtrdma_reply,
+ __entry->credits = credits;
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x credits=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x credits=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->credits
+ )
+@@ -1156,7 +1162,7 @@ TRACE_EVENT(xprtrdma_err_vers,
+ __entry->max = be32_to_cpup(max);
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x versions=[%u, %u]",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x versions=[%u, %u]",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->min, __entry->max
+ )
+@@ -1181,7 +1187,7 @@ TRACE_EVENT(xprtrdma_err_chunk,
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x",
+ __entry->task_id, __entry->client_id, __entry->xid
+ )
+ );
+@@ -1207,7 +1213,7 @@ TRACE_EVENT(xprtrdma_err_unrecognized,
+ __entry->procedure = be32_to_cpup(procedure);
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x procedure=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x procedure=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->procedure
+ )
+@@ -1239,7 +1245,7 @@ TRACE_EVENT(xprtrdma_fixup,
+ __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len;
+ ),
+
+- TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " fixup=%lu xdr=%zu/%u/%zu",
+ __entry->task_id, __entry->client_id, __entry->fixup,
+ __entry->headlen, __entry->pagelen, __entry->taillen
+ )
+@@ -1289,7 +1295,7 @@ TRACE_EVENT(xprtrdma_mrs_zap,
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+- TP_printk("task:%u@%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER,
+ __entry->task_id, __entry->client_id
+ )
+ );
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index f09bbb6c918e2..68ae89c9a1c20 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -14,6 +14,8 @@
+ #include <linux/net.h>
+ #include <linux/tracepoint.h>
+
++#include <trace/events/sunrpc_base.h>
++
+ TRACE_DEFINE_ENUM(SOCK_STREAM);
+ TRACE_DEFINE_ENUM(SOCK_DGRAM);
+ TRACE_DEFINE_ENUM(SOCK_RAW);
+@@ -78,7 +80,8 @@ DECLARE_EVENT_CLASS(rpc_xdr_buf_class,
+ __entry->msg_len = xdr->len;
+ ),
+
+- TP_printk("task:%u@%u head=[%p,%zu] page=%u tail=[%p,%zu] len=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " head=[%p,%zu] page=%u tail=[%p,%zu] len=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->head_base, __entry->head_len, __entry->page_len,
+ __entry->tail_base, __entry->tail_len, __entry->msg_len
+@@ -114,7 +117,7 @@ DECLARE_EVENT_CLASS(rpc_clnt_class,
+ __entry->client_id = clnt->cl_clid;
+ ),
+
+- TP_printk("clid=%u", __entry->client_id)
++ TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER, __entry->client_id)
+ );
+
+ #define DEFINE_RPC_CLNT_EVENT(name) \
+@@ -158,7 +161,8 @@ TRACE_EVENT(rpc_clnt_new,
+ __assign_str(server, server);
+ ),
+
+- TP_printk("client=%u peer=[%s]:%s program=%s server=%s",
++ TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER
++ " peer=[%s]:%s program=%s server=%s",
+ __entry->client_id, __get_str(addr), __get_str(port),
+ __get_str(program), __get_str(server))
+ );
+@@ -206,7 +210,8 @@ TRACE_EVENT(rpc_clnt_clone_err,
+ __entry->error = error;
+ ),
+
+- TP_printk("client=%u error=%d", __entry->client_id, __entry->error)
++ TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " error=%d",
++ __entry->client_id, __entry->error)
+ );
+
+
+@@ -248,7 +253,7 @@ DECLARE_EVENT_CLASS(rpc_task_status,
+ __entry->status = task->tk_status;
+ ),
+
+- TP_printk("task:%u@%u status=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->status)
+ );
+@@ -288,7 +293,7 @@ TRACE_EVENT(rpc_request,
+ __assign_str(procname, rpc_proc_name(task));
+ ),
+
+- TP_printk("task:%u@%u %sv%d %s (%ssync)",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " %sv%d %s (%ssync)",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version,
+ __get_str(procname), __entry->async ? "a": ""
+@@ -348,7 +353,8 @@ DECLARE_EVENT_CLASS(rpc_task_running,
+ __entry->flags = task->tk_flags;
+ ),
+
+- TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%ps",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " flags=%s runstate=%s status=%d action=%ps",
+ __entry->task_id, __entry->client_id,
+ rpc_show_task_flags(__entry->flags),
+ rpc_show_runstate(__entry->runstate),
+@@ -400,7 +406,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
+ __assign_str(q_name, rpc_qname(q));
+ ),
+
+- TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " flags=%s runstate=%s status=%d timeout=%lu queue=%s",
+ __entry->task_id, __entry->client_id,
+ rpc_show_task_flags(__entry->flags),
+ rpc_show_runstate(__entry->runstate),
+@@ -436,7 +443,7 @@ DECLARE_EVENT_CLASS(rpc_failure,
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+- TP_printk("task:%u@%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER,
+ __entry->task_id, __entry->client_id)
+ );
+
+@@ -478,7 +485,8 @@ DECLARE_EVENT_CLASS(rpc_reply_event,
+ __assign_str(servername, task->tk_xprt->servername);
+ ),
+
+- TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " server=%s xid=0x%08x %sv%d %s",
+ __entry->task_id, __entry->client_id, __get_str(servername),
+ __entry->xid, __get_str(progname), __entry->version,
+ __get_str(procname))
+@@ -538,7 +546,8 @@ TRACE_EVENT(rpc_buf_alloc,
+ __entry->status = status;
+ ),
+
+- TP_printk("task:%u@%u callsize=%zu recvsize=%zu status=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " callsize=%zu recvsize=%zu status=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->callsize, __entry->recvsize, __entry->status
+ )
+@@ -567,7 +576,8 @@ TRACE_EVENT(rpc_call_rpcerror,
+ __entry->rpc_status = rpc_status;
+ ),
+
+- TP_printk("task:%u@%u tk_status=%d rpc_status=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " tk_status=%d rpc_status=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->tk_status, __entry->rpc_status)
+ );
+@@ -607,7 +617,8 @@ TRACE_EVENT(rpc_stats_latency,
+ __entry->execute = ktime_to_us(execute);
+ ),
+
+- TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(progname), __entry->version, __get_str(procname),
+ __entry->backlog, __entry->rtt, __entry->execute)
+@@ -651,8 +662,8 @@ TRACE_EVENT(rpc_xdr_overflow,
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name);
+ } else {
+- __entry->task_id = 0;
+- __entry->client_id = 0;
++ __entry->task_id = -1;
++ __entry->client_id = -1;
+ __assign_str(progname, "unknown");
+ __entry->version = 0;
+ __assign_str(procedure, "unknown");
+@@ -668,8 +679,8 @@ TRACE_EVENT(rpc_xdr_overflow,
+ __entry->len = xdr->buf->len;
+ ),
+
+- TP_printk(
+- "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->requested, __entry->p, __entry->end,
+@@ -727,8 +738,8 @@ TRACE_EVENT(rpc_xdr_alignment,
+ __entry->len = xdr->buf->len;
+ ),
+
+- TP_printk(
+- "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->offset, __entry->copied,
+@@ -917,7 +928,8 @@ TRACE_EVENT(rpc_socket_nospace,
+ __entry->remaining = rqst->rq_slen - transport->xmit.offset;
+ ),
+
+- TP_printk("task:%u@%u total=%u remaining=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " total=%u remaining=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->total, __entry->remaining
+ )
+@@ -1042,8 +1054,8 @@ TRACE_EVENT(xprt_transmit,
+ __entry->status = status;
+ ),
+
+- TP_printk(
+- "task:%u@%u xid=0x%08x seqno=%u status=%d",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x seqno=%u status=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->seqno, __entry->status)
+ );
+@@ -1082,8 +1094,8 @@ TRACE_EVENT(xprt_retransmit,
+ __assign_str(procname, rpc_proc_name(task));
+ ),
+
+- TP_printk(
+- "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d timeout=%lu",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " xid=0x%08x %sv%d %s ntrans=%d timeout=%lu",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(progname), __entry->version, __get_str(procname),
+ __entry->ntrans, __entry->timeout
+@@ -1140,7 +1152,8 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
+ __entry->snd_task_id = -1;
+ ),
+
+- TP_printk("task:%u@%u snd_task:%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " snd_task:" SUNRPC_TRACE_PID_SPECIFIER,
+ __entry->task_id, __entry->client_id,
+ __entry->snd_task_id)
+ );
+@@ -1192,7 +1205,9 @@ DECLARE_EVENT_CLASS(xprt_cong_event,
+ __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
+ ),
+
+- TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " snd_task:" SUNRPC_TRACE_PID_SPECIFIER
++ " cong=%lu cwnd=%lu%s",
+ __entry->task_id, __entry->client_id,
+ __entry->snd_task_id, __entry->cong, __entry->cwnd,
+ __entry->wait ? " (wait)" : "")
+@@ -1230,7 +1245,7 @@ TRACE_EVENT(xprt_reserve,
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ ),
+
+- TP_printk("task:%u@%u xid=0x%08x",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " xid=0x%08x",
+ __entry->task_id, __entry->client_id, __entry->xid
+ )
+ );
+@@ -1319,7 +1334,8 @@ TRACE_EVENT(rpcb_getport,
+ __assign_str(servername, task->tk_xprt->servername);
+ ),
+
+- TP_printk("task:%u@%u server=%s program=%u version=%u protocol=%d bind_version=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
++ " server=%s program=%u version=%u protocol=%d bind_version=%u",
+ __entry->task_id, __entry->client_id, __get_str(servername),
+ __entry->program, __entry->version, __entry->protocol,
+ __entry->bind_version
+@@ -1349,7 +1365,7 @@ TRACE_EVENT(rpcb_setport,
+ __entry->port = port;
+ ),
+
+- TP_printk("task:%u@%u status=%d port=%u",
++ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER " status=%d port=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->status, __entry->port
+ )
+diff --git a/include/trace/events/sunrpc_base.h b/include/trace/events/sunrpc_base.h
+new file mode 100644
+index 0000000000000..588557d07ea82
+--- /dev/null
++++ b/include/trace/events/sunrpc_base.h
+@@ -0,0 +1,18 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Copyright (c) 2021 Oracle and/or its affiliates.
++ *
++ * Common types and format specifiers for sunrpc.
++ */
++
++#if !defined(_TRACE_SUNRPC_BASE_H)
++#define _TRACE_SUNRPC_BASE_H
++
++#include <linux/tracepoint.h>
++
++#define SUNRPC_TRACE_PID_SPECIFIER "%08x"
++#define SUNRPC_TRACE_CLID_SPECIFIER "%08x"
++#define SUNRPC_TRACE_TASK_SPECIFIER \
++ "task:" SUNRPC_TRACE_PID_SPECIFIER "@" SUNRPC_TRACE_CLID_SPECIFIER
++
++#endif /* _TRACE_SUNRPC_BASE_H */
+--
+2.43.0
+
--- /dev/null
+From 989cd25c8fc4466d88a2d04f625e4a652fc7b89a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 10:06:21 -0400
+Subject: SUNRPC: Use RMW bitops in single-threaded hot paths
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 28df0988815f63e2af5e6718193c9f68681ad7ff ]
+
+I noticed CPU pipeline stalls while using perf.
+
+Once an svc thread is scheduled and executing an RPC, no other
+processes will touch svc_rqst::rq_flags. Thus bus-locked atomics are
+not needed outside the svc thread scheduler.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfs4proc.c | 7 ++++---
+ fs/nfsd/nfs4xdr.c | 2 +-
+ net/sunrpc/auth_gss/svcauth_gss.c | 4 ++--
+ net/sunrpc/svc.c | 6 +++---
+ net/sunrpc/svc_xprt.c | 2 +-
+ net/sunrpc/svcsock.c | 8 ++++----
+ net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +-
+ 7 files changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
+index 3ac2978c596ae..5b56877c7fb57 100644
+--- a/fs/nfsd/nfs4proc.c
++++ b/fs/nfsd/nfs4proc.c
+@@ -970,7 +970,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ * the client wants us to do more in this compound:
+ */
+ if (!nfsd4_last_compound_op(rqstp))
+- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
+ /* check stateid */
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+@@ -2644,11 +2644,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ cstate->minorversion = args->minorversion;
+ fh_init(current_fh, NFS4_FHSIZE);
+ fh_init(save_fh, NFS4_FHSIZE);
++
+ /*
+ * Don't use the deferral mechanism for NFSv4; compounds make it
+ * too hard to avoid non-idempotency problems.
+ */
+- clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
++ __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+
+ /*
+ * According to RFC3010, this takes precedence over all other errors.
+@@ -2770,7 +2771,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
+ out:
+ cstate->status = status;
+ /* Reset deferral mechanism for RPC deferrals */
+- set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
++ __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ return rpc_success;
+ }
+
+diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
+index 771d3057577ef..96d41b1cc2d17 100644
+--- a/fs/nfsd/nfs4xdr.c
++++ b/fs/nfsd/nfs4xdr.c
+@@ -2408,7 +2408,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
+
+ if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
+- clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
++ __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+
+ return true;
+ }
+diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
+index 48b608cb5f5ec..2381c5d1b0710 100644
+--- a/net/sunrpc/auth_gss/svcauth_gss.c
++++ b/net/sunrpc/auth_gss/svcauth_gss.c
+@@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
+ * rejecting the server-computed MIC in this somewhat rare case,
+ * do not use splice with the GSS integrity service.
+ */
+- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
+ /* Did we already verify the signature on the original pass through? */
+ if (rqstp->rq_deferred)
+@@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
+ int pad, remaining_len, offset;
+ u32 rseqno;
+
+- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+
+ priv_len = svc_getnl(&buf->head[0]);
+ if (rqstp->rq_deferred) {
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 87da3ff46ce9a..f2a8c1ee8530e 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -1281,10 +1281,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+ goto err_short_len;
+
+ /* Will be turned off by GSS integrity and privacy services */
+- set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
++ __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ /* Will be turned off only when NFSv4 Sessions are used */
+- set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+- clear_bit(RQ_DROPME, &rqstp->rq_flags);
++ __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
++ __clear_bit(RQ_DROPME, &rqstp->rq_flags);
+
+ svc_putu32(resv, rqstp->rq_xid);
+
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 67ccf1a6459ae..39acef5134f5c 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -1251,7 +1251,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
+ trace_svc_defer(rqstp);
+ svc_xprt_get(rqstp->rq_xprt);
+ dr->xprt = rqstp->rq_xprt;
+- set_bit(RQ_DROPME, &rqstp->rq_flags);
++ __set_bit(RQ_DROPME, &rqstp->rq_flags);
+
+ dr->handle.revisit = svc_revisit;
+ return &dr->handle;
+diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
+index be7081284a098..46cea0e413aeb 100644
+--- a/net/sunrpc/svcsock.c
++++ b/net/sunrpc/svcsock.c
+@@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
+ static void svc_sock_secure_port(struct svc_rqst *rqstp)
+ {
+ if (svc_port_is_privileged(svc_addr(rqstp)))
+- set_bit(RQ_SECURE, &rqstp->rq_flags);
++ __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ else
+- clear_bit(RQ_SECURE, &rqstp->rq_flags);
++ __clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ }
+
+ /*
+@@ -1005,9 +1005,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
+ rqstp->rq_xprt_ctxt = NULL;
+ rqstp->rq_prot = IPPROTO_TCP;
+ if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
+- set_bit(RQ_LOCAL, &rqstp->rq_flags);
++ __set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ else
+- clear_bit(RQ_LOCAL, &rqstp->rq_flags);
++ __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+
+ p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
+ calldir = p[1];
+diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+index f776f0cb471f0..ac147304fb0e9 100644
+--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
+@@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
+
+ static void svc_rdma_secure_port(struct svc_rqst *rqstp)
+ {
+- set_bit(RQ_SECURE, &rqstp->rq_flags);
++ __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ }
+
+ static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
+--
+2.43.0
+
--- /dev/null
+From 1a9c9b021ee2e4b4022e059a1b9f6353f64987ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Nov 2021 15:51:25 +1100
+Subject: SUNRPC: use sv_lock to protect updates to sv_nrthreads.
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 2a36395fac3b72771f87c3ee4387e3a96d85a7cc ]
+
+Using sv_lock means we don't need to hold the service mutex over these
+updates.
+
+In particular, svc_exit_thread() no longer requires synchronisation, so
+threads can exit asynchronously.
+
+Note that we could use an atomic_t, but as there are many more read
+sites than writes, that would add unnecessary noise to the code.
+Some reads are already racy, and there is no need to make them race-free.
+
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ fs/nfsd/nfssvc.c | 5 ++---
+ net/sunrpc/svc.c | 9 +++++++--
+ 2 files changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 32f2c46a38323..16884a90e1ab0 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -55,9 +55,8 @@ static __be32 nfsd_init_request(struct svc_rqst *,
+ struct svc_process_info *);
+
+ /*
+- * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
+- * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+- * extent ->sv_temp_socks and ->sv_permsocks.
++ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members
++ * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks.
+ *
+ * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index 6cde8c87e5733..c8a0649e5cdf1 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -523,7 +523,7 @@ EXPORT_SYMBOL_GPL(svc_shutdown_net);
+
+ /*
+ * Destroy an RPC service. Should be called with appropriate locking to
+- * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
++ * protect sv_permsocks and sv_tempsocks.
+ */
+ void
+ svc_destroy(struct kref *ref)
+@@ -639,7 +639,10 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
+ return ERR_PTR(-ENOMEM);
+
+ svc_get(serv);
+- serv->sv_nrthreads++;
++ spin_lock_bh(&serv->sv_lock);
++ serv->sv_nrthreads += 1;
++ spin_unlock_bh(&serv->sv_lock);
++
+ spin_lock_bh(&pool->sp_lock);
+ pool->sp_nrthreads++;
+ list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
+@@ -884,7 +887,9 @@ svc_exit_thread(struct svc_rqst *rqstp)
+ list_del_rcu(&rqstp->rq_all);
+ spin_unlock_bh(&pool->sp_lock);
+
++ spin_lock_bh(&serv->sv_lock);
+ serv->sv_nrthreads -= 1;
++ spin_unlock_bh(&serv->sv_lock);
+ svc_sock_update_bufs(serv);
+
+ svc_rqst_free(rqstp);
+--
+2.43.0
+
--- /dev/null
+From e61b9f4ce802a3ad295d618d501105563b8536e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 08:57:43 -0500
+Subject: trace: Relocate event helper files
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 247c01ff5f8d66e62a404c91733be52fecb8b7f6 ]
+
+Steven Rostedt says:
+> The include/trace/events/ directory should only hold files that
+> are to create events, not headers that hold helper functions.
+>
+> Can you please move them out of include/trace/events/ as that
+> directory is "special" in the creation of events.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Acked-by: Leon Romanovsky <leonro@nvidia.com>
+Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Acked-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Stable-dep-of: 638593be55c0 ("NFSD: add CB_RECALL_ANY tracepoints")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+---
+ MAINTAINERS | 7 +++++++
+ drivers/infiniband/core/cm_trace.h | 2 +-
+ drivers/infiniband/core/cma_trace.h | 2 +-
+ fs/nfs/nfs4trace.h | 6 +++---
+ fs/nfs/nfstrace.h | 6 +++---
+ include/trace/events/rpcgss.h | 2 +-
+ include/trace/events/rpcrdma.h | 4 ++--
+ include/trace/events/sunrpc.h | 2 +-
+ include/trace/{events => misc}/fs.h | 0
+ include/trace/{events => misc}/nfs.h | 0
+ include/trace/{events => misc}/rdma.h | 0
+ include/trace/{events/sunrpc_base.h => misc/sunrpc.h} | 0
+ 12 files changed, 19 insertions(+), 12 deletions(-)
+ rename include/trace/{events => misc}/fs.h (100%)
+ rename include/trace/{events => misc}/nfs.h (100%)
+ rename include/trace/{events => misc}/rdma.h (100%)
+ rename include/trace/{events/sunrpc_base.h => misc/sunrpc.h} (100%)
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 9216b9c85ce92..6bfc75861c8c0 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9200,6 +9200,7 @@ F: drivers/infiniband/
+ F: include/rdma/
+ F: include/trace/events/ib_mad.h
+ F: include/trace/events/ib_umad.h
++F: include/trace/misc/rdma.h
+ F: include/uapi/linux/if_infiniband.h
+ F: include/uapi/rdma/
+ F: samples/bpf/ibumad_kern.c
+@@ -10181,6 +10182,12 @@ F: fs/nfs_common/
+ F: fs/nfsd/
+ F: include/linux/lockd/
+ F: include/linux/sunrpc/
++F: include/trace/events/rpcgss.h
++F: include/trace/events/rpcrdma.h
++F: include/trace/events/sunrpc.h
++F: include/trace/misc/fs.h
++F: include/trace/misc/nfs.h
++F: include/trace/misc/sunrpc.h
+ F: include/uapi/linux/nfsd/
+ F: include/uapi/linux/sunrpc/
+ F: net/sunrpc/
+diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
+index e9d282679ef15..944d9071245d2 100644
+--- a/drivers/infiniband/core/cm_trace.h
++++ b/drivers/infiniband/core/cm_trace.h
+@@ -16,7 +16,7 @@
+
+ #include <linux/tracepoint.h>
+ #include <rdma/ib_cm.h>
+-#include <trace/events/rdma.h>
++#include <trace/misc/rdma.h>
+
+ /*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
+index e45264267bcc9..47f3c6e4be893 100644
+--- a/drivers/infiniband/core/cma_trace.h
++++ b/drivers/infiniband/core/cma_trace.h
+@@ -15,7 +15,7 @@
+ #define _TRACE_RDMA_CMA_H
+
+ #include <linux/tracepoint.h>
+-#include <trace/events/rdma.h>
++#include <trace/misc/rdma.h>
+
+
+ DECLARE_EVENT_CLASS(cma_fsm_class,
+diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
+index 635f13a8d44aa..8565fa654f59a 100644
+--- a/fs/nfs/nfs4trace.h
++++ b/fs/nfs/nfs4trace.h
+@@ -9,10 +9,10 @@
+ #define _TRACE_NFS4_H
+
+ #include <linux/tracepoint.h>
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/sunrpc.h>
+
+-#include <trace/events/fs.h>
+-#include <trace/events/nfs.h>
++#include <trace/misc/fs.h>
++#include <trace/misc/nfs.h>
+
+ #define show_nfs_fattr_flags(valid) \
+ __print_flags((unsigned long)valid, "|", \
+diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
+index f6013d3b110b8..6804ca2efbf99 100644
+--- a/fs/nfs/nfstrace.h
++++ b/fs/nfs/nfstrace.h
+@@ -11,9 +11,9 @@
+ #include <linux/tracepoint.h>
+ #include <linux/iversion.h>
+
+-#include <trace/events/fs.h>
+-#include <trace/events/nfs.h>
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/fs.h>
++#include <trace/misc/nfs.h>
++#include <trace/misc/sunrpc.h>
+
+ #define nfs_show_cache_validity(v) \
+ __print_flags(v, "|", \
+diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
+index 3ba63319af3cd..b8fd13303ee7e 100644
+--- a/include/trace/events/rpcgss.h
++++ b/include/trace/events/rpcgss.h
+@@ -13,7 +13,7 @@
+
+ #include <linux/tracepoint.h>
+
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/sunrpc.h>
+
+ /**
+ ** GSS-API related trace events
+diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
+index 28ea73bba4e78..513c09774e4f3 100644
+--- a/include/trace/events/rpcrdma.h
++++ b/include/trace/events/rpcrdma.h
+@@ -15,8 +15,8 @@
+ #include <linux/tracepoint.h>
+ #include <rdma/ib_cm.h>
+
+-#include <trace/events/rdma.h>
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/rdma.h>
++#include <trace/misc/sunrpc.h>
+
+ /**
+ ** Event classes
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 68ae89c9a1c20..e8eb83315f4f2 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -14,7 +14,7 @@
+ #include <linux/net.h>
+ #include <linux/tracepoint.h>
+
+-#include <trace/events/sunrpc_base.h>
++#include <trace/misc/sunrpc.h>
+
+ TRACE_DEFINE_ENUM(SOCK_STREAM);
+ TRACE_DEFINE_ENUM(SOCK_DGRAM);
+diff --git a/include/trace/events/fs.h b/include/trace/misc/fs.h
+similarity index 100%
+rename from include/trace/events/fs.h
+rename to include/trace/misc/fs.h
+diff --git a/include/trace/events/nfs.h b/include/trace/misc/nfs.h
+similarity index 100%
+rename from include/trace/events/nfs.h
+rename to include/trace/misc/nfs.h
+diff --git a/include/trace/events/rdma.h b/include/trace/misc/rdma.h
+similarity index 100%
+rename from include/trace/events/rdma.h
+rename to include/trace/misc/rdma.h
+diff --git a/include/trace/events/sunrpc_base.h b/include/trace/misc/sunrpc.h
+similarity index 100%
+rename from include/trace/events/sunrpc_base.h
+rename to include/trace/misc/sunrpc.h
+--
+2.43.0
+