--- /dev/null
+From d432d1006b60bd6b5c38974727bdce78f449eeea Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 5 Feb 2024 13:22:39 +1100
+Subject: nfsd: don't take fi_lock in nfsd_break_deleg_cb()
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit 5ea9a7c5fe4149f165f0e3b624fe08df02b6c301 ]
+
+A recent change to check_for_locks() changed it to take ->flc_lock while
+holding ->fi_lock. This creates a lock inversion (reported by lockdep)
+because there is a case where ->fi_lock is taken while holding
+->flc_lock.
+
+->flc_lock is held across ->fl_lmops callbacks, and
+nfsd_break_deleg_cb() is one of those and does take ->fi_lock. However
+it doesn't need to.
+
+Prior to v4.17-rc1~110^2~22 ("nfsd: create a separate lease for each
+delegation") nfsd_break_deleg_cb() would walk the ->fi_delegations list
+and so needed the lock. Since then it doesn't walk the list and doesn't
+need the lock.
+
+Two actions are performed under the lock. One is to call
+nfsd_break_one_deleg which calls nfsd4_run_cb(). These doesn't act on
+the nfs4_file at all, so don't need the lock.
+
+The other is to set ->fi_had_conflict which is in the nfs4_file.
+This field is only ever set here (except when initialised to false)
+so there is no possible problem will multiple threads racing when
+setting it.
+
+The field is tested twice in nfs4_set_delegation(). The first test does
+not hold a lock and is documented as an opportunistic optimisation, so
+it doesn't impose any need to hold ->fi_lock while setting
+->fi_had_conflict.
+
+The second test in nfs4_set_delegation() *is* make under ->fi_lock, so
+removing the locking when ->fi_had_conflict is set could make a change.
+The change could only be interesting if ->fi_had_conflict tested as
+false even though nfsd_break_one_deleg() ran before ->fi_lock was
+unlocked. i.e. while hash_delegation_locked() was running.
+As hash_delegation_lock() doesn't interact in any way with nfs4_run_cb()
+there can be no importance to this interaction.
+
+So this patch removes the locking from nfsd_break_one_deleg() and moves
+the final test on ->fi_had_conflict out of the locked region to make it
+clear that locking isn't important to the test. It is still tested
+*after* vfs_setlease() has succeeded. This might be significant and as
+vfs_setlease() takes ->flc_lock, and nfsd_break_one_deleg() is called
+under ->flc_lock this "after" is a true ordering provided by a spinlock.
+
+Fixes: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -4908,10 +4908,8 @@ nfsd_break_deleg_cb(struct file_lock *fl
+ */
+ fl->fl_break_time = 0;
+
+- spin_lock(&fp->fi_lock);
+ fp->fi_had_conflict = true;
+ nfsd_break_one_deleg(dp);
+- spin_unlock(&fp->fi_lock);
+ return false;
+ }
+
+@@ -5499,12 +5497,13 @@ nfs4_set_delegation(struct nfsd4_open *o
+ if (status)
+ goto out_unlock;
+
++ status = -EAGAIN;
++ if (fp->fi_had_conflict)
++ goto out_unlock;
++
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+- if (fp->fi_had_conflict)
+- status = -EAGAIN;
+- else
+- status = hash_delegation_locked(dp, fp);
++ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
--- /dev/null
+From 86bf255a373c1f0166f340b613f0632f90786748 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 22 Jan 2024 14:58:16 +1100
+Subject: nfsd: fix RELEASE_LOCKOWNER
+
+From: NeilBrown <neilb@suse.de>
+
+[ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ]
+
+The test on so_count in nfsd4_release_lockowner() is nonsense and
+harmful. Revert to using check_for_locks(), changing that to not sleep.
+
+First: harmful.
+As is documented in the kdoc comment for nfsd4_release_lockowner(), the
+test on so_count can transiently return a false positive resulting in a
+return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is
+clearly a protocol violation and with the Linux NFS client it can cause
+incorrect behaviour.
+
+If RELEASE_LOCKOWNER is sent while some other thread is still
+processing a LOCK request which failed because, at the time that request
+was received, the given owner held a conflicting lock, then the nfsd
+thread processing that LOCK request can hold a reference (conflock) to
+the lock owner that causes nfsd4_release_lockowner() to return an
+incorrect error.
+
+The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it
+never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so
+it knows that the error is impossible. It assumes the lock owner was in
+fact released so it feels free to use the same lock owner identifier in
+some later locking request.
+
+When it does reuse a lock owner identifier for which a previous RELEASE
+failed, it will naturally use a lock_seqid of zero. However the server,
+which didn't release the lock owner, will expect a larger lock_seqid and
+so will respond with NFS4ERR_BAD_SEQID.
+
+So clearly it is harmful to allow a false positive, which testing
+so_count allows.
+
+The test is nonsense because ... well... it doesn't mean anything.
+
+so_count is the sum of three different counts.
+1/ the set of states listed on so_stateids
+2/ the set of active vfs locks owned by any of those states
+3/ various transient counts such as for conflicting locks.
+
+When it is tested against '2' it is clear that one of these is the
+transient reference obtained by find_lockowner_str_locked(). It is not
+clear what the other one is expected to be.
+
+In practice, the count is often 2 because there is precisely one state
+on so_stateids. If there were more, this would fail.
+
+In my testing I see two circumstances when RELEASE_LOCKOWNER is called.
+In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in
+all the lock states being removed, and so the lockowner being discarded
+(it is removed when there are no more references which usually happens
+when the lock state is discarded). When nfsd4_release_lockowner() finds
+that the lock owner doesn't exist, it returns success.
+
+The other case shows an so_count of '2' and precisely one state listed
+in so_stateid. It appears that the Linux client uses a separate lock
+owner for each file resulting in one lock state per lock owner, so this
+test on '2' is safe. For another client it might not be safe.
+
+So this patch changes check_for_locks() to use the (newish)
+find_any_file_locked() so that it doesn't take a reference on the
+nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With
+this check is it safe to restore the use of check_for_locks() rather
+than testing so_count against the mysterious '2'.
+
+Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()")
+Signed-off-by: NeilBrown <neilb@suse.de>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Cc: stable@vger.kernel.org # v6.2+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nfsd/nfs4state.c | 26 +++++++++++++++-----------
+ 1 file changed, 15 insertions(+), 11 deletions(-)
+
+--- a/fs/nfsd/nfs4state.c
++++ b/fs/nfsd/nfs4state.c
+@@ -7736,14 +7736,16 @@ check_for_locks(struct nfs4_file *fp, st
+ {
+ struct file_lock *fl;
+ int status = false;
+- struct nfsd_file *nf = find_any_file(fp);
++ struct nfsd_file *nf;
+ struct inode *inode;
+ struct file_lock_context *flctx;
+
++ spin_lock(&fp->fi_lock);
++ nf = find_any_file_locked(fp);
+ if (!nf) {
+ /* Any valid lock stateid should have some sort of access */
+ WARN_ON_ONCE(1);
+- return status;
++ goto out;
+ }
+
+ inode = locks_inode(nf->nf_file);
+@@ -7759,7 +7761,8 @@ check_for_locks(struct nfs4_file *fp, st
+ }
+ spin_unlock(&flctx->flc_lock);
+ }
+- nfsd_file_put(nf);
++out:
++ spin_unlock(&fp->fi_lock);
+ return status;
+ }
+
+@@ -7769,10 +7772,8 @@ check_for_locks(struct nfs4_file *fp, st
+ * @cstate: NFSv4 COMPOUND state
+ * @u: RELEASE_LOCKOWNER arguments
+ *
+- * The lockowner's so_count is bumped when a lock record is added
+- * or when copying a conflicting lock. The latter case is brief,
+- * but can lead to fleeting false positives when looking for
+- * locks-in-use.
++ * Check if theree are any locks still held and if not - free the lockowner
++ * and any lock state that is owned.
+ *
+ * Return values:
+ * %nfs_ok: lockowner released or not found
+@@ -7808,10 +7809,13 @@ nfsd4_release_lockowner(struct svc_rqst
+ spin_unlock(&clp->cl_lock);
+ return nfs_ok;
+ }
+- if (atomic_read(&lo->lo_owner.so_count) != 2) {
+- spin_unlock(&clp->cl_lock);
+- nfs4_put_stateowner(&lo->lo_owner);
+- return nfserr_locks_held;
++
++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
++ if (check_for_locks(stp->st_stid.sc_file, lo)) {
++ spin_unlock(&clp->cl_lock);
++ nfs4_put_stateowner(&lo->lo_owner);
++ return nfserr_locks_held;
++ }
+ }
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {