Lingfeng identified a bug and suggested two solutions, but both appear
to have issues.
Generally, we cannot release flc_lock while iterating over the file lock
list to avoid use-after-free (UAF) problems with file locks. However,
functions like nfs_delegation_claim_locks and nfs4_reclaim_locks cannot
adhere to this rule because recover_lock or nfs4_lock_delegation_recall
may take a long time. To resolve this, NFS switches to using nfsi->rwsem
for the same protection, and nfs_reclaim_locks follows this approach.
Although nfs_delegation_claim_locks uses so_delegreturn_mutex instead,
this is inadequate since a single inode can have multiple nfs4_state
instances. Therefore, the fix is to also use nfsi->rwsem in this case.
Furthermore, after commit
c69899a17ca4 ("NFSv4: Update of VFS byte range
lock must be atomic with the stateid update"), the functions
nfs4_locku_done and nfs4_lock_done also break this rule because they
call locks_lock_inode_wait without holding nfsi->rwsem. Simply adding
this protection could cause many deadlocks, so instead, the call to
locks_lock_inode_wait is moved into _nfs4_proc_setlk. Regarding the bug
fixed by commit
c69899a17ca4 ("NFSv4: Update of VFS byte range
lock must be atomic with the stateid update"), it has been resolved
after commit
0460253913e5 ("NFSv4: nfs4_do_open() is incorrectly triggering
state recovery") because all slots are drained before calling
nfs4_do_reclaim, which prevents concurrent stateid changes along this path.
Also, nfs_delegation_claim_locks does not cause this concurrency either
since when _nfs4_proc_setlk is called with NFS_DELEGATED_STATE, no RPC is
sent, so nfs4_lock_done is not called. Therefore,
nfs4_lock_delegation_recall from nfs_delegation_claim_locks is the first
time the stateid is set.
Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
Closes: https://lore.kernel.org/all/20250419085709.1452492-1-lilingfeng3@huawei.com/
Closes: https://lore.kernel.org/all/20250715030559.2906634-1-lilingfeng3@huawei.com/
Fixes: c69899a17ca4 ("NFSv4: Update of VFS byte range lock must be atomic with the stateid update")
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@hammerspace.com>
static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
struct file_lock *fl;
struct file_lock_context *flctx = locks_inode_context(inode);
struct list_head *list;
goto out;
list = &flctx->flc_posix;
+
+ /* Guard against reclaim and new lock/unlock calls */
+ down_write(&nfsi->rwsem);
spin_lock(&flctx->flc_lock);
restart:
for_each_file_lock(fl, list) {
continue;
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
- if (status < 0)
+ if (status < 0) {
+ up_write(&nfsi->rwsem);
goto out;
+ }
spin_lock(&flctx->flc_lock);
}
if (list == &flctx->flc_posix) {
goto restart;
}
spin_unlock(&flctx->flc_lock);
+ up_write(&nfsi->rwsem);
out:
return status;
}
switch (task->tk_status) {
case 0:
renew_lease(calldata->server, calldata->timestamp);
- locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl);
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
case 0:
renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
data->timestamp);
- if (data->arg.new_lock && !data->cancelled) {
- data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
- if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
- goto out_restart;
- }
if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
msg.rpc_argp = &data->arg;
msg.rpc_resp = &data->res;
task_setup_data.callback_data = data;
- if (recovery_type > NFS_LOCK_NEW) {
- if (recovery_type == NFS_LOCK_RECLAIM)
- data->arg.reclaim = NFS_LOCK_RECLAIM;
- } else
- data->arg.new_lock = 1;
+
+ if (recovery_type == NFS_LOCK_RECLAIM)
+ data->arg.reclaim = NFS_LOCK_RECLAIM;
+
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
+ if (status)
+ goto out;
+
+ down_read(&nfsi->rwsem);
+ request->c.flc_flags &= ~(FL_SLEEP | FL_ACCESS);
+ status = locks_lock_inode_wait(state->inode, request);
+ up_read(&nfsi->rwsem);
out:
request->c.flc_flags = flags;
return status;
struct nfs_lowner lock_owner;
unsigned char block : 1;
unsigned char reclaim : 1;
- unsigned char new_lock : 1;
unsigned char new_lock_owner : 1;
};