git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
NFSv4/flexfiles: Fix handling of NFS level errors in I/O
author Trond Myklebust <trond.myklebust@hammerspace.com>
Thu, 19 Jun 2025 19:16:11 +0000 (15:16 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 17 Jul 2025 16:25:04 +0000 (18:25 +0200)
[ Upstream commit 38074de35b015df5623f524d6f2b49a0cd395c40 ]

Allow the flexfiles error handling to recognise NFS level errors (as
opposed to RPC level errors) and handle them separately. The main
motivator is the NFSERR_PERM errors that get returned if the NFS client
connects to the data server through a port number that is lower than
1024. In that case, the client should disconnect and retry a READ on a
different data server, or it should retry a WRITE after reconnecting.

Reviewed-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/nfs/flexfilelayout/flexfilelayout.c

index 87e8b869d96962141abf5f3c3a89eed804c654ba..31ae042f5a75814bceb61155258c5fe19ff7df39 100644 (file)
@@ -1129,6 +1129,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
 }
 
 static int ff_layout_async_handle_error_v4(struct rpc_task *task,
+                                          u32 op_status,
                                           struct nfs4_state *state,
                                           struct nfs_client *clp,
                                           struct pnfs_layout_segment *lseg,
@@ -1139,34 +1140,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
        struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
        struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
 
-       switch (task->tk_status) {
-       case -NFS4ERR_BADSESSION:
-       case -NFS4ERR_BADSLOT:
-       case -NFS4ERR_BAD_HIGH_SLOT:
-       case -NFS4ERR_DEADSESSION:
-       case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
-       case -NFS4ERR_SEQ_FALSE_RETRY:
-       case -NFS4ERR_SEQ_MISORDERED:
+       switch (op_status) {
+       case NFS4_OK:
+       case NFS4ERR_NXIO:
+               break;
+       case NFSERR_PERM:
+               if (!task->tk_xprt)
+                       break;
+               xprt_force_disconnect(task->tk_xprt);
+               goto out_retry;
+       case NFS4ERR_BADSESSION:
+       case NFS4ERR_BADSLOT:
+       case NFS4ERR_BAD_HIGH_SLOT:
+       case NFS4ERR_DEADSESSION:
+       case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+       case NFS4ERR_SEQ_FALSE_RETRY:
+       case NFS4ERR_SEQ_MISORDERED:
                dprintk("%s ERROR %d, Reset session. Exchangeid "
                        "flags 0x%x\n", __func__, task->tk_status,
                        clp->cl_exchange_flags);
                nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
-               break;
-       case -NFS4ERR_DELAY:
+               goto out_retry;
+       case NFS4ERR_DELAY:
                nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
                fallthrough;
-       case -NFS4ERR_GRACE:
+       case NFS4ERR_GRACE:
                rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
-               break;
-       case -NFS4ERR_RETRY_UNCACHED_REP:
-               break;
+               goto out_retry;
+       case NFS4ERR_RETRY_UNCACHED_REP:
+               goto out_retry;
        /* Invalidate Layout errors */
-       case -NFS4ERR_PNFS_NO_LAYOUT:
-       case -ESTALE:           /* mapped NFS4ERR_STALE */
-       case -EBADHANDLE:       /* mapped NFS4ERR_BADHANDLE */
-       case -EISDIR:           /* mapped NFS4ERR_ISDIR */
-       case -NFS4ERR_FHEXPIRED:
-       case -NFS4ERR_WRONG_TYPE:
+       case NFS4ERR_PNFS_NO_LAYOUT:
+       case NFS4ERR_STALE:
+       case NFS4ERR_BADHANDLE:
+       case NFS4ERR_ISDIR:
+       case NFS4ERR_FHEXPIRED:
+       case NFS4ERR_WRONG_TYPE:
                dprintk("%s Invalid layout error %d\n", __func__,
                        task->tk_status);
                /*
@@ -1179,6 +1188,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
                pnfs_destroy_layout(NFS_I(inode));
                rpc_wake_up(&tbl->slot_tbl_waitq);
                goto reset;
+       default:
+               break;
+       }
+
+       switch (task->tk_status) {
        /* RPC connection errors */
        case -ECONNREFUSED:
        case -EHOSTDOWN:
@@ -1192,26 +1206,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
                nfs4_delete_deviceid(devid->ld, devid->nfs_client,
                                &devid->deviceid);
                rpc_wake_up(&tbl->slot_tbl_waitq);
-               /* fall through */
+               break;
        default:
-               if (ff_layout_avoid_mds_available_ds(lseg))
-                       return -NFS4ERR_RESET_TO_PNFS;
-reset:
-               dprintk("%s Retry through MDS. Error %d\n", __func__,
-                       task->tk_status);
-               return -NFS4ERR_RESET_TO_MDS;
+               break;
        }
+
+       if (ff_layout_avoid_mds_available_ds(lseg))
+               return -NFS4ERR_RESET_TO_PNFS;
+reset:
+       dprintk("%s Retry through MDS. Error %d\n", __func__,
+               task->tk_status);
+       return -NFS4ERR_RESET_TO_MDS;
+
+out_retry:
        task->tk_status = 0;
        return -EAGAIN;
 }
 
 /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
 static int ff_layout_async_handle_error_v3(struct rpc_task *task,
+                                          u32 op_status,
+                                          struct nfs_client *clp,
                                           struct pnfs_layout_segment *lseg,
                                           int idx)
 {
        struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
 
+       switch (op_status) {
+       case NFS_OK:
+       case NFSERR_NXIO:
+               break;
+       case NFSERR_PERM:
+               if (!task->tk_xprt)
+                       break;
+               xprt_force_disconnect(task->tk_xprt);
+               goto out_retry;
+       case NFSERR_ACCES:
+       case NFSERR_BADHANDLE:
+       case NFSERR_FBIG:
+       case NFSERR_IO:
+       case NFSERR_NOSPC:
+       case NFSERR_ROFS:
+       case NFSERR_STALE:
+               goto out_reset_to_pnfs;
+       case NFSERR_JUKEBOX:
+               nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+               goto out_retry;
+       default:
+               break;
+       }
+
        switch (task->tk_status) {
        /* File access problems. Don't mark the device as unavailable */
        case -EACCES:
@@ -1230,6 +1274,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
                nfs4_delete_deviceid(devid->ld, devid->nfs_client,
                                &devid->deviceid);
        }
+out_reset_to_pnfs:
        /* FIXME: Need to prevent infinite looping here. */
        return -NFS4ERR_RESET_TO_PNFS;
 out_retry:
@@ -1240,6 +1285,7 @@ out_retry:
 }
 
 static int ff_layout_async_handle_error(struct rpc_task *task,
+                                       u32 op_status,
                                        struct nfs4_state *state,
                                        struct nfs_client *clp,
                                        struct pnfs_layout_segment *lseg,
@@ -1258,10 +1304,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
 
        switch (vers) {
        case 3:
-               return ff_layout_async_handle_error_v3(task, lseg, idx);
-       case 4:
-               return ff_layout_async_handle_error_v4(task, state, clp,
+               return ff_layout_async_handle_error_v3(task, op_status, clp,
                                                       lseg, idx);
+       case 4:
+               return ff_layout_async_handle_error_v4(task, op_status, state,
+                                                      clp, lseg, idx);
        default:
                /* should never happen */
                WARN_ON_ONCE(1);
@@ -1306,7 +1353,17 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
        switch (status) {
        case NFS4ERR_DELAY:
        case NFS4ERR_GRACE:
-               return;
+       case NFS4ERR_PERM:
+               break;
+       case NFS4ERR_NXIO:
+               ff_layout_mark_ds_unreachable(lseg, idx);
+               /*
+                * Don't return the layout if this is a read and we still
+                * have layouts to try
+                */
+               if (opnum == OP_READ)
+                       break;
+               fallthrough;
        default:
                break;
        }
@@ -1329,12 +1386,15 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
        int err;
 
        trace_nfs4_pnfs_read(hdr, task->tk_status);
-       if (task->tk_status < 0)
+       if (task->tk_status < 0) {
                ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
                                            hdr->args.offset, hdr->args.count,
                                            hdr->res.op_status, OP_READ,
                                            task->tk_status);
-       err = ff_layout_async_handle_error(task, hdr->args.context->state,
+       }
+
+       err = ff_layout_async_handle_error(task, hdr->res.op_status,
+                                          hdr->args.context->state,
                                           hdr->ds_clp, hdr->lseg,
                                           hdr->pgio_mirror_idx);
 
@@ -1502,12 +1562,15 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
        int err;
 
        trace_nfs4_pnfs_write(hdr, task->tk_status);
-       if (task->tk_status < 0)
+       if (task->tk_status < 0) {
                ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
                                            hdr->args.offset, hdr->args.count,
                                            hdr->res.op_status, OP_WRITE,
                                            task->tk_status);
-       err = ff_layout_async_handle_error(task, hdr->args.context->state,
+       }
+
+       err = ff_layout_async_handle_error(task, hdr->res.op_status,
+                                          hdr->args.context->state,
                                           hdr->ds_clp, hdr->lseg,
                                           hdr->pgio_mirror_idx);
 
@@ -1545,13 +1608,16 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
        int err;
 
        trace_nfs4_pnfs_commit_ds(data, task->tk_status);
-       if (task->tk_status < 0)
+       if (task->tk_status < 0) {
                ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
                                            data->args.offset, data->args.count,
                                            data->res.op_status, OP_COMMIT,
                                            task->tk_status);
-       err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
-                                          data->lseg, data->ds_commit_index);
+       }
+
+       err = ff_layout_async_handle_error(task, data->res.op_status,
+                                          NULL, data->ds_clp, data->lseg,
+                                          data->ds_commit_index);
 
        switch (err) {
        case -NFS4ERR_RESET_TO_PNFS: