git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
SUNRPC: improve 'swap' handling: scheduling and PF_MEMALLOC
author: NeilBrown <neilb@suse.de>
Sun, 6 Mar 2022 23:41:44 +0000 (10:41 +1100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 8 Apr 2022 12:06:28 +0000 (14:06 +0200)
[ Upstream commit 8db55a032ac7ac1ed7b98d6b1dc980e6378c652f ]

rpc tasks can be marked as RPC_TASK_SWAPPER.  This causes GFP_MEMALLOC
to be used for some allocations.  This is needed in some cases, but not
in all of the cases where it is currently provided, and it is also
needed in some cases where it is not currently provided.

Currently *all* tasks associated with a rpc_client on which swap is
enabled get the flag and hence some GFP_MEMALLOC support.

GFP_MEMALLOC is provided for ->buf_alloc() but only swap-writes need it.
However xdr_alloc_bvec does not get GFP_MEMALLOC - though it often does
need it.

xdr_alloc_bvec is called while the XPRT_LOCK is held.  If this blocks,
then it blocks all other queued tasks.  So this allocation needs
GFP_MEMALLOC for *all* requests, not just writes, when the xprt is used
for any swap writes.

Similarly, if the transport is not connected, that will block all
requests including swap writes, so memory allocations should get
GFP_MEMALLOC if swap writes are possible.

So with this patch:
 1/ we ONLY set RPC_TASK_SWAPPER for swap writes.
 2/ __rpc_execute() sets PF_MEMALLOC while handling any task
    with RPC_TASK_SWAPPER set, or when handling any task that
    holds the XPRT_LOCKED lock on an xprt used for swap.
    This removes the need for the RPC_IS_SWAPPER() test
    in ->buf_alloc handlers.
 3/ xprt_prepare_transmit() sets PF_MEMALLOC after locking
    any task to a swapper xprt.  __rpc_execute() will clear it.
 4/ PF_MEMALLOC is set for all the connect workers.

Reviewed-by: Chuck Lever <chuck.lever@oracle.com> (for xprtrdma parts)
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/nfs/write.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtsock.c

index 7a23b46445073ce3e4cba3a6f7cca70c7027426d..e86aff429993efbf5a2762ed745bb618a2847e71 100644 (file)
@@ -1411,6 +1411,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
 {
        int priority = flush_task_priority(how);
 
+       if (IS_SWAPFILE(hdr->inode))
+               task_setup_data->flags |= RPC_TASK_SWAPPER;
        task_setup_data->priority = priority;
        rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
        trace_nfs_initiate_write(hdr);
index c83fe618767c4d486ee9a467d901f44e48042685..5985b78eddf19e893aa1df552cb6e0464b893735 100644 (file)
@@ -1085,8 +1085,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
                task->tk_flags |= RPC_TASK_TIMEOUT;
        if (clnt->cl_noretranstimeo)
                task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
-       if (atomic_read(&clnt->cl_swapper))
-               task->tk_flags |= RPC_TASK_SWAPPER;
        /* Add to the client's list of all tasks */
        spin_lock(&clnt->cl_lock);
        list_add_tail(&task->tk_task, &clnt->cl_tasks);
index d5b6e897f5a52e358451070dc9a1b2ab073d7656..ae295844ac55a89ca2634ca0206822df42f38031 100644 (file)
@@ -876,6 +876,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
                ops->rpc_release(calldata);
 }
 
+static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk)
+{
+       if (!xprt)
+               return false;
+       if (!atomic_read(&xprt->swapper))
+               return false;
+       return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk;
+}
+
 /*
  * This is the RPC `scheduler' (or rather, the finite state machine).
  */
@@ -884,6 +893,7 @@ static void __rpc_execute(struct rpc_task *task)
        struct rpc_wait_queue *queue;
        int task_is_async = RPC_IS_ASYNC(task);
        int status = 0;
+       unsigned long pflags = current->flags;
 
        WARN_ON_ONCE(RPC_IS_QUEUED(task));
        if (RPC_IS_QUEUED(task))
@@ -906,6 +916,10 @@ static void __rpc_execute(struct rpc_task *task)
                }
                if (!do_action)
                        break;
+               if (RPC_IS_SWAPPER(task) ||
+                   xprt_needs_memalloc(task->tk_xprt, task))
+                       current->flags |= PF_MEMALLOC;
+
                trace_rpc_task_run_action(task, do_action);
                do_action(task);
 
@@ -943,7 +957,7 @@ static void __rpc_execute(struct rpc_task *task)
                rpc_clear_running(task);
                spin_unlock(&queue->lock);
                if (task_is_async)
-                       return;
+                       goto out;
 
                /* sync task: sleep here */
                trace_rpc_task_sync_sleep(task, task->tk_action);
@@ -967,6 +981,8 @@ static void __rpc_execute(struct rpc_task *task)
 
        /* Release all resources associated with the task */
        rpc_release_task(task);
+out:
+       current_restore_flags(pflags, PF_MEMALLOC);
 }
 
 /*
@@ -1025,8 +1041,6 @@ int rpc_malloc(struct rpc_task *task)
 
        if (RPC_IS_ASYNC(task))
                gfp = GFP_NOWAIT | __GFP_NOWARN;
-       if (RPC_IS_SWAPPER(task))
-               gfp |= __GFP_MEMALLOC;
 
        size += sizeof(struct rpc_buffer);
        if (size <= RPC_BUFFER_MAXSIZE)
index 5388263f8fc8a6dae95c5acaf8ec79cccf11f53e..396a74974f60f543645019d1d9f6d0caa21d465c 100644 (file)
@@ -1503,6 +1503,9 @@ bool xprt_prepare_transmit(struct rpc_task *task)
                return false;
 
        }
+       if (atomic_read(&xprt->swapper))
+               /* This will be cleared in __rpc_execute */
+               current->flags |= PF_MEMALLOC;
        return true;
 }
 
index a522771155002948321708f64e10a113f9caaabc..6268af7e031019f16e10e15e9899056cecdca464 100644 (file)
@@ -239,8 +239,11 @@ xprt_rdma_connect_worker(struct work_struct *work)
        struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
                                                   rx_connect_worker.work);
        struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+       unsigned int pflags = current->flags;
        int rc;
 
+       if (atomic_read(&xprt->swapper))
+               current->flags |= PF_MEMALLOC;
        rc = rpcrdma_xprt_connect(r_xprt);
        xprt_clear_connecting(xprt);
        if (!rc) {
@@ -254,6 +257,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
                rpcrdma_xprt_disconnect(r_xprt);
        xprt_unlock_connect(xprt, r_xprt);
        xprt_wake_pending_tasks(xprt, rc);
+       current_restore_flags(pflags, PF_MEMALLOC);
 }
 
 /**
@@ -576,8 +580,6 @@ xprt_rdma_allocate(struct rpc_task *task)
        flags = RPCRDMA_DEF_GFP;
        if (RPC_IS_ASYNC(task))
                flags = GFP_NOWAIT | __GFP_NOWARN;
-       if (RPC_IS_SWAPPER(task))
-               flags |= __GFP_MEMALLOC;
 
        if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize,
                                  flags))
index 056fa0230359dde9a4ab427e6348f31e142d0d39..821eeea1c83b257bcc13370e848cdd7cc166f90c 100644 (file)
@@ -2070,7 +2070,10 @@ static void xs_udp_setup_socket(struct work_struct *work)
        struct rpc_xprt *xprt = &transport->xprt;
        struct socket *sock;
        int status = -EIO;
+       unsigned int pflags = current->flags;
 
+       if (atomic_read(&xprt->swapper))
+               current->flags |= PF_MEMALLOC;
        sock = xs_create_sock(xprt, transport,
                        xs_addr(xprt)->sa_family, SOCK_DGRAM,
                        IPPROTO_UDP, false);
@@ -2090,6 +2093,7 @@ out:
        xprt_clear_connecting(xprt);
        xprt_unlock_connect(xprt, transport);
        xprt_wake_pending_tasks(xprt, status);
+       current_restore_flags(pflags, PF_MEMALLOC);
 }
 
 /**
@@ -2249,7 +2253,10 @@ static void xs_tcp_setup_socket(struct work_struct *work)
        struct socket *sock = transport->sock;
        struct rpc_xprt *xprt = &transport->xprt;
        int status;
+       unsigned int pflags = current->flags;
 
+       if (atomic_read(&xprt->swapper))
+               current->flags |= PF_MEMALLOC;
        if (!sock) {
                sock = xs_create_sock(xprt, transport,
                                xs_addr(xprt)->sa_family, SOCK_STREAM,
@@ -2314,6 +2321,7 @@ out:
        xprt_clear_connecting(xprt);
 out_unlock:
        xprt_unlock_connect(xprt, transport);
+       current_restore_flags(pflags, PF_MEMALLOC);
 }
 
 /**