]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.14.69/nfsv4-client-live-hangs-after-live-data-migration-recovery.patch
4.9-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 4.14.69 / nfsv4-client-live-hangs-after-live-data-migration-recovery.patch
1 From 0f90be132cbf1537d87a6a8b9e80867adac892f6 Mon Sep 17 00:00:00 2001
2 From: Bill Baker <Bill.Baker@Oracle.com>
3 Date: Tue, 19 Jun 2018 16:24:58 -0500
4 Subject: NFSv4 client live hangs after live data migration recovery
5
6 From: Bill Baker <Bill.Baker@Oracle.com>
7
8 commit 0f90be132cbf1537d87a6a8b9e80867adac892f6 upstream.
9
10 After a live data migration event at the NFS server, the client may send
11 I/O requests to the wrong server, causing a live hang due to repeated
12 recovery events. On the wire, this will appear as an I/O request failing
13 with NFS4ERR_BADSESSION, followed by successful CREATE_SESSION, repeatedly.
14 NFS4ERR_BADSESSION is returned because the session ID being used was
15 issued by the other server and is not valid at the old server.
16
17 The failure is caused by async worker threads having cached the transport
18 (xprt) in the rpc_task structure. After the migration recovery completes,
19 the task is redispatched and the task resends the request to the wrong
20 server based on the old value still present in tk_xprt.
21
22 The solution is to recompute the tk_xprt field of the rpc_task structure
23 so that the request goes to the correct server.
24
25 Signed-off-by: Bill Baker <bill.baker@oracle.com>
26 Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
27 Tested-by: Helen Chao <helen.chao@oracle.com>
28 Fixes: fb43d17210ba ("SUNRPC: Use the multipath iterator to assign a ...")
29 Cc: stable@vger.kernel.org # v4.9+
30 Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
31 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
32
33 ---
34 fs/nfs/nfs4proc.c | 9 ++++++++-
35 include/linux/sunrpc/clnt.h | 1 +
36 net/sunrpc/clnt.c | 28 ++++++++++++++++++++--------
37 3 files changed, 29 insertions(+), 9 deletions(-)
38
39 --- a/fs/nfs/nfs4proc.c
40 +++ b/fs/nfs/nfs4proc.c
41 @@ -547,8 +547,15 @@ nfs4_async_handle_exception(struct rpc_t
42 ret = -EIO;
43 return ret;
44 out_retry:
45 - if (ret == 0)
46 + if (ret == 0) {
47 exception->retry = 1;
48 + /*
49 + * For NFS4ERR_MOVED, the client transport will need to
50 + * be recomputed after migration recovery has completed.
51 + */
52 + if (errorcode == -NFS4ERR_MOVED)
53 + rpc_task_release_transport(task);
54 + }
55 return ret;
56 }
57
58 --- a/include/linux/sunrpc/clnt.h
59 +++ b/include/linux/sunrpc/clnt.h
60 @@ -156,6 +156,7 @@ int rpc_switch_client_transport(struct
61
62 void rpc_shutdown_client(struct rpc_clnt *);
63 void rpc_release_client(struct rpc_clnt *);
64 +void rpc_task_release_transport(struct rpc_task *);
65 void rpc_task_release_client(struct rpc_task *);
66
67 int rpcb_create_local(struct net *);
68 --- a/net/sunrpc/clnt.c
69 +++ b/net/sunrpc/clnt.c
70 @@ -965,10 +965,20 @@ out:
71 }
72 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
73
74 +void rpc_task_release_transport(struct rpc_task *task)
75 +{
76 + struct rpc_xprt *xprt = task->tk_xprt;
77 +
78 + if (xprt) {
79 + task->tk_xprt = NULL;
80 + xprt_put(xprt);
81 + }
82 +}
83 +EXPORT_SYMBOL_GPL(rpc_task_release_transport);
84 +
85 void rpc_task_release_client(struct rpc_task *task)
86 {
87 struct rpc_clnt *clnt = task->tk_client;
88 - struct rpc_xprt *xprt = task->tk_xprt;
89
90 if (clnt != NULL) {
91 /* Remove from client task list */
92 @@ -979,12 +989,14 @@ void rpc_task_release_client(struct rpc_
93
94 rpc_release_client(clnt);
95 }
96 + rpc_task_release_transport(task);
97 +}
98
99 - if (xprt != NULL) {
100 - task->tk_xprt = NULL;
101 -
102 - xprt_put(xprt);
103 - }
104 +static
105 +void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
106 +{
107 + if (!task->tk_xprt)
108 + task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
109 }
110
111 static
112 @@ -992,8 +1004,7 @@ void rpc_task_set_client(struct rpc_task
113 {
114
115 if (clnt != NULL) {
116 - if (task->tk_xprt == NULL)
117 - task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
118 + rpc_task_set_transport(task, clnt);
119 task->tk_client = clnt;
120 atomic_inc(&clnt->cl_count);
121 if (clnt->cl_softrtry)
122 @@ -1529,6 +1540,7 @@ call_start(struct rpc_task *task)
123 clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
124 clnt->cl_stats->rpccnt++;
125 task->tk_action = call_reserve;
126 + rpc_task_set_transport(task, clnt);
127 }
128
129 /*