]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | From: Gerald Schaefer <geraldsc@de.ibm.com> |
2 | Subject: zfcp: Improve reliability of SCSI eh handlers in zfcp | |
3 | References: bnc#518291,LTC#54465 | |
4 | ||
5 | Symptom: During error recovery scenarios, zfcp and SCSI midlayer | |
6 | run error recovery and later SCSI devices are flagged as | |
7 | "offline" in the Linux kernel. | |
8 | Problem: The SCSI midlayer error recovery issues various requests | |
9 | to zfcp. If zfcp is recovering internally at the same | |
10 | time, the SCSI midlayer requests fail immediately and the | |
11 | SCSI midlayer recovery will run until hitting the final | |
12 | step where SCSI devices are flagged as "offline". | |
13 | Solution: Backport the commit 63caf367e1c92e0667a344d9b687c04e6ef054b5. | |
14 | This commit changes the SCSI midlayer recovery callbacks | |
15 | in zfcp to wait for the zfcp erp to finish before issuing any | |
16 | request. If necessary, retry the request (up to three attempts). | |
17 | ||
18 | Acked-by: John Jolly <jjolly@suse.de> | |
19 | --- | |
20 | drivers/s390/scsi/zfcp_def.h | 3 - | |
21 | drivers/s390/scsi/zfcp_ext.h | 11 +-- | |
22 | drivers/s390/scsi/zfcp_fsf.c | 39 ++++--------- | |
23 | drivers/s390/scsi/zfcp_scsi.c | 122 ++++++++++++++++++++---------------------- | |
24 | 4 files changed, 77 insertions(+), 98 deletions(-) | |
25 | ||
26 | --- a/drivers/s390/scsi/zfcp_def.h 2009-07-01 13:39:47.000000000 +0200 | |
27 | +++ b/drivers/s390/scsi/zfcp_def.h 2009-07-01 13:39:50.000000000 +0200 | |
28 | @@ -621,9 +621,6 @@ struct zfcp_fsf_req_qtcb { | |
29 | ||
30 | /********************** ZFCP SPECIFIC DEFINES ********************************/ | |
31 | ||
32 | -#define ZFCP_REQ_AUTO_CLEANUP 0x00000002 | |
33 | -#define ZFCP_REQ_NO_QTCB 0x00000008 | |
34 | - | |
35 | #define ZFCP_SET 0x00000100 | |
36 | #define ZFCP_CLEAR 0x00000200 | |
37 | ||
38 | --- a/drivers/s390/scsi/zfcp_ext.h 2009-07-01 13:39:47.000000000 +0200 | |
39 | +++ b/drivers/s390/scsi/zfcp_ext.h 2009-07-01 13:39:50.000000000 +0200 | |
40 | @@ -127,16 +127,13 @@ extern int zfcp_status_read_refill(struc | |
41 | extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *, | |
42 | struct zfcp_erp_action *); | |
43 | extern int zfcp_fsf_send_els(struct zfcp_send_els *); | |
44 | -extern int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *, | |
45 | - struct zfcp_unit *, | |
46 | - struct scsi_cmnd *, int, int); | |
47 | +extern int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *, | |
48 | + struct scsi_cmnd *); | |
49 | extern void zfcp_fsf_req_complete(struct zfcp_fsf_req *); | |
50 | extern void zfcp_fsf_req_free(struct zfcp_fsf_req *); | |
51 | -extern struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_adapter *, | |
52 | - struct zfcp_unit *, u8, int); | |
53 | +extern struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_unit *, u8); | |
54 | extern struct zfcp_fsf_req *zfcp_fsf_abort_fcp_command(unsigned long, | |
55 | - struct zfcp_adapter *, | |
56 | - struct zfcp_unit *, int); | |
57 | + struct zfcp_unit *); | |
58 | ||
59 | /* zfcp_qdio.c */ | |
60 | extern int zfcp_qdio_allocate(struct zfcp_adapter *); | |
61 | --- a/drivers/s390/scsi/zfcp_fsf.c 2009-07-01 13:39:47.000000000 +0200 | |
62 | +++ b/drivers/s390/scsi/zfcp_fsf.c 2009-07-01 13:39:50.000000000 +0200 | |
63 | @@ -11,6 +11,9 @@ | |
64 | #include <linux/blktrace_api.h> | |
65 | #include "zfcp_ext.h" | |
66 | ||
67 | +#define ZFCP_REQ_AUTO_CLEANUP 0x00000002 | |
68 | +#define ZFCP_REQ_NO_QTCB 0x00000008 | |
69 | + | |
70 | static void zfcp_fsf_request_timeout_handler(unsigned long data) | |
71 | { | |
72 | struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; | |
73 | @@ -911,27 +914,22 @@ static void zfcp_fsf_abort_fcp_command_h | |
74 | /** | |
75 | * zfcp_fsf_abort_fcp_command - abort running SCSI command | |
76 | * @old_req_id: unsigned long | |
77 | - * @adapter: pointer to struct zfcp_adapter | |
78 | * @unit: pointer to struct zfcp_unit | |
79 | - * @req_flags: integer specifying the request flags | |
80 | * Returns: pointer to struct zfcp_fsf_req | |
81 | - * | |
82 | - * FIXME(design): should be watched by a timeout !!! | |
83 | */ | |
84 | ||
85 | struct zfcp_fsf_req *zfcp_fsf_abort_fcp_command(unsigned long old_req_id, | |
86 | - struct zfcp_adapter *adapter, | |
87 | - struct zfcp_unit *unit, | |
88 | - int req_flags) | |
89 | + struct zfcp_unit *unit) | |
90 | { | |
91 | struct qdio_buffer_element *sbale; | |
92 | struct zfcp_fsf_req *req = NULL; | |
93 | + struct zfcp_adapter *adapter = unit->port->adapter; | |
94 | ||
95 | spin_lock_bh(&adapter->req_q_lock); | |
96 | if (zfcp_fsf_req_sbal_get(adapter)) | |
97 | goto out; | |
98 | req = zfcp_fsf_req_create(adapter, FSF_QTCB_ABORT_FCP_CMND, | |
99 | - req_flags, adapter->pool.fsf_req_abort); | |
100 | + 0, adapter->pool.fsf_req_abort); | |
101 | if (IS_ERR(req)) { | |
102 | req = NULL; | |
103 | goto out; | |
104 | @@ -2334,21 +2332,17 @@ static void zfcp_set_fcp_dl(struct fcp_c | |
105 | ||
106 | /** | |
107 | * zfcp_fsf_send_fcp_command_task - initiate an FCP command (for a SCSI command) | |
108 | - * @adapter: adapter where scsi command is issued | |
109 | * @unit: unit where command is sent to | |
110 | * @scsi_cmnd: scsi command to be sent | |
111 | - * @timer: timer to be started when request is initiated | |
112 | - * @req_flags: flags for fsf_request | |
113 | */ | |
114 | -int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter, | |
115 | - struct zfcp_unit *unit, | |
116 | - struct scsi_cmnd *scsi_cmnd, | |
117 | - int use_timer, int req_flags) | |
118 | +int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *unit, | |
119 | + struct scsi_cmnd *scsi_cmnd) | |
120 | { | |
121 | struct zfcp_fsf_req *req; | |
122 | struct fcp_cmnd_iu *fcp_cmnd_iu; | |
123 | unsigned int sbtype; | |
124 | int real_bytes, retval = -EIO; | |
125 | + struct zfcp_adapter *adapter = unit->port->adapter; | |
126 | ||
127 | if (unlikely(!(atomic_read(&unit->status) & | |
128 | ZFCP_STATUS_COMMON_UNBLOCKED))) | |
129 | @@ -2359,7 +2353,8 @@ int zfcp_fsf_send_fcp_command_task(struc | |
130 | atomic_inc(&adapter->qdio_outb_full); | |
131 | goto out; | |
132 | } | |
133 | - req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags, | |
134 | + req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, | |
135 | + ZFCP_REQ_AUTO_CLEANUP, | |
136 | adapter->pool.fsf_req_scsi); | |
137 | if (IS_ERR(req)) { | |
138 | retval = PTR_ERR(req); | |
139 | @@ -2441,9 +2436,6 @@ int zfcp_fsf_send_fcp_command_task(struc | |
140 | ||
141 | zfcp_set_fcp_dl(fcp_cmnd_iu, real_bytes); | |
142 | ||
143 | - if (use_timer) | |
144 | - zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); | |
145 | - | |
146 | retval = zfcp_fsf_req_send(req); | |
147 | if (unlikely(retval)) | |
148 | goto failed_scsi_cmnd; | |
149 | @@ -2461,19 +2453,16 @@ out: | |
150 | ||
151 | /** | |
152 | * zfcp_fsf_send_fcp_ctm - send SCSI task management command | |
153 | - * @adapter: pointer to struct zfcp-adapter | |
154 | * @unit: pointer to struct zfcp_unit | |
155 | * @tm_flags: unsigned byte for task management flags | |
156 | - * @req_flags: int request flags | |
157 | * Returns: on success pointer to struct fsf_req, NULL otherwise | |
158 | */ | |
159 | -struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_adapter *adapter, | |
160 | - struct zfcp_unit *unit, | |
161 | - u8 tm_flags, int req_flags) | |
162 | +struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_unit *unit, u8 tm_flags) | |
163 | { | |
164 | struct qdio_buffer_element *sbale; | |
165 | struct zfcp_fsf_req *req = NULL; | |
166 | struct fcp_cmnd_iu *fcp_cmnd_iu; | |
167 | + struct zfcp_adapter *adapter = unit->port->adapter; | |
168 | ||
169 | if (unlikely(!(atomic_read(&unit->status) & | |
170 | ZFCP_STATUS_COMMON_UNBLOCKED))) | |
171 | @@ -2482,7 +2471,7 @@ struct zfcp_fsf_req *zfcp_fsf_send_fcp_c | |
172 | spin_lock_bh(&adapter->req_q_lock); | |
173 | if (zfcp_fsf_req_sbal_get(adapter)) | |
174 | goto out; | |
175 | - req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags, | |
176 | + req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, 0, | |
177 | adapter->pool.fsf_req_scsi); | |
178 | if (IS_ERR(req)) { | |
179 | req = NULL; | |
180 | --- a/drivers/s390/scsi/zfcp_scsi.c 2009-07-01 13:39:47.000000000 +0200 | |
181 | +++ b/drivers/s390/scsi/zfcp_scsi.c 2009-07-01 13:41:23.000000000 +0200 | |
182 | @@ -94,8 +94,7 @@ static int zfcp_scsi_queuecommand(struct | |
183 | return 0;; | |
184 | } | |
185 | ||
186 | - ret = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, 0, | |
187 | - ZFCP_REQ_AUTO_CLEANUP); | |
188 | + ret = zfcp_fsf_send_fcp_command_task(unit, scpnt); | |
189 | if (unlikely(ret == -EBUSY)) | |
190 | return SCSI_MLQUEUE_DEVICE_BUSY; | |
191 | else if (unlikely(ret < 0)) | |
192 | @@ -153,79 +152,91 @@ out: | |
193 | ||
194 | static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) | |
195 | { | |
196 | - struct Scsi_Host *scsi_host; | |
197 | - struct zfcp_adapter *adapter; | |
198 | - struct zfcp_unit *unit; | |
199 | - struct zfcp_fsf_req *fsf_req; | |
200 | + struct Scsi_Host *scsi_host = scpnt->device->host; | |
201 | + struct zfcp_adapter *adapter = | |
202 | + (struct zfcp_adapter *) scsi_host->hostdata[0]; | |
203 | + struct zfcp_unit *unit = scpnt->device->hostdata; | |
204 | + struct zfcp_fsf_req *old_req, *abrt_req; | |
205 | unsigned long flags; | |
206 | unsigned long old_req_id = (unsigned long) scpnt->host_scribble; | |
207 | int retval = SUCCESS; | |
208 | - | |
209 | - scsi_host = scpnt->device->host; | |
210 | - adapter = (struct zfcp_adapter *) scsi_host->hostdata[0]; | |
211 | - unit = scpnt->device->hostdata; | |
212 | + int retry = 3; | |
213 | ||
214 | /* avoid race condition between late normal completion and abort */ | |
215 | write_lock_irqsave(&adapter->abort_lock, flags); | |
216 | ||
217 | - /* Check whether corresponding fsf_req is still pending */ | |
218 | spin_lock(&adapter->req_list_lock); | |
219 | - fsf_req = zfcp_reqlist_find(adapter, old_req_id); | |
220 | + old_req = zfcp_reqlist_find(adapter, old_req_id); | |
221 | spin_unlock(&adapter->req_list_lock); | |
222 | - if (!fsf_req) { | |
223 | + if (!old_req) { | |
224 | write_unlock_irqrestore(&adapter->abort_lock, flags); | |
225 | - zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, 0); | |
226 | - return FAILED; /* completion could be in progress */ | |
227 | + zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, | |
228 | + old_req_id); | |
229 | + return SUCCESS; | |
230 | } | |
231 | - fsf_req->data = NULL; | |
232 | + old_req->data = NULL; | |
233 | ||
234 | /* don't access old fsf_req after releasing the abort_lock */ | |
235 | write_unlock_irqrestore(&adapter->abort_lock, flags); | |
236 | ||
237 | - fsf_req = zfcp_fsf_abort_fcp_command(old_req_id, adapter, unit, 0); | |
238 | - if (!fsf_req) { | |
239 | - zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, | |
240 | - old_req_id); | |
241 | - retval = FAILED; | |
242 | - return retval; | |
243 | + while (retry--) { | |
244 | + abrt_req = zfcp_fsf_abort_fcp_command(old_req_id, unit); | |
245 | + if (abrt_req) | |
246 | + break; | |
247 | + | |
248 | + zfcp_erp_wait(adapter); | |
249 | + if (!(atomic_read(&adapter->status) & | |
250 | + ZFCP_STATUS_COMMON_RUNNING)) { | |
251 | + zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL, | |
252 | + old_req_id); | |
253 | + return SUCCESS; | |
254 | + } | |
255 | } | |
256 | + if (!abrt_req) | |
257 | + return FAILED; | |
258 | ||
259 | - __wait_event(fsf_req->completion_wq, | |
260 | - fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); | |
261 | + wait_event(abrt_req->completion_wq, | |
262 | + abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); | |
263 | ||
264 | - if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) { | |
265 | - zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, fsf_req, 0); | |
266 | - } else if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) { | |
267 | - zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, fsf_req, 0); | |
268 | - } else { | |
269 | - zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, fsf_req, 0); | |
270 | + if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) | |
271 | + zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, abrt_req, 0); | |
272 | + else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) | |
273 | + zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, abrt_req, 0); | |
274 | + else { | |
275 | + zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, abrt_req, 0); | |
276 | retval = FAILED; | |
277 | } | |
278 | - zfcp_fsf_req_free(fsf_req); | |
279 | - | |
280 | + zfcp_fsf_req_free(abrt_req); | |
281 | return retval; | |
282 | } | |
283 | ||
284 | -static int zfcp_task_mgmt_function(struct zfcp_unit *unit, u8 tm_flags, | |
285 | - struct scsi_cmnd *scpnt) | |
286 | +static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags) | |
287 | { | |
288 | + struct zfcp_unit *unit = scpnt->device->hostdata; | |
289 | struct zfcp_adapter *adapter = unit->port->adapter; | |
290 | struct zfcp_fsf_req *fsf_req; | |
291 | int retval = SUCCESS; | |
292 | + int retry = 3; | |
293 | ||
294 | - /* issue task management function */ | |
295 | - fsf_req = zfcp_fsf_send_fcp_ctm(adapter, unit, tm_flags, 0); | |
296 | - if (!fsf_req) { | |
297 | - zfcp_scsi_dbf_event_devreset("nres", tm_flags, unit, scpnt); | |
298 | - return FAILED; | |
299 | + while (retry--) { | |
300 | + fsf_req = zfcp_fsf_send_fcp_ctm(unit, tm_flags); | |
301 | + if (fsf_req) | |
302 | + break; | |
303 | + | |
304 | + zfcp_erp_wait(adapter); | |
305 | + if (!(atomic_read(&adapter->status) & | |
306 | + ZFCP_STATUS_COMMON_RUNNING)) { | |
307 | + zfcp_scsi_dbf_event_devreset("nres", tm_flags, unit, | |
308 | + scpnt); | |
309 | + return SUCCESS; | |
310 | + } | |
311 | } | |
312 | + if (!fsf_req) | |
313 | + return FAILED; | |
314 | ||
315 | - __wait_event(fsf_req->completion_wq, | |
316 | - fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); | |
317 | + wait_event(fsf_req->completion_wq, | |
318 | + fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); | |
319 | ||
320 | - /* | |
321 | - * check completion status of task management function | |
322 | - */ | |
323 | if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { | |
324 | zfcp_scsi_dbf_event_devreset("fail", tm_flags, unit, scpnt); | |
325 | retval = FAILED; | |
326 | @@ -236,39 +247,24 @@ static int zfcp_task_mgmt_function(struc | |
327 | zfcp_scsi_dbf_event_devreset("okay", tm_flags, unit, scpnt); | |
328 | ||
329 | zfcp_fsf_req_free(fsf_req); | |
330 | - | |
331 | return retval; | |
332 | } | |
333 | ||
334 | static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt) | |
335 | { | |
336 | - struct zfcp_unit *unit = scpnt->device->hostdata; | |
337 | - | |
338 | - if (!unit) { | |
339 | - WARN_ON(1); | |
340 | - return SUCCESS; | |
341 | - } | |
342 | - return zfcp_task_mgmt_function(unit, FCP_LOGICAL_UNIT_RESET, scpnt); | |
343 | + return zfcp_task_mgmt_function(scpnt, FCP_LOGICAL_UNIT_RESET); | |
344 | } | |
345 | ||
346 | static int zfcp_scsi_eh_target_reset_handler(struct scsi_cmnd *scpnt) | |
347 | { | |
348 | - struct zfcp_unit *unit = scpnt->device->hostdata; | |
349 | - | |
350 | - if (!unit) { | |
351 | - WARN_ON(1); | |
352 | - return SUCCESS; | |
353 | - } | |
354 | - return zfcp_task_mgmt_function(unit, FCP_TARGET_RESET, scpnt); | |
355 | + return zfcp_task_mgmt_function(scpnt, FCP_TARGET_RESET); | |
356 | } | |
357 | ||
358 | static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) | |
359 | { | |
360 | - struct zfcp_unit *unit; | |
361 | - struct zfcp_adapter *adapter; | |
362 | + struct zfcp_unit *unit = scpnt->device->hostdata; | |
363 | + struct zfcp_adapter *adapter = unit->port->adapter; | |
364 | ||
365 | - unit = scpnt->device->hostdata; | |
366 | - adapter = unit->port->adapter; | |
367 | zfcp_erp_adapter_reopen(adapter, 0, 141, scpnt); | |
368 | zfcp_erp_wait(adapter); | |
369 |