1 From: Gerald Schaefer <geraldsc@de.ibm.com>
2 Subject: zfcp: Improve reliability of SCSI eh handlers in zfcp
3 References: bnc#518291,LTC#54465
5 Symptom: During error recovery scenarios, zfcp and the SCSI midlayer
6 run error recovery and later SCSI devices are flagged as
7 "offline" in the Linux kernel.
8 Problem: The SCSI midlayer error recovery issues various requests
9 to zfcp. If zfcp is recovering internally at the same
10 time, the SCSI midlayer requests fail immediately and the
11 SCSI midlayer recovery will run until hitting the final
12 step where SCSI devices are flagged as "offline".
13 Solution: Backport the commit 63caf367e1c92e0667a344d9b687c04e6ef054b5.
14 This commit changes the SCSI midlayer recovery callbacks
15 in zfcp to wait for the zfcp erp to finish before issuing any
16 request. If necessary, retry the request three times.
18 Acked-by: John Jolly <jjolly@suse.de>
20 drivers/s390/scsi/zfcp_def.h | 3 -
21 drivers/s390/scsi/zfcp_ext.h | 11 +--
22 drivers/s390/scsi/zfcp_fsf.c | 39 ++++---------
23 drivers/s390/scsi/zfcp_scsi.c | 122 ++++++++++++++++++++----------------------
24 4 files changed, 77 insertions(+), 98 deletions(-)
26 --- a/drivers/s390/scsi/zfcp_def.h 2009-07-01 13:39:47.000000000 +0200
27 +++ b/drivers/s390/scsi/zfcp_def.h 2009-07-01 13:39:50.000000000 +0200
28 @@ -621,9 +621,6 @@ struct zfcp_fsf_req_qtcb {
30 /********************** ZFCP SPECIFIC DEFINES ********************************/
32 -#define ZFCP_REQ_AUTO_CLEANUP 0x00000002
33 -#define ZFCP_REQ_NO_QTCB 0x00000008
35 #define ZFCP_SET 0x00000100
36 #define ZFCP_CLEAR 0x00000200
38 --- a/drivers/s390/scsi/zfcp_ext.h 2009-07-01 13:39:47.000000000 +0200
39 +++ b/drivers/s390/scsi/zfcp_ext.h 2009-07-01 13:39:50.000000000 +0200
40 @@ -127,16 +127,13 @@ extern int zfcp_status_read_refill(struc
41 extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *,
42 struct zfcp_erp_action *);
43 extern int zfcp_fsf_send_els(struct zfcp_send_els *);
44 -extern int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *,
46 - struct scsi_cmnd *, int, int);
47 +extern int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *,
48 + struct scsi_cmnd *);
49 extern void zfcp_fsf_req_complete(struct zfcp_fsf_req *);
50 extern void zfcp_fsf_req_free(struct zfcp_fsf_req *);
51 -extern struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_adapter *,
52 - struct zfcp_unit *, u8, int);
53 +extern struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_unit *, u8);
54 extern struct zfcp_fsf_req *zfcp_fsf_abort_fcp_command(unsigned long,
55 - struct zfcp_adapter *,
56 - struct zfcp_unit *, int);
57 + struct zfcp_unit *);
60 extern int zfcp_qdio_allocate(struct zfcp_adapter *);
61 --- a/drivers/s390/scsi/zfcp_fsf.c 2009-07-01 13:39:47.000000000 +0200
62 +++ b/drivers/s390/scsi/zfcp_fsf.c 2009-07-01 13:39:50.000000000 +0200
64 #include <linux/blktrace_api.h>
67 +#define ZFCP_REQ_AUTO_CLEANUP 0x00000002
68 +#define ZFCP_REQ_NO_QTCB 0x00000008
70 static void zfcp_fsf_request_timeout_handler(unsigned long data)
72 struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
73 @@ -911,27 +914,22 @@ static void zfcp_fsf_abort_fcp_command_h
75 * zfcp_fsf_abort_fcp_command - abort running SCSI command
76 * @old_req_id: unsigned long
77 - * @adapter: pointer to struct zfcp_adapter
78 * @unit: pointer to struct zfcp_unit
79 - * @req_flags: integer specifying the request flags
80 * Returns: pointer to struct zfcp_fsf_req
82 - * FIXME(design): should be watched by a timeout !!!
85 struct zfcp_fsf_req *zfcp_fsf_abort_fcp_command(unsigned long old_req_id,
86 - struct zfcp_adapter *adapter,
87 - struct zfcp_unit *unit,
89 + struct zfcp_unit *unit)
91 struct qdio_buffer_element *sbale;
92 struct zfcp_fsf_req *req = NULL;
93 + struct zfcp_adapter *adapter = unit->port->adapter;
95 spin_lock_bh(&adapter->req_q_lock);
96 if (zfcp_fsf_req_sbal_get(adapter))
98 req = zfcp_fsf_req_create(adapter, FSF_QTCB_ABORT_FCP_CMND,
99 - req_flags, adapter->pool.fsf_req_abort);
100 + 0, adapter->pool.fsf_req_abort);
104 @@ -2334,21 +2332,17 @@ static void zfcp_set_fcp_dl(struct fcp_c
107 * zfcp_fsf_send_fcp_command_task - initiate an FCP command (for a SCSI command)
108 - * @adapter: adapter where scsi command is issued
109 * @unit: unit where command is sent to
110 * @scsi_cmnd: scsi command to be sent
111 - * @timer: timer to be started when request is initiated
112 - * @req_flags: flags for fsf_request
114 -int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
115 - struct zfcp_unit *unit,
116 - struct scsi_cmnd *scsi_cmnd,
117 - int use_timer, int req_flags)
118 +int zfcp_fsf_send_fcp_command_task(struct zfcp_unit *unit,
119 + struct scsi_cmnd *scsi_cmnd)
121 struct zfcp_fsf_req *req;
122 struct fcp_cmnd_iu *fcp_cmnd_iu;
124 int real_bytes, retval = -EIO;
125 + struct zfcp_adapter *adapter = unit->port->adapter;
127 if (unlikely(!(atomic_read(&unit->status) &
128 ZFCP_STATUS_COMMON_UNBLOCKED)))
129 @@ -2359,7 +2353,8 @@ int zfcp_fsf_send_fcp_command_task(struc
130 atomic_inc(&adapter->qdio_outb_full);
133 - req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags,
134 + req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND,
135 + ZFCP_REQ_AUTO_CLEANUP,
136 adapter->pool.fsf_req_scsi);
138 retval = PTR_ERR(req);
139 @@ -2441,9 +2436,6 @@ int zfcp_fsf_send_fcp_command_task(struc
141 zfcp_set_fcp_dl(fcp_cmnd_iu, real_bytes);
144 - zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
146 retval = zfcp_fsf_req_send(req);
147 if (unlikely(retval))
148 goto failed_scsi_cmnd;
149 @@ -2461,19 +2453,16 @@ out:
152 * zfcp_fsf_send_fcp_ctm - send SCSI task management command
153 - * @adapter: pointer to struct zfcp-adapter
154 * @unit: pointer to struct zfcp_unit
155 * @tm_flags: unsigned byte for task management flags
156 - * @req_flags: int request flags
157 * Returns: on success pointer to struct fsf_req, NULL otherwise
159 -struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_adapter *adapter,
160 - struct zfcp_unit *unit,
161 - u8 tm_flags, int req_flags)
162 +struct zfcp_fsf_req *zfcp_fsf_send_fcp_ctm(struct zfcp_unit *unit, u8 tm_flags)
164 struct qdio_buffer_element *sbale;
165 struct zfcp_fsf_req *req = NULL;
166 struct fcp_cmnd_iu *fcp_cmnd_iu;
167 + struct zfcp_adapter *adapter = unit->port->adapter;
169 if (unlikely(!(atomic_read(&unit->status) &
170 ZFCP_STATUS_COMMON_UNBLOCKED)))
171 @@ -2482,7 +2471,7 @@ struct zfcp_fsf_req *zfcp_fsf_send_fcp_c
172 spin_lock_bh(&adapter->req_q_lock);
173 if (zfcp_fsf_req_sbal_get(adapter))
175 - req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, req_flags,
176 + req = zfcp_fsf_req_create(adapter, FSF_QTCB_FCP_CMND, 0,
177 adapter->pool.fsf_req_scsi);
180 --- a/drivers/s390/scsi/zfcp_scsi.c 2009-07-01 13:39:47.000000000 +0200
181 +++ b/drivers/s390/scsi/zfcp_scsi.c 2009-07-01 13:41:23.000000000 +0200
182 @@ -94,8 +94,7 @@ static int zfcp_scsi_queuecommand(struct
186 - ret = zfcp_fsf_send_fcp_command_task(adapter, unit, scpnt, 0,
187 - ZFCP_REQ_AUTO_CLEANUP);
188 + ret = zfcp_fsf_send_fcp_command_task(unit, scpnt);
189 if (unlikely(ret == -EBUSY))
190 return SCSI_MLQUEUE_DEVICE_BUSY;
191 else if (unlikely(ret < 0))
192 @@ -153,79 +152,91 @@ out:
194 static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
196 - struct Scsi_Host *scsi_host;
197 - struct zfcp_adapter *adapter;
198 - struct zfcp_unit *unit;
199 - struct zfcp_fsf_req *fsf_req;
200 + struct Scsi_Host *scsi_host = scpnt->device->host;
201 + struct zfcp_adapter *adapter =
202 + (struct zfcp_adapter *) scsi_host->hostdata[0];
203 + struct zfcp_unit *unit = scpnt->device->hostdata;
204 + struct zfcp_fsf_req *old_req, *abrt_req;
206 unsigned long old_req_id = (unsigned long) scpnt->host_scribble;
207 int retval = SUCCESS;
209 - scsi_host = scpnt->device->host;
210 - adapter = (struct zfcp_adapter *) scsi_host->hostdata[0];
211 - unit = scpnt->device->hostdata;
214 /* avoid race condition between late normal completion and abort */
215 write_lock_irqsave(&adapter->abort_lock, flags);
217 - /* Check whether corresponding fsf_req is still pending */
218 spin_lock(&adapter->req_list_lock);
219 - fsf_req = zfcp_reqlist_find(adapter, old_req_id);
220 + old_req = zfcp_reqlist_find(adapter, old_req_id);
221 spin_unlock(&adapter->req_list_lock);
224 write_unlock_irqrestore(&adapter->abort_lock, flags);
225 - zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL, 0);
226 - return FAILED; /* completion could be in progress */
227 + zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, NULL,
231 - fsf_req->data = NULL;
232 + old_req->data = NULL;
234 /* don't access old fsf_req after releasing the abort_lock */
235 write_unlock_irqrestore(&adapter->abort_lock, flags);
237 - fsf_req = zfcp_fsf_abort_fcp_command(old_req_id, adapter, unit, 0);
239 - zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL,
244 + abrt_req = zfcp_fsf_abort_fcp_command(old_req_id, unit);
248 + zfcp_erp_wait(adapter);
249 + if (!(atomic_read(&adapter->status) &
250 + ZFCP_STATUS_COMMON_RUNNING)) {
251 + zfcp_scsi_dbf_event_abort("nres", adapter, scpnt, NULL,
259 - __wait_event(fsf_req->completion_wq,
260 - fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
261 + wait_event(abrt_req->completion_wq,
262 + abrt_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
264 - if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
265 - zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, fsf_req, 0);
266 - } else if (fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
267 - zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, fsf_req, 0);
269 - zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, fsf_req, 0);
270 + if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED)
271 + zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, abrt_req, 0);
272 + else if (abrt_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED)
273 + zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, abrt_req, 0);
275 + zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, abrt_req, 0);
278 - zfcp_fsf_req_free(fsf_req);
280 + zfcp_fsf_req_free(abrt_req);
284 -static int zfcp_task_mgmt_function(struct zfcp_unit *unit, u8 tm_flags,
285 - struct scsi_cmnd *scpnt)
286 +static int zfcp_task_mgmt_function(struct scsi_cmnd *scpnt, u8 tm_flags)
288 + struct zfcp_unit *unit = scpnt->device->hostdata;
289 struct zfcp_adapter *adapter = unit->port->adapter;
290 struct zfcp_fsf_req *fsf_req;
291 int retval = SUCCESS;
294 - /* issue task management function */
295 - fsf_req = zfcp_fsf_send_fcp_ctm(adapter, unit, tm_flags, 0);
297 - zfcp_scsi_dbf_event_devreset("nres", tm_flags, unit, scpnt);
300 + fsf_req = zfcp_fsf_send_fcp_ctm(unit, tm_flags);
304 + zfcp_erp_wait(adapter);
305 + if (!(atomic_read(&adapter->status) &
306 + ZFCP_STATUS_COMMON_RUNNING)) {
307 + zfcp_scsi_dbf_event_devreset("nres", tm_flags, unit,
315 - __wait_event(fsf_req->completion_wq,
316 - fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
317 + wait_event(fsf_req->completion_wq,
318 + fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
321 - * check completion status of task management function
323 if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) {
324 zfcp_scsi_dbf_event_devreset("fail", tm_flags, unit, scpnt);
326 @@ -236,39 +247,24 @@ static int zfcp_task_mgmt_function(struc
327 zfcp_scsi_dbf_event_devreset("okay", tm_flags, unit, scpnt);
329 zfcp_fsf_req_free(fsf_req);
334 static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt)
336 - struct zfcp_unit *unit = scpnt->device->hostdata;
342 - return zfcp_task_mgmt_function(unit, FCP_LOGICAL_UNIT_RESET, scpnt);
343 + return zfcp_task_mgmt_function(scpnt, FCP_LOGICAL_UNIT_RESET);
346 static int zfcp_scsi_eh_target_reset_handler(struct scsi_cmnd *scpnt)
348 - struct zfcp_unit *unit = scpnt->device->hostdata;
354 - return zfcp_task_mgmt_function(unit, FCP_TARGET_RESET, scpnt);
355 + return zfcp_task_mgmt_function(scpnt, FCP_TARGET_RESET);
358 static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
360 - struct zfcp_unit *unit;
361 - struct zfcp_adapter *adapter;
362 + struct zfcp_unit *unit = scpnt->device->hostdata;
363 + struct zfcp_adapter *adapter = unit->port->adapter;
365 - unit = scpnt->device->hostdata;
366 - adapter = unit->port->adapter;
367 zfcp_erp_adapter_reopen(adapter, 0, 141, scpnt);
368 zfcp_erp_wait(adapter);