1 Subject: ibmvfc oops while processing async events
2 From: Brian King <brking@us.ibm.com>
3 Date: Thu Nov 20 13:43:45 2008 +0100
6 While running error injection (port disable/enable loop) during
7 I/O stress, the following oops occurred:
9 cpu 0x0: Vector: 300 (Data Access) at [c00000000f4cbb10]
10 pc: d0000000000c5c44: .ibmvfc_interrupt+0x1a4/0x1f8 [ibmvfc]
11 lr: d0000000000c5c44: .ibmvfc_interrupt+0x1a4/0x1f8 [ibmvfc]
16 current = 0xc00000001dee77a0
17 paca = 0xc000000000a92c80
19 There is a race window in the ibmvfc interrupt handler. If a new
20 async event is added to the async crq while an interrupt is being
21 processed, after both the regular crq and the async crq have already
22 been processed, we will oops when we process the async queue a second
23 time, due to a bug in the code that handles that second pass.
25 Signed-off-by: Brian King <brking@us.ibm.com>
26 Acked-by: Hannes Reinecke <hare@suse.de>
29 drivers/scsi/ibmvscsi/ibmvfc.c | 266 +++++++++++++++++++++++++++--------------
30 drivers/scsi/ibmvscsi/ibmvfc.h | 26 ++--
31 2 files changed, 191 insertions(+), 101 deletions(-)
33 --- a/drivers/scsi/ibmvscsi/ibmvfc.c
34 +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
35 @@ -121,6 +121,7 @@ static const struct {
36 { IBMVFC_VIOS_FAILURE, IBMVFC_TRANS_CANCELLED, DID_ABORT, 0, 1, "transaction cancelled" },
37 { IBMVFC_VIOS_FAILURE, IBMVFC_TRANS_CANCELLED_IMPLICIT, DID_ABORT, 0, 1, "transaction cancelled implicit" },
38 { IBMVFC_VIOS_FAILURE, IBMVFC_INSUFFICIENT_RESOURCE, DID_REQUEUE, 1, 1, "insufficient resources" },
39 + { IBMVFC_VIOS_FAILURE, IBMVFC_PLOGI_REQUIRED, DID_ERROR, 0, 1, "port login required" },
40 { IBMVFC_VIOS_FAILURE, IBMVFC_COMMAND_FAILED, DID_ERROR, 1, 1, "command failed" },
42 { IBMVFC_FC_FAILURE, IBMVFC_INVALID_ELS_CMD_CODE, DID_ERROR, 0, 1, "invalid ELS command code" },
43 @@ -278,13 +279,6 @@ static int ibmvfc_get_err_result(struct
44 rsp->data.info.rsp_code))
45 return DID_ERROR << 16;
47 - if (!vfc_cmd->status) {
48 - if (rsp->flags & FCP_RESID_OVER)
49 - return rsp->scsi_status | (DID_ERROR << 16);
51 - return rsp->scsi_status | (DID_OK << 16);
54 err = ibmvfc_get_err_index(vfc_cmd->status, vfc_cmd->error);
56 return rsp->scsi_status | (cmd_status[err].result << 16);
57 @@ -503,6 +497,7 @@ static void ibmvfc_set_host_action(struc
58 case IBMVFC_HOST_ACTION_INIT:
59 case IBMVFC_HOST_ACTION_TGT_DEL:
60 case IBMVFC_HOST_ACTION_QUERY_TGTS:
61 + case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
62 case IBMVFC_HOST_ACTION_TGT_ADD:
63 case IBMVFC_HOST_ACTION_NONE:
65 @@ -765,6 +760,9 @@ static void ibmvfc_scsi_eh_done(struct i
66 cmnd->scsi_done(cmnd);
70 + complete(evt->eh_comp);
72 ibmvfc_free_event(evt);
75 @@ -1253,6 +1251,7 @@ static void ibmvfc_init_event(struct ibm
77 evt->crq.format = format;
79 + evt->eh_comp = NULL;
83 @@ -1478,6 +1477,11 @@ static void ibmvfc_scsi_done(struct ibmv
84 sense_len = SCSI_SENSE_BUFFERSIZE - rsp_len;
85 if ((rsp->flags & FCP_SNS_LEN_VALID) && rsp->fcp_sense_len && rsp_len <= 8)
86 memcpy(cmnd->sense_buffer, rsp->data.sense + rsp_len, sense_len);
87 + if ((vfc_cmd->status & IBMVFC_VIOS_FAILURE) && (vfc_cmd->error == IBMVFC_PLOGI_REQUIRED))
88 + ibmvfc_reinit_host(evt->vhost);
90 + if (!cmnd->result && (!scsi_get_resid(cmnd) || (rsp->flags & FCP_RESID_OVER)))
91 + cmnd->result = (DID_ERROR << 16);
93 ibmvfc_log_error(evt);
95 @@ -1490,6 +1494,9 @@ static void ibmvfc_scsi_done(struct ibmv
96 cmnd->scsi_done(cmnd);
100 + complete(evt->eh_comp);
102 ibmvfc_free_event(evt);
105 @@ -1628,7 +1635,7 @@ static int ibmvfc_reset_device(struct sc
106 struct ibmvfc_host *vhost = shost_priv(sdev->host);
107 struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
108 struct ibmvfc_cmd *tmf;
109 - struct ibmvfc_event *evt;
110 + struct ibmvfc_event *evt = NULL;
111 union ibmvfc_iu rsp_iu;
112 struct ibmvfc_fcp_rsp *fc_rsp = &rsp_iu.cmd.rsp;
114 @@ -1790,7 +1797,8 @@ static int ibmvfc_abort_task_set(struct
115 static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
117 struct ibmvfc_host *vhost = shost_priv(sdev->host);
118 - struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
119 + struct scsi_target *starget = scsi_target(sdev);
120 + struct fc_rport *rport = starget_to_rport(starget);
121 struct ibmvfc_tmf *tmf;
122 struct ibmvfc_event *evt, *found_evt;
124 @@ -1828,7 +1836,7 @@ static int ibmvfc_cancel_all(struct scsi
125 int_to_scsilun(sdev->lun, &tmf->lun);
126 tmf->flags = (type | IBMVFC_TMF_LUA_VALID);
127 tmf->cancel_key = (unsigned long)sdev->hostdata;
128 - tmf->my_cancel_key = (IBMVFC_TMF_CANCEL_KEY | (unsigned long)sdev->hostdata);
129 + tmf->my_cancel_key = (unsigned long)starget->hostdata;
132 init_completion(&evt->comp);
133 @@ -1860,6 +1868,91 @@ static int ibmvfc_cancel_all(struct scsi
137 + * ibmvfc_match_target - Match function for specified target
138 + * @evt: ibmvfc event struct
139 + * @device: device to match (starget)
142 + * 1 if event matches starget / 0 if event does not match starget
144 +static int ibmvfc_match_target(struct ibmvfc_event *evt, void *device)
146 + if (evt->cmnd && scsi_target(evt->cmnd->device) == device)
152 + * ibmvfc_match_lun - Match function for specified LUN
153 + * @evt: ibmvfc event struct
154 + * @device: device to match (sdev)
157 + * 1 if event matches sdev / 0 if event does not match sdev
159 +static int ibmvfc_match_lun(struct ibmvfc_event *evt, void *device)
161 + if (evt->cmnd && evt->cmnd->device == device)
167 + * ibmvfc_wait_for_ops - Wait for ops to complete
168 + * @vhost: ibmvfc host struct
169 + * @device: device to match (starget or sdev)
170 + * @match: match function
175 +static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
176 + int (*match) (struct ibmvfc_event *, void *))
178 + struct ibmvfc_event *evt;
179 + DECLARE_COMPLETION_ONSTACK(comp);
181 + unsigned long flags;
182 + signed long timeout = init_timeout * HZ;
187 + spin_lock_irqsave(vhost->host->host_lock, flags);
188 + list_for_each_entry(evt, &vhost->sent, queue) {
189 + if (match(evt, device)) {
190 + evt->eh_comp = &comp;
194 + spin_unlock_irqrestore(vhost->host->host_lock, flags);
197 + timeout = wait_for_completion_timeout(&comp, timeout);
201 + spin_lock_irqsave(vhost->host->host_lock, flags);
202 + list_for_each_entry(evt, &vhost->sent, queue) {
203 + if (match(evt, device)) {
204 + evt->eh_comp = NULL;
208 + spin_unlock_irqrestore(vhost->host->host_lock, flags);
210 + dev_err(vhost->dev, "Timed out waiting for aborted commands\n");
212 + return wait ? FAILED : SUCCESS;
222 * ibmvfc_eh_abort_handler - Abort a command
223 * @cmd: scsi command to abort
225 @@ -1868,29 +1961,21 @@ static int ibmvfc_cancel_all(struct scsi
227 static int ibmvfc_eh_abort_handler(struct scsi_cmnd *cmd)
229 - struct ibmvfc_host *vhost = shost_priv(cmd->device->host);
230 - struct ibmvfc_event *evt, *pos;
231 + struct scsi_device *sdev = cmd->device;
232 + struct ibmvfc_host *vhost = shost_priv(sdev->host);
233 int cancel_rc, abort_rc;
234 - unsigned long flags;
238 ibmvfc_wait_while_resetting(vhost);
239 - cancel_rc = ibmvfc_cancel_all(cmd->device, IBMVFC_TMF_ABORT_TASK_SET);
240 - abort_rc = ibmvfc_abort_task_set(cmd->device);
241 + cancel_rc = ibmvfc_cancel_all(sdev, IBMVFC_TMF_ABORT_TASK_SET);
242 + abort_rc = ibmvfc_abort_task_set(sdev);
244 - if (!cancel_rc && !abort_rc) {
245 - spin_lock_irqsave(vhost->host->host_lock, flags);
246 - list_for_each_entry_safe(evt, pos, &vhost->sent, queue) {
247 - if (evt->cmnd && evt->cmnd->device == cmd->device)
248 - ibmvfc_fail_request(evt, DID_ABORT);
250 - spin_unlock_irqrestore(vhost->host->host_lock, flags);
254 + if (!cancel_rc && !abort_rc)
255 + rc = ibmvfc_wait_for_ops(vhost, sdev, ibmvfc_match_lun);
263 @@ -1902,29 +1987,21 @@ static int ibmvfc_eh_abort_handler(struc
265 static int ibmvfc_eh_device_reset_handler(struct scsi_cmnd *cmd)
267 - struct ibmvfc_host *vhost = shost_priv(cmd->device->host);
268 - struct ibmvfc_event *evt, *pos;
269 + struct scsi_device *sdev = cmd->device;
270 + struct ibmvfc_host *vhost = shost_priv(sdev->host);
271 int cancel_rc, reset_rc;
272 - unsigned long flags;
276 ibmvfc_wait_while_resetting(vhost);
277 - cancel_rc = ibmvfc_cancel_all(cmd->device, IBMVFC_TMF_LUN_RESET);
278 - reset_rc = ibmvfc_reset_device(cmd->device, IBMVFC_LUN_RESET, "LUN");
279 + cancel_rc = ibmvfc_cancel_all(sdev, IBMVFC_TMF_LUN_RESET);
280 + reset_rc = ibmvfc_reset_device(sdev, IBMVFC_LUN_RESET, "LUN");
282 - if (!cancel_rc && !reset_rc) {
283 - spin_lock_irqsave(vhost->host->host_lock, flags);
284 - list_for_each_entry_safe(evt, pos, &vhost->sent, queue) {
285 - if (evt->cmnd && evt->cmnd->device == cmd->device)
286 - ibmvfc_fail_request(evt, DID_ABORT);
288 - spin_unlock_irqrestore(vhost->host->host_lock, flags);
292 + if (!cancel_rc && !reset_rc)
293 + rc = ibmvfc_wait_for_ops(vhost, sdev, ibmvfc_match_lun);
301 @@ -1960,31 +2037,23 @@ static void ibmvfc_dev_abort_all(struct
303 static int ibmvfc_eh_target_reset_handler(struct scsi_cmnd *cmd)
305 - struct ibmvfc_host *vhost = shost_priv(cmd->device->host);
306 - struct scsi_target *starget = scsi_target(cmd->device);
307 - struct ibmvfc_event *evt, *pos;
308 + struct scsi_device *sdev = cmd->device;
309 + struct ibmvfc_host *vhost = shost_priv(sdev->host);
310 + struct scsi_target *starget = scsi_target(sdev);
313 unsigned long cancel_rc = 0;
314 - unsigned long flags;
317 ibmvfc_wait_while_resetting(vhost);
318 starget_for_each_device(starget, &cancel_rc, ibmvfc_dev_cancel_all);
319 - reset_rc = ibmvfc_reset_device(cmd->device, IBMVFC_TARGET_RESET, "target");
320 + reset_rc = ibmvfc_reset_device(sdev, IBMVFC_TARGET_RESET, "target");
322 - if (!cancel_rc && !reset_rc) {
323 - spin_lock_irqsave(vhost->host->host_lock, flags);
324 - list_for_each_entry_safe(evt, pos, &vhost->sent, queue) {
325 - if (evt->cmnd && scsi_target(evt->cmnd->device) == starget)
326 - ibmvfc_fail_request(evt, DID_ABORT);
328 - spin_unlock_irqrestore(vhost->host->host_lock, flags);
332 + if (!cancel_rc && !reset_rc)
333 + rc = ibmvfc_wait_for_ops(vhost, starget, ibmvfc_match_target);
341 @@ -2014,23 +2083,18 @@ static void ibmvfc_terminate_rport_io(st
342 struct scsi_target *starget = to_scsi_target(&rport->dev);
343 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
344 struct ibmvfc_host *vhost = shost_priv(shost);
345 - struct ibmvfc_event *evt, *pos;
346 unsigned long cancel_rc = 0;
347 unsigned long abort_rc = 0;
348 - unsigned long flags;
352 starget_for_each_device(starget, &cancel_rc, ibmvfc_dev_cancel_all);
353 starget_for_each_device(starget, &abort_rc, ibmvfc_dev_abort_all);
355 - if (!cancel_rc && !abort_rc) {
356 - spin_lock_irqsave(shost->host_lock, flags);
357 - list_for_each_entry_safe(evt, pos, &vhost->sent, queue) {
358 - if (evt->cmnd && scsi_target(evt->cmnd->device) == starget)
359 - ibmvfc_fail_request(evt, DID_ABORT);
361 - spin_unlock_irqrestore(shost->host_lock, flags);
363 + if (!cancel_rc && !abort_rc)
364 + rc = ibmvfc_wait_for_ops(vhost, starget, ibmvfc_match_target);
367 ibmvfc_issue_fc_host_lip(shost);
370 @@ -2266,6 +2330,28 @@ static int ibmvfc_slave_alloc(struct scs
374 + * ibmvfc_target_alloc - Setup the target's task set value
375 + * @starget: struct scsi_target
377 + * Set the target's task set value so that error handling works as expected.
381 + * 0 on success / -ENXIO if device does not exist
383 +static int ibmvfc_target_alloc(struct scsi_target *starget)
385 + struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
386 + struct ibmvfc_host *vhost = shost_priv(shost);
387 + unsigned long flags = 0;
389 + spin_lock_irqsave(shost->host_lock, flags);
390 + starget->hostdata = (void *)(unsigned long)vhost->task_set++;
391 + spin_unlock_irqrestore(shost->host_lock, flags);
396 * ibmvfc_slave_configure - Configure the device
397 * @sdev: struct scsi_device device to configure
399 @@ -2544,6 +2630,7 @@ static struct scsi_host_template driver_
400 .eh_host_reset_handler = ibmvfc_eh_host_reset_handler,
401 .slave_alloc = ibmvfc_slave_alloc,
402 .slave_configure = ibmvfc_slave_configure,
403 + .target_alloc = ibmvfc_target_alloc,
404 .scan_finished = ibmvfc_scan_finished,
405 .change_queue_depth = ibmvfc_change_queue_depth,
406 .change_queue_type = ibmvfc_change_queue_type,
407 @@ -2640,7 +2727,7 @@ static irqreturn_t ibmvfc_interrupt(int
408 } else if ((async = ibmvfc_next_async_crq(vhost)) != NULL) {
409 vio_disable_interrupts(vdev);
410 ibmvfc_handle_async(async, vhost);
416 @@ -2711,6 +2798,8 @@ static void ibmvfc_tgt_prli_done(struct
417 rsp->status, rsp->error, status);
418 if (ibmvfc_retry_cmd(rsp->status, rsp->error))
419 ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_prli);
421 + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
425 @@ -2805,6 +2894,8 @@ static void ibmvfc_tgt_plogi_done(struct
427 if (ibmvfc_retry_cmd(rsp->status, rsp->error))
428 ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_send_plogi);
430 + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
434 @@ -3096,6 +3187,8 @@ static void ibmvfc_tgt_query_target_done
435 ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
436 else if (ibmvfc_retry_cmd(rsp->status, rsp->error))
437 ibmvfc_retry_tgt_init(tgt, ibmvfc_tgt_query_target);
439 + ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
443 @@ -3426,6 +3519,7 @@ static int __ibmvfc_work_to_do(struct ib
444 case IBMVFC_HOST_ACTION_ALLOC_TGTS:
445 case IBMVFC_HOST_ACTION_TGT_ADD:
446 case IBMVFC_HOST_ACTION_TGT_DEL:
447 + case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
448 case IBMVFC_HOST_ACTION_QUERY:
451 @@ -3547,6 +3641,7 @@ static void ibmvfc_do_work(struct ibmvfc
452 ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_DEL);
454 case IBMVFC_HOST_ACTION_TGT_DEL:
455 + case IBMVFC_HOST_ACTION_TGT_DEL_FAILED:
456 list_for_each_entry(tgt, &vhost->targets, queue) {
457 if (tgt->action == IBMVFC_TGT_ACTION_DEL_RPORT) {
458 tgt_dbg(tgt, "Deleting rport\n");
459 @@ -3562,8 +3657,17 @@ static void ibmvfc_do_work(struct ibmvfc
462 if (vhost->state == IBMVFC_INITIALIZING) {
463 - ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
464 - vhost->job_step = ibmvfc_discover_targets;
465 + if (vhost->action == IBMVFC_HOST_ACTION_TGT_DEL_FAILED) {
466 + ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
467 + ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_ADD);
468 + vhost->init_retries = 0;
469 + spin_unlock_irqrestore(vhost->host->host_lock, flags);
470 + scsi_unblock_requests(vhost->host);
473 + ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_INIT);
474 + vhost->job_step = ibmvfc_discover_targets;
477 ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE);
478 spin_unlock_irqrestore(vhost->host->host_lock, flags);
479 @@ -3586,14 +3690,8 @@ static void ibmvfc_do_work(struct ibmvfc
483 - if (!ibmvfc_dev_init_to_do(vhost)) {
484 - ibmvfc_set_host_state(vhost, IBMVFC_ACTIVE);
485 - ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_ADD);
486 - vhost->init_retries = 0;
487 - spin_unlock_irqrestore(vhost->host->host_lock, flags);
488 - scsi_unblock_requests(vhost->host);
491 + if (!ibmvfc_dev_init_to_do(vhost))
492 + ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_TGT_DEL_FAILED);
494 case IBMVFC_HOST_ACTION_TGT_ADD:
495 list_for_each_entry(tgt, &vhost->targets, queue) {
496 @@ -3601,16 +3699,6 @@ static void ibmvfc_do_work(struct ibmvfc
497 spin_unlock_irqrestore(vhost->host->host_lock, flags);
498 ibmvfc_tgt_add_rport(tgt);
500 - } else if (tgt->action == IBMVFC_TGT_ACTION_DEL_RPORT) {
501 - tgt_dbg(tgt, "Deleting rport\n");
502 - rport = tgt->rport;
504 - list_del(&tgt->queue);
505 - spin_unlock_irqrestore(vhost->host->host_lock, flags);
507 - fc_remote_port_delete(rport);
508 - kref_put(&tgt->kref, ibmvfc_release_tgt);
513 --- a/drivers/scsi/ibmvscsi/ibmvfc.h
514 +++ b/drivers/scsi/ibmvscsi/ibmvfc.h
518 #define IBMVFC_NAME "ibmvfc"
519 -#define IBMVFC_DRIVER_VERSION "1.0.2"
520 -#define IBMVFC_DRIVER_DATE "(August 14, 2008)"
521 +#define IBMVFC_DRIVER_VERSION "1.0.4"
522 +#define IBMVFC_DRIVER_DATE "(November 14, 2008)"
524 #define IBMVFC_DEFAULT_TIMEOUT 15
525 #define IBMVFC_INIT_TIMEOUT 120
526 @@ -110,6 +110,7 @@ enum ibmvfc_vios_errors {
527 IBMVFC_TRANS_CANCELLED = 0x0006,
528 IBMVFC_TRANS_CANCELLED_IMPLICIT = 0x0007,
529 IBMVFC_INSUFFICIENT_RESOURCE = 0x0008,
530 + IBMVFC_PLOGI_REQUIRED = 0x0010,
531 IBMVFC_COMMAND_FAILED = 0x8000,
534 @@ -338,7 +339,6 @@ struct ibmvfc_tmf {
535 #define IBMVFC_TMF_LUA_VALID 0x40
538 -#define IBMVFC_TMF_CANCEL_KEY 0x80000000
541 }__attribute__((packed, aligned (8)));
542 @@ -525,10 +525,10 @@ enum ibmvfc_async_event {
549 + volatile u8 format;
553 }__attribute__((packed, aligned (8)));
555 struct ibmvfc_crq_queue {
556 @@ -538,13 +538,13 @@ struct ibmvfc_crq_queue {
559 struct ibmvfc_async_crq {
568 + volatile u64 event;
569 + volatile u64 scsi_id;
571 + volatile u64 node_name;
573 }__attribute__((packed, aligned (8)));
575 @@ -607,6 +607,7 @@ struct ibmvfc_event {
576 struct srp_direct_buf *ext_list;
577 dma_addr_t ext_list_token;
578 struct completion comp;
579 + struct completion *eh_comp;
580 struct timer_list timer;
583 @@ -627,6 +628,7 @@ enum ibmvfc_host_action {
584 IBMVFC_HOST_ACTION_TGT_DEL,
585 IBMVFC_HOST_ACTION_ALLOC_TGTS,
586 IBMVFC_HOST_ACTION_TGT_INIT,
587 + IBMVFC_HOST_ACTION_TGT_DEL_FAILED,
588 IBMVFC_HOST_ACTION_TGT_ADD,
591 @@ -702,7 +704,7 @@ struct ibmvfc_host {
593 #define ibmvfc_log(vhost, level, ...) \
595 - if (level >= (vhost)->log_level) \
596 + if ((vhost)->log_level >= level) \
597 dev_err((vhost)->dev, ##__VA_ARGS__); \