From: Hannes Reinecke Subject: cciss: Ignore stale commands after reboot References: bnc#513437 Patch-Mainline: yes When doing an unexpected shutdown like kexec the cciss firmware might still have some commands in flight, which it is trying to complete. The driver is doing it's best on resetting the HBA, but sadly there's a firmware issue causing the firmware _not_ to abort or drop old commands. So the firmware will send us commands which we haven't accounted for, causing the driver to panic. This patch also updates the queue handling to using kernel-provided hashed lists; without it we wouldn't be able to detect stale commands at all. Queue handling is backported from mainline. Signed-off-by: Hannes Reinecke diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index d7ec8e4..28d3f3d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -214,31 +214,27 @@ static struct block_device_operations cciss_fops = { /* * Enqueuing and dequeuing functions for cmdlists. */ -static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c) +static inline void addQ(struct hlist_head *list, CommandList_struct *c) { - if (*Qptr == NULL) { - *Qptr = c; - c->next = c->prev = c; - } else { - c->prev = (*Qptr)->prev; - c->next = (*Qptr); - (*Qptr)->prev->next = c; - (*Qptr)->prev = c; - } + hlist_add_head(&c->list, list); } -static inline CommandList_struct *removeQ(CommandList_struct **Qptr, - CommandList_struct *c) +static inline void removeQ(CommandList_struct *c) { - if (c && c->next != c) { - if (*Qptr == c) - *Qptr = c->next; - c->prev->next = c->next; - c->next->prev = c->prev; - } else { - *Qptr = NULL; + /* + * After kexec/dump some commands might still + * be in flight, which the firmware will try + * to complete. Resetting the firmware doesn't work + * with old fw revisions, so we have to mark + * them off as 'stale' to prevent the driver from + * falling over. + */ + if (unlikely(hlist_unhashed(&c->list))) { + c->cmd_type = CMD_MSG_STALE; + return; } - return c; + + hlist_del_init(&c->list); } #include "cciss_scsi.c" /* For SCSI tape support */ @@ -505,6 +501,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool) c->cmdindex = i; } + INIT_HLIST_NODE(&c->list); c->busaddr = (__u32) cmd_dma_handle; temp64.val = (__u64) err_dma_handle; c->ErrDesc.Addr.lower = temp64.val32.lower; @@ -2549,7 +2548,8 @@ static void start_io(ctlr_info_t *h) { CommandList_struct *c; - while ((c = h->reqQ) != NULL) { + while (!hlist_empty(&h->reqQ)) { + c = hlist_entry(h->reqQ.first, CommandList_struct, list); /* can't do anything if fifo is full */ if ((h->access.fifo_full(h))) { printk(KERN_WARNING "cciss: fifo full\n"); @@ -2557,14 +2557,14 @@ static void start_io(ctlr_info_t *h) } /* Get the first entry from the Request Q */ - removeQ(&(h->reqQ), c); + removeQ(c); h->Qdepth--; /* Tell the controller execute command */ h->access.submit_command(h, c); /* Put job onto the completed Q */ - addQ(&(h->cmpQ), c); + addQ(&h->cmpQ, c); } } @@ -2577,7 +2577,7 @@ static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c) memset(c->err_info, 0, sizeof(ErrorInfo_struct)); /* add it to software queue and then send it to the controller */ - addQ(&(h->reqQ), c); + addQ(&h->reqQ, c); h->Qdepth++; if (h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; @@ -2898,7 +2898,7 @@ static void do_cciss_request(struct request_queue *q) spin_lock_irq(q->queue_lock); - addQ(&(h->reqQ), c); + addQ(&h->reqQ, c); h->Qdepth++; if (h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; @@ -2986,16 +2986,12 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) a = c->busaddr; } else { + struct hlist_node *tmp; + a &= ~3; - if ((c = h->cmpQ) == NULL) { - printk(KERN_WARNING - "cciss: Completion of %08x ignored\n", - a1); - continue; - } - while (c->busaddr != a) { - c = c->next; - if (c == h->cmpQ) + c = NULL; + hlist_for_each_entry(c, tmp, &h->cmpQ, list) { + if (c->busaddr == a) break; } } @@ -3003,8 +2999,8 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id) * If we've found the command, take it off the * completion Q and free it */ - if (c->busaddr == a) { - removeQ(&h->cmpQ, c); + if (c && c->busaddr == a) { + removeQ(c); if (c->cmd_type == CMD_RWREQ) { complete_command(h, c, 0); } else if (c->cmd_type == CMD_IOCTL_PEND) { @@ -3423,6 +3419,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, return -1; hba[i]->busy_initializing = 1; + INIT_HLIST_HEAD(&hba[i]->cmpQ); + INIT_HLIST_HEAD(&hba[i]->reqQ); if (cciss_pci_init(hba[i], pdev) != 0) goto clean1; @@ -3730,16 +3728,19 @@ static void fail_all_cmds(unsigned long ctlr) pci_disable_device(h->pdev); /* Make sure it is really dead. */ /* move everything off the request queue onto the completed queue */ - while ((c = h->reqQ) != NULL) { - removeQ(&(h->reqQ), c); + while (!hlist_empty(&h->reqQ)) { + c = hlist_entry(h->reqQ.first, CommandList_struct, list); + removeQ(c); h->Qdepth--; - addQ(&(h->cmpQ), c); + addQ(&h->cmpQ, c); } /* Now, fail everything on the completed queue with a HW error */ - while ((c = h->cmpQ) != NULL) { - removeQ(&h->cmpQ, c); - c->err_info->CommandStatus = CMD_HARDWARE_ERR; + while (!hlist_empty(&h->cmpQ)) { + c = hlist_entry(h->cmpQ.first, CommandList_struct, list); + removeQ(c); + if (c->cmd_type != CMD_MSG_STALE) + c->err_info->CommandStatus = CMD_HARDWARE_ERR; if (c->cmd_type == CMD_RWREQ) { complete_command(h, c, 0); } else if (c->cmd_type == CMD_IOCTL_PEND) diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 24a7efa..15e2b84 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -89,8 +89,8 @@ struct ctlr_info struct access_method access; /* queue and queue Info */ - CommandList_struct *reqQ; - CommandList_struct *cmpQ; + struct hlist_head reqQ; + struct hlist_head cmpQ; unsigned int Qdepth; unsigned int maxQsinceinit; unsigned int maxSG; diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index 43bf559..e4ca588 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -249,6 +249,7 @@ typedef struct _ErrorInfo_struct { #define CMD_SCSI 0x03 #define CMD_MSG_DONE 0x04 #define CMD_MSG_TIMEOUT 0x05 +#define CMD_MSG_STALE 0xff /* This structure needs to be divisible by 8 for new * indexing method. @@ -265,8 +266,7 @@ typedef struct _CommandList_struct { int ctlr; int cmd_type; long cmdindex; - struct _CommandList_struct *prev; - struct _CommandList_struct *next; + struct hlist_node list; struct request * rq; struct completion *waiting; int retry_count;