--- /dev/null
+Subject: reset the adapter on fatal error
+From: Divy Le Ray <divy@chelsio.com>
+References: 466062 - LTC51042
+
+When a fatal error occurs, bring the ports down, reset the chip,
+and bring the ports back up.
+
+Factor out the code shared by EEH and fatal error recovery.
+Fix timer usage when bringing up/resetting sge queue sets.
+
+Signed-off-by: Divy Le Ray <divy@chelsio.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Olaf Hering <olh@suse.de>
+---
+ drivers/net/cxgb3/adapter.h | 1
+ drivers/net/cxgb3/common.h | 1
+ drivers/net/cxgb3/cxgb3_main.c | 166 +++++++++++++++++++++++++++--------------
+ drivers/net/cxgb3/sge.c | 9 --
+ drivers/net/cxgb3/t3_hw.c | 4
+ 5 files changed, 120 insertions(+), 61 deletions(-)
+
+--- a/drivers/net/cxgb3/adapter.h
++++ b/drivers/net/cxgb3/adapter.h
+@@ -241,6 +241,7 @@ struct adapter {
+ unsigned int check_task_cnt;
+ struct delayed_work adap_check_task;
+ struct work_struct ext_intr_handler_task;
++ struct work_struct fatal_error_handler_task;
+
+ struct dentry *debugfs_root;
+
+--- a/drivers/net/cxgb3/common.h
++++ b/drivers/net/cxgb3/common.h
+@@ -726,6 +726,7 @@ int t3_check_fw_version(struct adapter *
+ int t3_init_hw(struct adapter *adapter, u32 fw_params);
+ void mac_prep(struct cmac *mac, struct adapter *adapter, int index);
+ void early_hw_init(struct adapter *adapter, const struct adapter_info *ai);
++int t3_reset_adapter(struct adapter *adapter);
+ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
+ int reset);
+ int t3_replay_prep_adapter(struct adapter *adapter);
+--- a/drivers/net/cxgb3/cxgb3_main.c
++++ b/drivers/net/cxgb3/cxgb3_main.c
+@@ -1016,6 +1016,13 @@ static int cxgb_up(struct adapter *adap)
+ goto out;
+ }
+
++ /*
++ * Clear interrupts now to catch errors if t3_init_hw fails.
++ * We clear them again later as initialization may trigger
++ * conditions that can interrupt.
++ */
++ t3_intr_clear(adap);
++
+ err = t3_init_hw(adap, 0);
+ if (err)
+ goto out;
+@@ -1224,9 +1231,9 @@ static int cxgb_close(struct net_device
+ if (is_offload(adapter) && !ofld_disable)
+ sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
+
+- spin_lock(&adapter->work_lock); /* sync with update task */
++ spin_lock_irq(&adapter->work_lock); /* sync with update task */
+ clear_bit(pi->port_id, &adapter->open_device_map);
+- spin_unlock(&adapter->work_lock);
++ spin_unlock_irq(&adapter->work_lock);
+
+ if (!(adapter->open_device_map & PORT_MASK))
+ cancel_rearming_delayed_workqueue(cxgb3_wq,
+@@ -2555,10 +2562,10 @@ static void t3_adap_check_task(struct wo
+ check_t3b2_mac(adapter);
+
+ /* Schedule the next check update if any port is active. */
+- spin_lock(&adapter->work_lock);
++ spin_lock_irq(&adapter->work_lock);
+ if (adapter->open_device_map & PORT_MASK)
+ schedule_chk_task(adapter);
+- spin_unlock(&adapter->work_lock);
++ spin_unlock_irq(&adapter->work_lock);
+ }
+
+ /*
+@@ -2603,6 +2610,96 @@ void t3_os_ext_intr_handler(struct adapt
+ spin_unlock(&adapter->work_lock);
+ }
+
++static int t3_adapter_error(struct adapter *adapter, int reset)
++{
++ int i, ret = 0;
++
++ /* Stop all ports */
++ for_each_port(adapter, i) {
++ struct net_device *netdev = adapter->port[i];
++
++ if (netif_running(netdev))
++ cxgb_close(netdev);
++ }
++
++ if (is_offload(adapter) &&
++ test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
++ offload_close(&adapter->tdev);
++
++ /* Stop SGE timers */
++ t3_stop_sge_timers(adapter);
++
++ adapter->flags &= ~FULL_INIT_DONE;
++
++ if (reset)
++ ret = t3_reset_adapter(adapter);
++
++ pci_disable_device(adapter->pdev);
++
++ return ret;
++}
++
++static int t3_reenable_adapter(struct adapter *adapter)
++{
++ if (pci_enable_device(adapter->pdev)) {
++ dev_err(&adapter->pdev->dev,
++ "Cannot re-enable PCI device after reset.\n");
++ goto err;
++ }
++ pci_set_master(adapter->pdev);
++ pci_restore_state(adapter->pdev);
++
++ /* Free sge resources */
++ t3_free_sge_resources(adapter);
++
++ if (t3_replay_prep_adapter(adapter))
++ goto err;
++
++ return 0;
++err:
++ return -1;
++}
++
++static void t3_resume_ports(struct adapter *adapter)
++{
++ int i;
++
++ /* Restart the ports */
++ for_each_port(adapter, i) {
++ struct net_device *netdev = adapter->port[i];
++
++ if (netif_running(netdev)) {
++ if (cxgb_open(netdev)) {
++ dev_err(&adapter->pdev->dev,
++ "can't bring device back up"
++ " after reset\n");
++ continue;
++ }
++ }
++ }
++}
++
++/*
++ * Process a fatal error.
++ * Bring the ports down, reset the chip, bring the ports back up.
++ */
++static void fatal_error_task(struct work_struct *work)
++{
++ struct adapter *adapter = container_of(work, struct adapter,
++ fatal_error_handler_task);
++ int err = 0;
++
++ rtnl_lock();
++ err = t3_adapter_error(adapter, 1);
++ if (!err)
++ err = t3_reenable_adapter(adapter);
++ if (!err)
++ t3_resume_ports(adapter);
++
++ CH_ALERT(adapter, "adapter reset %s\n", err ? "failed" : "succeeded");
++ rtnl_unlock();
++}
++
+ void t3_fatal_err(struct adapter *adapter)
+ {
+ unsigned int fw_status[4];
+@@ -2613,7 +2710,11 @@ void t3_fatal_err(struct adapter *adapte
+ t3_write_reg(adapter, A_XGM_RX_CTRL, 0);
+ t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0);
+ t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0);
++
++ spin_lock(&adapter->work_lock);
+ t3_intr_disable(adapter);
++ queue_work(cxgb3_wq, &adapter->fatal_error_handler_task);
++ spin_unlock(&adapter->work_lock);
+ }
+ CH_ALERT(adapter, "encountered fatal error, operation suspended\n");
+ if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status))
+@@ -2635,26 +2736,9 @@ static pci_ers_result_t t3_io_error_dete
+ pci_channel_state_t state)
+ {
+ struct adapter *adapter = pci_get_drvdata(pdev);
+- int i;
+-
+- /* Stop all ports */
+- for_each_port(adapter, i) {
+- struct net_device *netdev = adapter->port[i];
+-
+- if (netif_running(netdev))
+- cxgb_close(netdev);
+- }
+-
+- if (is_offload(adapter) &&
+- test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
+- offload_close(&adapter->tdev);
+-
+- /* Stop SGE timers */
+- t3_stop_sge_timers(adapter);
+-
+- adapter->flags &= ~FULL_INIT_DONE;
+-
+- pci_disable_device(pdev);
++ int ret;
++
++ ret = t3_adapter_error(adapter, 0);
+
+ /* Request a slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+@@ -2670,22 +2754,9 @@ static pci_ers_result_t t3_io_slot_reset
+ {
+ struct adapter *adapter = pci_get_drvdata(pdev);
+
+- if (pci_enable_device(pdev)) {
+- dev_err(&pdev->dev,
+- "Cannot re-enable PCI device after reset.\n");
+- goto err;
+- }
+- pci_set_master(pdev);
+- pci_restore_state(pdev);
+-
+- /* Free sge resources */
+- t3_free_sge_resources(adapter);
+-
+- if (t3_replay_prep_adapter(adapter))
+- goto err;
++ if (!t3_reenable_adapter(adapter))
++ return PCI_ERS_RESULT_RECOVERED;
+
+- return PCI_ERS_RESULT_RECOVERED;
+-err:
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+@@ -2699,22 +2770,8 @@ err:
+ static void t3_io_resume(struct pci_dev *pdev)
+ {
+ struct adapter *adapter = pci_get_drvdata(pdev);
+- int i;
+-
+- /* Restart the ports */
+- for_each_port(adapter, i) {
+- struct net_device *netdev = adapter->port[i];
+
+- if (netif_running(netdev)) {
+- if (cxgb_open(netdev)) {
+- dev_err(&pdev->dev,
+- "can't bring device back up"
+- " after reset\n");
+- continue;
+- }
+- netif_device_attach(netdev);
+- }
+- }
++ t3_resume_ports(adapter);
+ }
+
+ static struct pci_error_handlers t3_err_handler = {
+@@ -2899,6 +2956,7 @@ static int __devinit init_one(struct pci
+
+ INIT_LIST_HEAD(&adapter->adapter_list);
+ INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
++ INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
+ INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
+
+ for (i = 0; i < ai->nports; ++i) {
+--- a/drivers/net/cxgb3/sge.c
++++ b/drivers/net/cxgb3/sge.c
+@@ -352,7 +352,8 @@ static void free_rx_bufs(struct pci_dev
+ pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
+ q->buf_size, PCI_DMA_FROMDEVICE);
+ if (q->use_pages) {
+- put_page(d->pg_chunk.page);
++ if (d->pg_chunk.page)
++ put_page(d->pg_chunk.page);
+ d->pg_chunk.page = NULL;
+ } else {
+ kfree_skb(d->skb);
+@@ -584,7 +585,7 @@ static void t3_reset_qset(struct sge_qse
+ memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
+ memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
+ q->txq_stopped = 0;
+- memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
++ q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
+ kfree(q->lro_frag_tbl);
+ q->lro_nfrags = q->lro_frag_len = 0;
+ }
+@@ -2900,9 +2901,7 @@ int t3_sge_alloc_qset(struct adapter *ad
+ struct net_lro_mgr *lro_mgr = &q->lro_mgr;
+
+ init_qset_cntxt(q, id);
+- init_timer(&q->tx_reclaim_timer);
+- q->tx_reclaim_timer.data = (unsigned long)q;
+- q->tx_reclaim_timer.function = sge_timer_cb;
++ setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
+
+ q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
+ sizeof(struct rx_desc),
+--- a/drivers/net/cxgb3/t3_hw.c
++++ b/drivers/net/cxgb3/t3_hw.c
+@@ -1275,7 +1275,7 @@ struct intr_info {
+ unsigned int mask; /* bits to check in interrupt status */
+ const char *msg; /* message to print or NULL */
+ short stat_idx; /* stat counter to increment or -1 */
+- unsigned short fatal:1; /* whether the condition reported is fatal */
++ unsigned short fatal; /* whether the condition reported is fatal */
+ };
+
+ /**
+@@ -3551,7 +3551,7 @@ void early_hw_init(struct adapter *adapt
+ * Older PCIe cards lose their config space during reset, PCI-X
+ * ones don't.
+ */
+-static int t3_reset_adapter(struct adapter *adapter)
++int t3_reset_adapter(struct adapter *adapter)
+ {
+ int i, save_and_restore_pcie =
+ adapter->params.rev < T3_REV_B2 && is_pcie(adapter);