+++ /dev/null
-Subject: reset the adapter on fatal error
-From: Divy Le Ray <divy@chelsio.com>
-References: 466062 - LTC51042
-
-When a fatal error occurs, bring the ports down, reset the chip,
-and bring the ports back up.
-
-Factor out the code shared by EEH and fatal error recovery.
-Fix timer usage when bringing up/resetting sge queue sets.
-
-Signed-off-by: Divy Le Ray <divy@chelsio.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Olaf Hering <olh@suse.de>
----
- drivers/net/cxgb3/adapter.h | 1
- drivers/net/cxgb3/common.h | 1
- drivers/net/cxgb3/cxgb3_main.c | 166 +++++++++++++++++++++++++++--------------
- drivers/net/cxgb3/sge.c | 9 --
- drivers/net/cxgb3/t3_hw.c | 4
- 5 files changed, 120 insertions(+), 61 deletions(-)
-
---- a/drivers/net/cxgb3/adapter.h
-+++ b/drivers/net/cxgb3/adapter.h
-@@ -241,6 +241,7 @@ struct adapter {
- unsigned int check_task_cnt;
- struct delayed_work adap_check_task;
- struct work_struct ext_intr_handler_task;
-+ struct work_struct fatal_error_handler_task;
-
- struct dentry *debugfs_root;
-
---- a/drivers/net/cxgb3/common.h
-+++ b/drivers/net/cxgb3/common.h
-@@ -726,6 +726,7 @@ int t3_check_fw_version(struct adapter *
- int t3_init_hw(struct adapter *adapter, u32 fw_params);
- void mac_prep(struct cmac *mac, struct adapter *adapter, int index);
- void early_hw_init(struct adapter *adapter, const struct adapter_info *ai);
-+int t3_reset_adapter(struct adapter *adapter);
- int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
- int reset);
- int t3_replay_prep_adapter(struct adapter *adapter);
---- a/drivers/net/cxgb3/cxgb3_main.c
-+++ b/drivers/net/cxgb3/cxgb3_main.c
-@@ -1016,6 +1016,13 @@ static int cxgb_up(struct adapter *adap)
- goto out;
- }
-
-+ /*
-+ * Clear interrupts now to catch errors if t3_init_hw fails.
-+ * We clear them again later as initialization may trigger
-+ * conditions that can interrupt.
-+ */
-+ t3_intr_clear(adap);
-+
- err = t3_init_hw(adap, 0);
- if (err)
- goto out;
-@@ -1224,9 +1231,9 @@ static int cxgb_close(struct net_device
- if (is_offload(adapter) && !ofld_disable)
- sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
-
-- spin_lock(&adapter->work_lock); /* sync with update task */
-+ spin_lock_irq(&adapter->work_lock); /* sync with update task */
- clear_bit(pi->port_id, &adapter->open_device_map);
-- spin_unlock(&adapter->work_lock);
-+ spin_unlock_irq(&adapter->work_lock);
-
- if (!(adapter->open_device_map & PORT_MASK))
- cancel_rearming_delayed_workqueue(cxgb3_wq,
-@@ -2555,10 +2562,10 @@ static void t3_adap_check_task(struct wo
- check_t3b2_mac(adapter);
-
- /* Schedule the next check update if any port is active. */
-- spin_lock(&adapter->work_lock);
-+ spin_lock_irq(&adapter->work_lock);
- if (adapter->open_device_map & PORT_MASK)
- schedule_chk_task(adapter);
-- spin_unlock(&adapter->work_lock);
-+ spin_unlock_irq(&adapter->work_lock);
- }
-
- /*
-@@ -2603,6 +2610,96 @@ void t3_os_ext_intr_handler(struct adapt
- spin_unlock(&adapter->work_lock);
- }
-
-+static int t3_adapter_error(struct adapter *adapter, int reset)
-+{
-+ int i, ret = 0;
-+
-+ /* Stop all ports */
-+ for_each_port(adapter, i) {
-+ struct net_device *netdev = adapter->port[i];
-+
-+ if (netif_running(netdev))
-+ cxgb_close(netdev);
-+ }
-+
-+ if (is_offload(adapter) &&
-+ test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
-+ offload_close(&adapter->tdev);
-+
-+ /* Stop SGE timers */
-+ t3_stop_sge_timers(adapter);
-+
-+ adapter->flags &= ~FULL_INIT_DONE;
-+
-+ if (reset)
-+ ret = t3_reset_adapter(adapter);
-+
-+ pci_disable_device(adapter->pdev);
-+
-+ return ret;
-+}
-+
-+static int t3_reenable_adapter(struct adapter *adapter)
-+{
-+ if (pci_enable_device(adapter->pdev)) {
-+ dev_err(&adapter->pdev->dev,
-+ "Cannot re-enable PCI device after reset.\n");
-+ goto err;
-+ }
-+ pci_set_master(adapter->pdev);
-+ pci_restore_state(adapter->pdev);
-+
-+ /* Free sge resources */
-+ t3_free_sge_resources(adapter);
-+
-+ if (t3_replay_prep_adapter(adapter))
-+ goto err;
-+
-+ return 0;
-+err:
-+ return -1;
-+}
-+
-+static void t3_resume_ports(struct adapter *adapter)
-+{
-+ int i;
-+
-+ /* Restart the ports */
-+ for_each_port(adapter, i) {
-+ struct net_device *netdev = adapter->port[i];
-+
-+ if (netif_running(netdev)) {
-+ if (cxgb_open(netdev)) {
-+ dev_err(&adapter->pdev->dev,
-+ "can't bring device back up"
-+ " after reset\n");
-+ continue;
-+ }
-+ }
-+ }
-+}
-+
-+/*
-+ * Process a fatal error:
-+ * bring the ports down, reset the chip, bring the ports back up.
-+ */
-+static void fatal_error_task(struct work_struct *work)
-+{
-+ struct adapter *adapter = container_of(work, struct adapter,
-+ fatal_error_handler_task);
-+ int err = 0;
-+
-+ rtnl_lock();
-+ err = t3_adapter_error(adapter, 1);
-+ if (!err)
-+ err = t3_reenable_adapter(adapter);
-+ if (!err)
-+ t3_resume_ports(adapter);
-+
-+ CH_ALERT(adapter, "adapter reset %s\n", err ? "failed" : "succeeded");
-+ rtnl_unlock();
-+}
-+
- void t3_fatal_err(struct adapter *adapter)
- {
- unsigned int fw_status[4];
-@@ -2613,7 +2710,11 @@ void t3_fatal_err(struct adapter *adapte
- t3_write_reg(adapter, A_XGM_RX_CTRL, 0);
- t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0);
- t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0);
-+
-+ spin_lock(&adapter->work_lock);
- t3_intr_disable(adapter);
-+ queue_work(cxgb3_wq, &adapter->fatal_error_handler_task);
-+ spin_unlock(&adapter->work_lock);
- }
- CH_ALERT(adapter, "encountered fatal error, operation suspended\n");
- if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status))
-@@ -2635,26 +2736,9 @@ static pci_ers_result_t t3_io_error_dete
- pci_channel_state_t state)
- {
- struct adapter *adapter = pci_get_drvdata(pdev);
-- int i;
--
-- /* Stop all ports */
-- for_each_port(adapter, i) {
-- struct net_device *netdev = adapter->port[i];
--
-- if (netif_running(netdev))
-- cxgb_close(netdev);
-- }
--
-- if (is_offload(adapter) &&
-- test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
-- offload_close(&adapter->tdev);
--
-- /* Stop SGE timers */
-- t3_stop_sge_timers(adapter);
--
-- adapter->flags &= ~FULL_INIT_DONE;
--
-- pci_disable_device(pdev);
-+ int ret;
-+
-+ ret = t3_adapter_error(adapter, 0);
-
- /* Request a slot reset. */
- return PCI_ERS_RESULT_NEED_RESET;
-@@ -2670,22 +2754,9 @@ static pci_ers_result_t t3_io_slot_reset
- {
- struct adapter *adapter = pci_get_drvdata(pdev);
-
-- if (pci_enable_device(pdev)) {
-- dev_err(&pdev->dev,
-- "Cannot re-enable PCI device after reset.\n");
-- goto err;
-- }
-- pci_set_master(pdev);
-- pci_restore_state(pdev);
--
-- /* Free sge resources */
-- t3_free_sge_resources(adapter);
--
-- if (t3_replay_prep_adapter(adapter))
-- goto err;
-+ if (!t3_reenable_adapter(adapter))
-+ return PCI_ERS_RESULT_RECOVERED;
-
-- return PCI_ERS_RESULT_RECOVERED;
--err:
- return PCI_ERS_RESULT_DISCONNECT;
- }
-
-@@ -2699,22 +2770,8 @@ err:
- static void t3_io_resume(struct pci_dev *pdev)
- {
- struct adapter *adapter = pci_get_drvdata(pdev);
-- int i;
--
-- /* Restart the ports */
-- for_each_port(adapter, i) {
-- struct net_device *netdev = adapter->port[i];
-
-- if (netif_running(netdev)) {
-- if (cxgb_open(netdev)) {
-- dev_err(&pdev->dev,
-- "can't bring device back up"
-- " after reset\n");
-- continue;
-- }
-- netif_device_attach(netdev);
-- }
-- }
-+ t3_resume_ports(adapter);
- }
-
- static struct pci_error_handlers t3_err_handler = {
-@@ -2899,6 +2956,7 @@ static int __devinit init_one(struct pci
-
- INIT_LIST_HEAD(&adapter->adapter_list);
- INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
-+ INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
- INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
-
- for (i = 0; i < ai->nports; ++i) {
---- a/drivers/net/cxgb3/sge.c
-+++ b/drivers/net/cxgb3/sge.c
-@@ -352,7 +352,8 @@ static void free_rx_bufs(struct pci_dev
- pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
- q->buf_size, PCI_DMA_FROMDEVICE);
- if (q->use_pages) {
-- put_page(d->pg_chunk.page);
-+ if (d->pg_chunk.page)
-+ put_page(d->pg_chunk.page);
- d->pg_chunk.page = NULL;
- } else {
- kfree_skb(d->skb);
-@@ -584,7 +585,7 @@ static void t3_reset_qset(struct sge_qse
- memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
- memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
- q->txq_stopped = 0;
-- memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
-+ q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
- kfree(q->lro_frag_tbl);
- q->lro_nfrags = q->lro_frag_len = 0;
- }
-@@ -2900,9 +2901,7 @@ int t3_sge_alloc_qset(struct adapter *ad
- struct net_lro_mgr *lro_mgr = &q->lro_mgr;
-
- init_qset_cntxt(q, id);
-- init_timer(&q->tx_reclaim_timer);
-- q->tx_reclaim_timer.data = (unsigned long)q;
-- q->tx_reclaim_timer.function = sge_timer_cb;
-+ setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
-
- q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
- sizeof(struct rx_desc),
---- a/drivers/net/cxgb3/t3_hw.c
-+++ b/drivers/net/cxgb3/t3_hw.c
-@@ -1275,7 +1275,7 @@ struct intr_info {
- unsigned int mask; /* bits to check in interrupt status */
- const char *msg; /* message to print or NULL */
- short stat_idx; /* stat counter to increment or -1 */
-- unsigned short fatal:1; /* whether the condition reported is fatal */
-+ unsigned short fatal; /* whether the condition reported is fatal */
- };
-
- /**
-@@ -3551,7 +3551,7 @@ void early_hw_init(struct adapter *adapt
- * Older PCIe cards lose their config space during reset, PCI-X
- * ones don't.
- */
--static int t3_reset_adapter(struct adapter *adapter)
-+int t3_reset_adapter(struct adapter *adapter)
- {
- int i, save_and_restore_pcie =
- adapter->params.rev < T3_REV_B2 && is_pcie(adapter);