1 Subject: reset the adapter on fatal error
2 From: Divy Le Ray <divy@chelsio.com>
3 References: 466062 - LTC51042
5 When a fatal error occurs, bring the ports down, reset the chip,
6 and bring the ports back up.
8 Factor out the code shared by EEH and fatal error recovery.
9 Fix timer usage when bringing up or resetting SGE queue sets.
11 Signed-off-by: Divy Le Ray <divy@chelsio.com>
12 Signed-off-by: David S. Miller <davem@davemloft.net>
13 Signed-off-by: Olaf Hering <olh@suse.de>
15 drivers/net/cxgb3/adapter.h | 1
16 drivers/net/cxgb3/common.h | 1
17 drivers/net/cxgb3/cxgb3_main.c | 166 +++++++++++++++++++++++++++--------------
18 drivers/net/cxgb3/sge.c | 9 --
19 drivers/net/cxgb3/t3_hw.c | 4
20 5 files changed, 120 insertions(+), 61 deletions(-)
22 --- a/drivers/net/cxgb3/adapter.h
23 +++ b/drivers/net/cxgb3/adapter.h
24 @@ -241,6 +241,7 @@ struct adapter {
25 unsigned int check_task_cnt;
26 struct delayed_work adap_check_task;
27 struct work_struct ext_intr_handler_task;
28 + struct work_struct fatal_error_handler_task;
30 struct dentry *debugfs_root;
32 --- a/drivers/net/cxgb3/common.h
33 +++ b/drivers/net/cxgb3/common.h
34 @@ -726,6 +726,7 @@ int t3_check_fw_version(struct adapter *
35 int t3_init_hw(struct adapter *adapter, u32 fw_params);
36 void mac_prep(struct cmac *mac, struct adapter *adapter, int index);
37 void early_hw_init(struct adapter *adapter, const struct adapter_info *ai);
38 +int t3_reset_adapter(struct adapter *adapter);
39 int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai,
41 int t3_replay_prep_adapter(struct adapter *adapter);
42 --- a/drivers/net/cxgb3/cxgb3_main.c
43 +++ b/drivers/net/cxgb3/cxgb3_main.c
44 @@ -1016,6 +1016,13 @@ static int cxgb_up(struct adapter *adap)
49 + * Clear interrupts now to catch errors if t3_init_hw fails.
50 + * We clear them again later as initialization may trigger
51 + * conditions that can interrupt.
53 + t3_intr_clear(adap);
55 err = t3_init_hw(adap, 0);
58 @@ -1224,9 +1231,9 @@ static int cxgb_close(struct net_device
59 if (is_offload(adapter) && !ofld_disable)
60 sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group);
62 - spin_lock(&adapter->work_lock); /* sync with update task */
63 + spin_lock_irq(&adapter->work_lock); /* sync with update task */
64 clear_bit(pi->port_id, &adapter->open_device_map);
65 - spin_unlock(&adapter->work_lock);
66 + spin_unlock_irq(&adapter->work_lock);
68 if (!(adapter->open_device_map & PORT_MASK))
69 cancel_rearming_delayed_workqueue(cxgb3_wq,
70 @@ -2555,10 +2562,10 @@ static void t3_adap_check_task(struct wo
71 check_t3b2_mac(adapter);
73 /* Schedule the next check update if any port is active. */
74 - spin_lock(&adapter->work_lock);
75 + spin_lock_irq(&adapter->work_lock);
76 if (adapter->open_device_map & PORT_MASK)
77 schedule_chk_task(adapter);
78 - spin_unlock(&adapter->work_lock);
79 + spin_unlock_irq(&adapter->work_lock);
83 @@ -2603,6 +2610,96 @@ void t3_os_ext_intr_handler(struct adapt
84 spin_unlock(&adapter->work_lock);
87 +static int t3_adapter_error(struct adapter *adapter, int reset)
91 + /* Stop all ports */
92 + for_each_port(adapter, i) {
93 + struct net_device *netdev = adapter->port[i];
95 + if (netif_running(netdev))
99 + if (is_offload(adapter) &&
100 + test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
101 + offload_close(&adapter->tdev);
103 + /* Stop SGE timers */
104 + t3_stop_sge_timers(adapter);
106 + adapter->flags &= ~FULL_INIT_DONE;
109 + ret = t3_reset_adapter(adapter);
111 + pci_disable_device(adapter->pdev);
116 +static int t3_reenable_adapter(struct adapter *adapter)
118 + if (pci_enable_device(adapter->pdev)) {
119 + dev_err(&adapter->pdev->dev,
120 + "Cannot re-enable PCI device after reset.\n");
123 + pci_set_master(adapter->pdev);
124 + pci_restore_state(adapter->pdev);
126 + /* Free sge resources */
127 + t3_free_sge_resources(adapter);
129 + if (t3_replay_prep_adapter(adapter))
137 +static void t3_resume_ports(struct adapter *adapter)
141 + /* Restart the ports */
142 + for_each_port(adapter, i) {
143 + struct net_device *netdev = adapter->port[i];
145 + if (netif_running(netdev)) {
146 + if (cxgb_open(netdev)) {
147 + dev_err(&adapter->pdev->dev,
148 + "can't bring device back up"
157 + * processes a fatal error.
158 + * Bring the ports down, reset the chip, bring the ports back up.
160 +static void fatal_error_task(struct work_struct *work)
162 + struct adapter *adapter = container_of(work, struct adapter,
163 + fatal_error_handler_task);
167 + err = t3_adapter_error(adapter, 1);
169 + err = t3_reenable_adapter(adapter);
171 + t3_resume_ports(adapter);
173 + CH_ALERT(adapter, "adapter reset %s\n", err ? "failed" : "succeeded");
177 void t3_fatal_err(struct adapter *adapter)
179 unsigned int fw_status[4];
180 @@ -2613,7 +2710,11 @@ void t3_fatal_err(struct adapter *adapte
181 t3_write_reg(adapter, A_XGM_RX_CTRL, 0);
182 t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0);
183 t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0);
185 + spin_lock(&adapter->work_lock);
186 t3_intr_disable(adapter);
187 + queue_work(cxgb3_wq, &adapter->fatal_error_handler_task);
188 + spin_unlock(&adapter->work_lock);
190 CH_ALERT(adapter, "encountered fatal error, operation suspended\n");
191 if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status))
192 @@ -2635,26 +2736,9 @@ static pci_ers_result_t t3_io_error_dete
193 pci_channel_state_t state)
195 struct adapter *adapter = pci_get_drvdata(pdev);
198 - /* Stop all ports */
199 - for_each_port(adapter, i) {
200 - struct net_device *netdev = adapter->port[i];
202 - if (netif_running(netdev))
203 - cxgb_close(netdev);
206 - if (is_offload(adapter) &&
207 - test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map))
208 - offload_close(&adapter->tdev);
210 - /* Stop SGE timers */
211 - t3_stop_sge_timers(adapter);
213 - adapter->flags &= ~FULL_INIT_DONE;
215 - pci_disable_device(pdev);
218 + ret = t3_adapter_error(adapter, 0);
220 /* Request a slot reset. */
221 return PCI_ERS_RESULT_NEED_RESET;
222 @@ -2670,22 +2754,9 @@ static pci_ers_result_t t3_io_slot_reset
224 struct adapter *adapter = pci_get_drvdata(pdev);
226 - if (pci_enable_device(pdev)) {
227 - dev_err(&pdev->dev,
228 - "Cannot re-enable PCI device after reset.\n");
231 - pci_set_master(pdev);
232 - pci_restore_state(pdev);
234 - /* Free sge resources */
235 - t3_free_sge_resources(adapter);
237 - if (t3_replay_prep_adapter(adapter))
239 + if (!t3_reenable_adapter(adapter))
240 + return PCI_ERS_RESULT_RECOVERED;
242 - return PCI_ERS_RESULT_RECOVERED;
244 return PCI_ERS_RESULT_DISCONNECT;
247 @@ -2699,22 +2770,8 @@ err:
248 static void t3_io_resume(struct pci_dev *pdev)
250 struct adapter *adapter = pci_get_drvdata(pdev);
253 - /* Restart the ports */
254 - for_each_port(adapter, i) {
255 - struct net_device *netdev = adapter->port[i];
257 - if (netif_running(netdev)) {
258 - if (cxgb_open(netdev)) {
259 - dev_err(&pdev->dev,
260 - "can't bring device back up"
264 - netif_device_attach(netdev);
267 + t3_resume_ports(adapter);
270 static struct pci_error_handlers t3_err_handler = {
271 @@ -2899,6 +2956,7 @@ static int __devinit init_one(struct pci
273 INIT_LIST_HEAD(&adapter->adapter_list);
274 INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
275 + INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
276 INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
278 for (i = 0; i < ai->nports; ++i) {
279 --- a/drivers/net/cxgb3/sge.c
280 +++ b/drivers/net/cxgb3/sge.c
281 @@ -352,7 +352,8 @@ static void free_rx_bufs(struct pci_dev
282 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
283 q->buf_size, PCI_DMA_FROMDEVICE);
285 - put_page(d->pg_chunk.page);
286 + if (d->pg_chunk.page)
287 + put_page(d->pg_chunk.page);
288 d->pg_chunk.page = NULL;
291 @@ -584,7 +585,7 @@ static void t3_reset_qset(struct sge_qse
292 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
293 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
295 - memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer));
296 + q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
297 kfree(q->lro_frag_tbl);
298 q->lro_nfrags = q->lro_frag_len = 0;
300 @@ -2900,9 +2901,7 @@ int t3_sge_alloc_qset(struct adapter *ad
301 struct net_lro_mgr *lro_mgr = &q->lro_mgr;
303 init_qset_cntxt(q, id);
304 - init_timer(&q->tx_reclaim_timer);
305 - q->tx_reclaim_timer.data = (unsigned long)q;
306 - q->tx_reclaim_timer.function = sge_timer_cb;
307 + setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
309 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
310 sizeof(struct rx_desc),
311 --- a/drivers/net/cxgb3/t3_hw.c
312 +++ b/drivers/net/cxgb3/t3_hw.c
313 @@ -1275,7 +1275,7 @@ struct intr_info {
314 unsigned int mask; /* bits to check in interrupt status */
315 const char *msg; /* message to print or NULL */
316 short stat_idx; /* stat counter to increment or -1 */
317 - unsigned short fatal:1; /* whether the condition reported is fatal */
318 + unsigned short fatal; /* whether the condition reported is fatal */
322 @@ -3551,7 +3551,7 @@ void early_hw_init(struct adapter *adapt
323 * Older PCIe cards lose their config space during reset, PCI-X
326 -static int t3_reset_adapter(struct adapter *adapter)
327 +int t3_reset_adapter(struct adapter *adapter)
329 int i, save_and_restore_pcie =
330 adapter->params.rev < T3_REV_B2 && is_pcie(adapter);