]>
Commit | Line | Data |
---|---|---|
2cb7cef9 BS |
1 | Subject: reset the adapter on fatal error |
2 | From: Divy Le Ray <divy@chelsio.com> | |
3 | References: 466062 - LTC51042 | |
4 | ||
5 | When a fatal error occurs, bring ports down, reset the chip, | |
6 | and bring ports back up. | |
7 | ||
8 | Factor out the code shared by EEH and fatal error recovery. | |
9 | Fix timer usage when bringing up/resetting sge queue sets. | |
10 | ||
11 | Signed-off-by: Divy Le Ray <divy@chelsio.com> | |
12 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
13 | Signed-off-by: Olaf Hering <olh@suse.de> | |
14 | --- | |
15 | drivers/net/cxgb3/adapter.h | 1 | |
16 | drivers/net/cxgb3/common.h | 1 | |
17 | drivers/net/cxgb3/cxgb3_main.c | 166 +++++++++++++++++++++++++++-------------- | |
18 | drivers/net/cxgb3/sge.c | 9 -- | |
19 | drivers/net/cxgb3/t3_hw.c | 4 | |
20 | 5 files changed, 120 insertions(+), 61 deletions(-) | |
21 | ||
22 | --- a/drivers/net/cxgb3/adapter.h | |
23 | +++ b/drivers/net/cxgb3/adapter.h | |
24 | @@ -241,6 +241,7 @@ struct adapter { | |
25 | unsigned int check_task_cnt; | |
26 | struct delayed_work adap_check_task; | |
27 | struct work_struct ext_intr_handler_task; | |
28 | + struct work_struct fatal_error_handler_task; | |
29 | ||
30 | struct dentry *debugfs_root; | |
31 | ||
32 | --- a/drivers/net/cxgb3/common.h | |
33 | +++ b/drivers/net/cxgb3/common.h | |
34 | @@ -726,6 +726,7 @@ int t3_check_fw_version(struct adapter * | |
35 | int t3_init_hw(struct adapter *adapter, u32 fw_params); | |
36 | void mac_prep(struct cmac *mac, struct adapter *adapter, int index); | |
37 | void early_hw_init(struct adapter *adapter, const struct adapter_info *ai); | |
38 | +int t3_reset_adapter(struct adapter *adapter); | |
39 | int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, | |
40 | int reset); | |
41 | int t3_replay_prep_adapter(struct adapter *adapter); | |
42 | --- a/drivers/net/cxgb3/cxgb3_main.c | |
43 | +++ b/drivers/net/cxgb3/cxgb3_main.c | |
44 | @@ -1016,6 +1016,13 @@ static int cxgb_up(struct adapter *adap) | |
45 | goto out; | |
46 | } | |
47 | ||
48 | + /* | |
49 | + * Clear interrupts now to catch errors if t3_init_hw fails. | |
50 | + * We clear them again later as initialization may trigger | |
51 | + * conditions that can interrupt. | |
52 | + */ | |
53 | + t3_intr_clear(adap); | |
54 | + | |
55 | err = t3_init_hw(adap, 0); | |
56 | if (err) | |
57 | goto out; | |
58 | @@ -1224,9 +1231,9 @@ static int cxgb_close(struct net_device | |
59 | if (is_offload(adapter) && !ofld_disable) | |
60 | sysfs_remove_group(&dev->dev.kobj, &iscsi_offload_attr_group); | |
61 | ||
62 | - spin_lock(&adapter->work_lock); /* sync with update task */ | |
63 | + spin_lock_irq(&adapter->work_lock); /* sync with update task */ | |
64 | clear_bit(pi->port_id, &adapter->open_device_map); | |
65 | - spin_unlock(&adapter->work_lock); | |
66 | + spin_unlock_irq(&adapter->work_lock); | |
67 | ||
68 | if (!(adapter->open_device_map & PORT_MASK)) | |
69 | cancel_rearming_delayed_workqueue(cxgb3_wq, | |
70 | @@ -2555,10 +2562,10 @@ static void t3_adap_check_task(struct wo | |
71 | check_t3b2_mac(adapter); | |
72 | ||
73 | /* Schedule the next check update if any port is active. */ | |
74 | - spin_lock(&adapter->work_lock); | |
75 | + spin_lock_irq(&adapter->work_lock); | |
76 | if (adapter->open_device_map & PORT_MASK) | |
77 | schedule_chk_task(adapter); | |
78 | - spin_unlock(&adapter->work_lock); | |
79 | + spin_unlock_irq(&adapter->work_lock); | |
80 | } | |
81 | ||
82 | /* | |
83 | @@ -2603,6 +2610,96 @@ void t3_os_ext_intr_handler(struct adapt | |
84 | spin_unlock(&adapter->work_lock); | |
85 | } | |
86 | ||
87 | +static int t3_adapter_error(struct adapter *adapter, int reset) | |
88 | +{ | |
89 | + int i, ret = 0; | |
90 | + | |
91 | + /* Stop all ports */ | |
92 | + for_each_port(adapter, i) { | |
93 | + struct net_device *netdev = adapter->port[i]; | |
94 | + | |
95 | + if (netif_running(netdev)) | |
96 | + cxgb_close(netdev); | |
97 | + } | |
98 | + | |
99 | + if (is_offload(adapter) && | |
100 | + test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) | |
101 | + offload_close(&adapter->tdev); | |
102 | + | |
103 | + /* Stop SGE timers */ | |
104 | + t3_stop_sge_timers(adapter); | |
105 | + | |
106 | + adapter->flags &= ~FULL_INIT_DONE; | |
107 | + | |
108 | + if (reset) | |
109 | + ret = t3_reset_adapter(adapter); | |
110 | + | |
111 | + pci_disable_device(adapter->pdev); | |
112 | + | |
113 | + return ret; | |
114 | +} | |
115 | + | |
116 | +static int t3_reenable_adapter(struct adapter *adapter) | |
117 | +{ | |
118 | + if (pci_enable_device(adapter->pdev)) { | |
119 | + dev_err(&adapter->pdev->dev, | |
120 | + "Cannot re-enable PCI device after reset.\n"); | |
121 | + goto err; | |
122 | + } | |
123 | + pci_set_master(adapter->pdev); | |
124 | + pci_restore_state(adapter->pdev); | |
125 | + | |
126 | + /* Free sge resources */ | |
127 | + t3_free_sge_resources(adapter); | |
128 | + | |
129 | + if (t3_replay_prep_adapter(adapter)) | |
130 | + goto err; | |
131 | + | |
132 | + return 0; | |
133 | +err: | |
134 | + return -1; | |
135 | +} | |
136 | + | |
137 | +static void t3_resume_ports(struct adapter *adapter) | |
138 | +{ | |
139 | + int i; | |
140 | + | |
141 | + /* Restart the ports */ | |
142 | + for_each_port(adapter, i) { | |
143 | + struct net_device *netdev = adapter->port[i]; | |
144 | + | |
145 | + if (netif_running(netdev)) { | |
146 | + if (cxgb_open(netdev)) { | |
147 | + dev_err(&adapter->pdev->dev, | |
148 | + "can't bring device back up" | |
149 | + " after reset\n"); | |
150 | + continue; | |
151 | + } | |
152 | + } | |
153 | + } | |
154 | +} | |
155 | + | |
156 | +/* | |
157 | + * Process a fatal error. | |
158 | + * Bring the ports down, reset the chip, bring the ports back up. | |
159 | + */ | |
160 | +static void fatal_error_task(struct work_struct *work) | |
161 | +{ | |
162 | + struct adapter *adapter = container_of(work, struct adapter, | |
163 | + fatal_error_handler_task); | |
164 | + int err = 0; | |
165 | + | |
166 | + rtnl_lock(); | |
167 | + err = t3_adapter_error(adapter, 1); | |
168 | + if (!err) | |
169 | + err = t3_reenable_adapter(adapter); | |
170 | + if (!err) | |
171 | + t3_resume_ports(adapter); | |
172 | + | |
173 | + CH_ALERT(adapter, "adapter reset %s\n", err ? "failed" : "succeeded"); | |
174 | + rtnl_unlock(); | |
175 | +} | |
176 | + | |
177 | void t3_fatal_err(struct adapter *adapter) | |
178 | { | |
179 | unsigned int fw_status[4]; | |
180 | @@ -2613,7 +2710,11 @@ void t3_fatal_err(struct adapter *adapte | |
181 | t3_write_reg(adapter, A_XGM_RX_CTRL, 0); | |
182 | t3_write_reg(adapter, XGM_REG(A_XGM_TX_CTRL, 1), 0); | |
183 | t3_write_reg(adapter, XGM_REG(A_XGM_RX_CTRL, 1), 0); | |
184 | + | |
185 | + spin_lock(&adapter->work_lock); | |
186 | t3_intr_disable(adapter); | |
187 | + queue_work(cxgb3_wq, &adapter->fatal_error_handler_task); | |
188 | + spin_unlock(&adapter->work_lock); | |
189 | } | |
190 | CH_ALERT(adapter, "encountered fatal error, operation suspended\n"); | |
191 | if (!t3_cim_ctl_blk_read(adapter, 0xa0, 4, fw_status)) | |
192 | @@ -2635,26 +2736,9 @@ static pci_ers_result_t t3_io_error_dete | |
193 | pci_channel_state_t state) | |
194 | { | |
195 | struct adapter *adapter = pci_get_drvdata(pdev); | |
196 | - int i; | |
197 | - | |
198 | - /* Stop all ports */ | |
199 | - for_each_port(adapter, i) { | |
200 | - struct net_device *netdev = adapter->port[i]; | |
201 | - | |
202 | - if (netif_running(netdev)) | |
203 | - cxgb_close(netdev); | |
204 | - } | |
205 | - | |
206 | - if (is_offload(adapter) && | |
207 | - test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) | |
208 | - offload_close(&adapter->tdev); | |
209 | - | |
210 | - /* Stop SGE timers */ | |
211 | - t3_stop_sge_timers(adapter); | |
212 | - | |
213 | - adapter->flags &= ~FULL_INIT_DONE; | |
214 | - | |
215 | - pci_disable_device(pdev); | |
216 | + int ret; | |
217 | + | |
218 | + ret = t3_adapter_error(adapter, 0); | |
219 | ||
220 | /* Request a slot reset. */ | |
221 | return PCI_ERS_RESULT_NEED_RESET; | |
222 | @@ -2670,22 +2754,9 @@ static pci_ers_result_t t3_io_slot_reset | |
223 | { | |
224 | struct adapter *adapter = pci_get_drvdata(pdev); | |
225 | ||
226 | - if (pci_enable_device(pdev)) { | |
227 | - dev_err(&pdev->dev, | |
228 | - "Cannot re-enable PCI device after reset.\n"); | |
229 | - goto err; | |
230 | - } | |
231 | - pci_set_master(pdev); | |
232 | - pci_restore_state(pdev); | |
233 | - | |
234 | - /* Free sge resources */ | |
235 | - t3_free_sge_resources(adapter); | |
236 | - | |
237 | - if (t3_replay_prep_adapter(adapter)) | |
238 | - goto err; | |
239 | + if (!t3_reenable_adapter(adapter)) | |
240 | + return PCI_ERS_RESULT_RECOVERED; | |
241 | ||
242 | - return PCI_ERS_RESULT_RECOVERED; | |
243 | -err: | |
244 | return PCI_ERS_RESULT_DISCONNECT; | |
245 | } | |
246 | ||
247 | @@ -2699,22 +2770,8 @@ err: | |
248 | static void t3_io_resume(struct pci_dev *pdev) | |
249 | { | |
250 | struct adapter *adapter = pci_get_drvdata(pdev); | |
251 | - int i; | |
252 | - | |
253 | - /* Restart the ports */ | |
254 | - for_each_port(adapter, i) { | |
255 | - struct net_device *netdev = adapter->port[i]; | |
256 | ||
257 | - if (netif_running(netdev)) { | |
258 | - if (cxgb_open(netdev)) { | |
259 | - dev_err(&pdev->dev, | |
260 | - "can't bring device back up" | |
261 | - " after reset\n"); | |
262 | - continue; | |
263 | - } | |
264 | - netif_device_attach(netdev); | |
265 | - } | |
266 | - } | |
267 | + t3_resume_ports(adapter); | |
268 | } | |
269 | ||
270 | static struct pci_error_handlers t3_err_handler = { | |
271 | @@ -2899,6 +2956,7 @@ static int __devinit init_one(struct pci | |
272 | ||
273 | INIT_LIST_HEAD(&adapter->adapter_list); | |
274 | INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task); | |
275 | + INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task); | |
276 | INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task); | |
277 | ||
278 | for (i = 0; i < ai->nports; ++i) { | |
279 | --- a/drivers/net/cxgb3/sge.c | |
280 | +++ b/drivers/net/cxgb3/sge.c | |
281 | @@ -352,7 +352,8 @@ static void free_rx_bufs(struct pci_dev | |
282 | pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr), | |
283 | q->buf_size, PCI_DMA_FROMDEVICE); | |
284 | if (q->use_pages) { | |
285 | - put_page(d->pg_chunk.page); | |
286 | + if (d->pg_chunk.page) | |
287 | + put_page(d->pg_chunk.page); | |
288 | d->pg_chunk.page = NULL; | |
289 | } else { | |
290 | kfree_skb(d->skb); | |
291 | @@ -584,7 +585,7 @@ static void t3_reset_qset(struct sge_qse | |
292 | memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET); | |
293 | memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); | |
294 | q->txq_stopped = 0; | |
295 | - memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer)); | |
296 | + q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */ | |
297 | kfree(q->lro_frag_tbl); | |
298 | q->lro_nfrags = q->lro_frag_len = 0; | |
299 | } | |
300 | @@ -2900,9 +2901,7 @@ int t3_sge_alloc_qset(struct adapter *ad | |
301 | struct net_lro_mgr *lro_mgr = &q->lro_mgr; | |
302 | ||
303 | init_qset_cntxt(q, id); | |
304 | - init_timer(&q->tx_reclaim_timer); | |
305 | - q->tx_reclaim_timer.data = (unsigned long)q; | |
306 | - q->tx_reclaim_timer.function = sge_timer_cb; | |
307 | + setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q); | |
308 | ||
309 | q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, | |
310 | sizeof(struct rx_desc), | |
311 | --- a/drivers/net/cxgb3/t3_hw.c | |
312 | +++ b/drivers/net/cxgb3/t3_hw.c | |
313 | @@ -1275,7 +1275,7 @@ struct intr_info { | |
314 | unsigned int mask; /* bits to check in interrupt status */ | |
315 | const char *msg; /* message to print or NULL */ | |
316 | short stat_idx; /* stat counter to increment or -1 */ | |
317 | - unsigned short fatal:1; /* whether the condition reported is fatal */ | |
318 | + unsigned short fatal; /* whether the condition reported is fatal */ | |
319 | }; | |
320 | ||
321 | /** | |
322 | @@ -3551,7 +3551,7 @@ void early_hw_init(struct adapter *adapt | |
323 | * Older PCIe cards lose their config space during reset, PCI-X | |
324 | * ones don't. | |
325 | */ | |
326 | -static int t3_reset_adapter(struct adapter *adapter) | |
327 | +int t3_reset_adapter(struct adapter *adapter) | |
328 | { | |
329 | int i, save_and_restore_pcie = | |
330 | adapter->params.rev < T3_REV_B2 && is_pcie(adapter); |