]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - queue-5.1/scsi-qla2xxx-add-cleanup-for-pci-eeh-recovery.patch
Fix up backported ptrace patch
[thirdparty/kernel/stable-queue.git] / queue-5.1 / scsi-qla2xxx-add-cleanup-for-pci-eeh-recovery.patch
1 From 4c6dac87cc65532fcc5d4ada82aaa7bfb3f582fc Mon Sep 17 00:00:00 2001
2 From: Quinn Tran <qutran@marvell.com>
3 Date: Mon, 6 May 2019 13:52:19 -0700
4 Subject: scsi: qla2xxx: Add cleanup for PCI EEH recovery
5
6 [ Upstream commit 5386a4e6c7fecd282d265a24d930a74ba3c5917b ]
7
8 During EEH error recovery testing it was discovered that the driver's
9 reset() callback only partially frees the resources used by the driver,
10 leaving some stale memory behind. When the subsequent resume() callback
11 runs, it uses this stale data, which results in an error that leaves the
12 adapter disabled due to the PCIe error.
13
14 This patch cleans up the EEH recovery code path and prevents the
15 adapter from being disabled.
16
17 Signed-off-by: Quinn Tran <qutran@marvell.com>
18 Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
19 Reviewed-by: Ewan D. Milne <emilne@redhat.com>
20 Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
21 Signed-off-by: Sasha Levin <sashal@kernel.org>
22 ---
23 drivers/scsi/qla2xxx/qla_os.c | 221 +++++++++++++---------------------
24 1 file changed, 82 insertions(+), 139 deletions(-)
25
26 diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
27 index 91f576d743fe..d377e50a6c19 100644
28 --- a/drivers/scsi/qla2xxx/qla_os.c
29 +++ b/drivers/scsi/qla2xxx/qla_os.c
30 @@ -6838,6 +6838,78 @@ qla2x00_release_firmware(void)
31 mutex_unlock(&qla_fw_lock);
32 }
33
34 +static void qla_pci_error_cleanup(scsi_qla_host_t *vha)
35 +{
36 + struct qla_hw_data *ha = vha->hw;
37 + scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev);
38 + struct qla_qpair *qpair = NULL;
39 + struct scsi_qla_host *vp;
40 + fc_port_t *fcport;
41 + int i;
42 + unsigned long flags;
43 +
44 + ha->chip_reset++;
45 +
46 + ha->base_qpair->chip_reset = ha->chip_reset;
47 + for (i = 0; i < ha->max_qpairs; i++) {
48 + if (ha->queue_pair_map[i])
49 + ha->queue_pair_map[i]->chip_reset =
50 + ha->base_qpair->chip_reset;
51 + }
52 +
53 + /* purge MBox commands */
54 + if (atomic_read(&ha->num_pend_mbx_stage3)) {
55 + clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
56 + complete(&ha->mbx_intr_comp);
57 + }
58 +
59 + i = 0;
60 +
61 + while (atomic_read(&ha->num_pend_mbx_stage3) ||
62 + atomic_read(&ha->num_pend_mbx_stage2) ||
63 + atomic_read(&ha->num_pend_mbx_stage1)) {
64 + msleep(20);
65 + i++;
66 + if (i > 50)
67 + break;
68 + }
69 +
70 + ha->flags.purge_mbox = 0;
71 +
72 + mutex_lock(&ha->mq_lock);
73 + list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem)
74 + qpair->online = 0;
75 + mutex_unlock(&ha->mq_lock);
76 +
77 + qla2x00_mark_all_devices_lost(vha, 0);
78 +
79 + spin_lock_irqsave(&ha->vport_slock, flags);
80 + list_for_each_entry(vp, &ha->vp_list, list) {
81 + atomic_inc(&vp->vref_count);
82 + spin_unlock_irqrestore(&ha->vport_slock, flags);
83 + qla2x00_mark_all_devices_lost(vp, 0);
84 + spin_lock_irqsave(&ha->vport_slock, flags);
85 + atomic_dec(&vp->vref_count);
86 + }
87 + spin_unlock_irqrestore(&ha->vport_slock, flags);
88 +
89 + /* Clear all async request states across all VPs. */
90 + list_for_each_entry(fcport, &vha->vp_fcports, list)
91 + fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
92 +
93 + spin_lock_irqsave(&ha->vport_slock, flags);
94 + list_for_each_entry(vp, &ha->vp_list, list) {
95 + atomic_inc(&vp->vref_count);
96 + spin_unlock_irqrestore(&ha->vport_slock, flags);
97 + list_for_each_entry(fcport, &vp->vp_fcports, list)
98 + fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT);
99 + spin_lock_irqsave(&ha->vport_slock, flags);
100 + atomic_dec(&vp->vref_count);
101 + }
102 + spin_unlock_irqrestore(&ha->vport_slock, flags);
103 +}
104 +
105 +
106 static pci_ers_result_t
107 qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
108 {
109 @@ -6863,20 +6935,7 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
110 return PCI_ERS_RESULT_CAN_RECOVER;
111 case pci_channel_io_frozen:
112 ha->flags.eeh_busy = 1;
113 - /* For ISP82XX complete any pending mailbox cmd */
114 - if (IS_QLA82XX(ha)) {
115 - ha->flags.isp82xx_fw_hung = 1;
116 - ql_dbg(ql_dbg_aer, vha, 0x9001, "Pci channel io frozen\n");
117 - qla82xx_clear_pending_mbx(vha);
118 - }
119 - qla2x00_free_irqs(vha);
120 - pci_disable_device(pdev);
121 - /* Return back all IOs */
122 - qla2x00_abort_all_cmds(vha, DID_RESET << 16);
123 - if (ql2xmqsupport || ql2xnvmeenable) {
124 - set_bit(QPAIR_ONLINE_CHECK_NEEDED, &vha->dpc_flags);
125 - qla2xxx_wake_dpc(vha);
126 - }
127 + qla_pci_error_cleanup(vha);
128 return PCI_ERS_RESULT_NEED_RESET;
129 case pci_channel_io_perm_failure:
130 ha->flags.pci_channel_io_perm_failure = 1;
131 @@ -6930,122 +6989,14 @@ qla2xxx_pci_mmio_enabled(struct pci_dev *pdev)
132 return PCI_ERS_RESULT_RECOVERED;
133 }
134
135 -static uint32_t
136 -qla82xx_error_recovery(scsi_qla_host_t *base_vha)
137 -{
138 - uint32_t rval = QLA_FUNCTION_FAILED;
139 - uint32_t drv_active = 0;
140 - struct qla_hw_data *ha = base_vha->hw;
141 - int fn;
142 - struct pci_dev *other_pdev = NULL;
143 -
144 - ql_dbg(ql_dbg_aer, base_vha, 0x9006,
145 - "Entered %s.\n", __func__);
146 -
147 - set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
148 -
149 - if (base_vha->flags.online) {
150 - /* Abort all outstanding commands,
151 - * so as to be requeued later */
152 - qla2x00_abort_isp_cleanup(base_vha);
153 - }
154 -
155 -
156 - fn = PCI_FUNC(ha->pdev->devfn);
157 - while (fn > 0) {
158 - fn--;
159 - ql_dbg(ql_dbg_aer, base_vha, 0x9007,
160 - "Finding pci device at function = 0x%x.\n", fn);
161 - other_pdev =
162 - pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
163 - ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
164 - fn));
165 -
166 - if (!other_pdev)
167 - continue;
168 - if (atomic_read(&other_pdev->enable_cnt)) {
169 - ql_dbg(ql_dbg_aer, base_vha, 0x9008,
170 - "Found PCI func available and enable at 0x%x.\n",
171 - fn);
172 - pci_dev_put(other_pdev);
173 - break;
174 - }
175 - pci_dev_put(other_pdev);
176 - }
177 -
178 - if (!fn) {
179 - /* Reset owner */
180 - ql_dbg(ql_dbg_aer, base_vha, 0x9009,
181 - "This devfn is reset owner = 0x%x.\n",
182 - ha->pdev->devfn);
183 - qla82xx_idc_lock(ha);
184 -
185 - qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
186 - QLA8XXX_DEV_INITIALIZING);
187 -
188 - qla82xx_wr_32(ha, QLA82XX_CRB_DRV_IDC_VERSION,
189 - QLA82XX_IDC_VERSION);
190 -
191 - drv_active = qla82xx_rd_32(ha, QLA82XX_CRB_DRV_ACTIVE);
192 - ql_dbg(ql_dbg_aer, base_vha, 0x900a,
193 - "drv_active = 0x%x.\n", drv_active);
194 -
195 - qla82xx_idc_unlock(ha);
196 - /* Reset if device is not already reset
197 - * drv_active would be 0 if a reset has already been done
198 - */
199 - if (drv_active)
200 - rval = qla82xx_start_firmware(base_vha);
201 - else
202 - rval = QLA_SUCCESS;
203 - qla82xx_idc_lock(ha);
204 -
205 - if (rval != QLA_SUCCESS) {
206 - ql_log(ql_log_info, base_vha, 0x900b,
207 - "HW State: FAILED.\n");
208 - qla82xx_clear_drv_active(ha);
209 - qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
210 - QLA8XXX_DEV_FAILED);
211 - } else {
212 - ql_log(ql_log_info, base_vha, 0x900c,
213 - "HW State: READY.\n");
214 - qla82xx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
215 - QLA8XXX_DEV_READY);
216 - qla82xx_idc_unlock(ha);
217 - ha->flags.isp82xx_fw_hung = 0;
218 - rval = qla82xx_restart_isp(base_vha);
219 - qla82xx_idc_lock(ha);
220 - /* Clear driver state register */
221 - qla82xx_wr_32(ha, QLA82XX_CRB_DRV_STATE, 0);
222 - qla82xx_set_drv_active(base_vha);
223 - }
224 - qla82xx_idc_unlock(ha);
225 - } else {
226 - ql_dbg(ql_dbg_aer, base_vha, 0x900d,
227 - "This devfn is not reset owner = 0x%x.\n",
228 - ha->pdev->devfn);
229 - if ((qla82xx_rd_32(ha, QLA82XX_CRB_DEV_STATE) ==
230 - QLA8XXX_DEV_READY)) {
231 - ha->flags.isp82xx_fw_hung = 0;
232 - rval = qla82xx_restart_isp(base_vha);
233 - qla82xx_idc_lock(ha);
234 - qla82xx_set_drv_active(base_vha);
235 - qla82xx_idc_unlock(ha);
236 - }
237 - }
238 - clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
239 -
240 - return rval;
241 -}
242 -
243 static pci_ers_result_t
244 qla2xxx_pci_slot_reset(struct pci_dev *pdev)
245 {
246 pci_ers_result_t ret = PCI_ERS_RESULT_DISCONNECT;
247 scsi_qla_host_t *base_vha = pci_get_drvdata(pdev);
248 struct qla_hw_data *ha = base_vha->hw;
249 - struct rsp_que *rsp;
250 - int rc, retries = 10;
251 + int rc;
252 + struct qla_qpair *qpair = NULL;
253
254 ql_dbg(ql_dbg_aer, base_vha, 0x9004,
255 "Slot Reset.\n");
256 @@ -7074,24 +7025,16 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev)
257 goto exit_slot_reset;
258 }
259
260 - rsp = ha->rsp_q_map[0];
261 - if (qla2x00_request_irqs(ha, rsp))
262 - goto exit_slot_reset;
263
264 if (ha->isp_ops->pci_config(base_vha))
265 goto exit_slot_reset;
266
267 - if (IS_QLA82XX(ha)) {
268 - if (qla82xx_error_recovery(base_vha) == QLA_SUCCESS) {
269 - ret = PCI_ERS_RESULT_RECOVERED;
270 - goto exit_slot_reset;
271 - } else
272 - goto exit_slot_reset;
273 - }
274 -
275 - while (ha->flags.mbox_busy && retries--)
276 - msleep(1000);
277 + mutex_lock(&ha->mq_lock);
278 + list_for_each_entry(qpair, &base_vha->qp_list, qp_list_elem)
279 + qpair->online = 1;
280 + mutex_unlock(&ha->mq_lock);
281
282 + base_vha->flags.online = 1;
283 set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
284 if (ha->isp_ops->abort_isp(base_vha) == QLA_SUCCESS)
285 ret = PCI_ERS_RESULT_RECOVERED;
286 @@ -7115,13 +7058,13 @@ qla2xxx_pci_resume(struct pci_dev *pdev)
287 ql_dbg(ql_dbg_aer, base_vha, 0x900f,
288 "pci_resume.\n");
289
290 + ha->flags.eeh_busy = 0;
291 +
292 ret = qla2x00_wait_for_hba_online(base_vha);
293 if (ret != QLA_SUCCESS) {
294 ql_log(ql_log_fatal, base_vha, 0x9002,
295 "The device failed to resume I/O from slot/link_reset.\n");
296 }
297 -
298 - ha->flags.eeh_busy = 0;
299 }
300
301 static void
302 --
303 2.20.1
304