]>
Commit | Line | Data |
---|---|---|
035933cc GKH |
1 | From fa89adba1941e4f3b213399b81732a5c12fd9131 Mon Sep 17 00:00:00 2001 |
2 | From: Jens Remus <jremus@linux.ibm.com> | |
3 | Date: Thu, 3 May 2018 13:52:47 +0200 | |
4 | Subject: scsi: zfcp: fix infinite iteration on ERP ready list | |
5 | ||
6 | From: Jens Remus <jremus@linux.ibm.com> | |
7 | ||
8 | commit fa89adba1941e4f3b213399b81732a5c12fd9131 upstream. | |
9 | ||
10 | zfcp_erp_adapter_reopen() schedules blocking of all of the adapter's | |
11 | rports via zfcp_scsi_schedule_rports_block() and enqueues a reopen | |
12 | adapter ERP action via zfcp_erp_action_enqueue(). Both are separately | |
13 | processed asynchronously and concurrently. | |
14 | ||
15 | Blocking of rports is done in a kworker by zfcp_scsi_rport_work(). It | |
16 | calls zfcp_scsi_rport_block(), which then traces a DBF REC "scpdely" via | |
17 | zfcp_dbf_rec_trig(). zfcp_dbf_rec_trig() acquires the DBF REC spin lock | |
18 | and then iterates with list_for_each() over the adapter's ERP ready list | |
19 | without holding the ERP lock. This opens a race window in which the | |
20 | current list entry can be moved to another list, causing list_for_each() | |
21 | to iterate forever on the wrong list, as the erp_ready_head is never | |
22 | encountered as terminal condition. | |
23 | ||
24 | Meanwhile the ERP action can be processed in the ERP thread by | |
25 | zfcp_erp_thread(). It calls zfcp_erp_strategy(), which acquires the ERP | |
26 | lock and then calls zfcp_erp_action_to_running() to move the ERP action | |
27 | from the ready to the running list. zfcp_erp_action_to_running() can | |
28 | move the ERP action using list_move() just during the aforementioned | |
29 | race window. It then traces a REC RUN "erator1" via zfcp_dbf_rec_run(). | |
30 | zfcp_dbf_rec_run() tries to acquire the DBF REC spin lock. If this is | |
31 | held by the infinitely looping kworker, it effectively spins forever. | |
32 | ||
33 | Example Sequence Diagram: | |
34 | ||
35 | Process ERP Thread rport_work | |
36 | ------------------- ------------------- ------------------- | |
37 | zfcp_erp_adapter_reopen() | |
38 | zfcp_erp_adapter_block() | |
39 | zfcp_scsi_schedule_rports_block() | |
40 | lock ERP zfcp_scsi_rport_work() | |
41 | zfcp_erp_action_enqueue(ZFCP_ERP_ACTION_REOPEN_ADAPTER) | |
42 | list_add_tail() on ready !(rport_task==RPORT_ADD) | |
43 | wake_up() ERP thread zfcp_scsi_rport_block() | |
44 | zfcp_dbf_rec_trig() zfcp_erp_strategy() zfcp_dbf_rec_trig() | |
45 | unlock ERP lock DBF REC | |
46 | zfcp_erp_wait() lock ERP | |
47 | | zfcp_erp_action_to_running() | |
48 | | list_for_each() ready | |
49 | | list_move() current entry | |
50 | | ready to running | |
51 | | zfcp_dbf_rec_run() endless loop over running | |
52 | | zfcp_dbf_rec_run_lvl() | |
53 | | lock DBF REC spins forever | |
54 | ||
55 | Any adapter recovery can trigger this, such as setting the device offline | |
56 | or reboot. | |
57 | ||
58 | V4.9 commit 4eeaa4f3f1d6 ("zfcp: close window with unblocked rport | |
59 | during rport gone") introduced additional tracing of (un)blocking of | |
60 | rports. It missed that the adapter->erp_lock must be held when calling | |
61 | zfcp_dbf_rec_trig(). | |
62 | ||
63 | This fix uses the approach formerly introduced by commit aa0fec62391c | |
64 | ("[SCSI] zfcp: Fix sparse warning by providing new entry in dbf") that got | |
65 | later removed by commit ae0904f60fab ("[SCSI] zfcp: Redesign of the debug | |
66 | tracing for recovery actions."). | |
67 | ||
68 | Introduce zfcp_dbf_rec_trig_lock(), a wrapper for zfcp_dbf_rec_trig() that | |
69 | acquires and releases the adapter->erp_lock for read. | |
70 | ||
71 | Reported-by: Sebastian Ott <sebott@linux.ibm.com> | |
72 | Signed-off-by: Jens Remus <jremus@linux.ibm.com> | |
73 | Fixes: 4eeaa4f3f1d6 ("zfcp: close window with unblocked rport during rport gone") | |
74 | Cc: <stable@vger.kernel.org> # 2.6.32+ | |
75 | Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com> | |
76 | Signed-off-by: Steffen Maier <maier@linux.ibm.com> | |
77 | Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> | |
78 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
79 | ||
80 | --- | |
81 | drivers/s390/scsi/zfcp_dbf.c | 23 ++++++++++++++++++++++- | |
82 | drivers/s390/scsi/zfcp_ext.h | 5 ++++- | |
83 | drivers/s390/scsi/zfcp_scsi.c | 14 +++++++------- | |
84 | 3 files changed, 33 insertions(+), 9 deletions(-) | |
85 | ||
86 | --- a/drivers/s390/scsi/zfcp_dbf.c | |
87 | +++ b/drivers/s390/scsi/zfcp_dbf.c | |
88 | @@ -4,7 +4,7 @@ | |
89 | * | |
90 | * Debug traces for zfcp. | |
91 | * | |
92 | - * Copyright IBM Corp. 2002, 2017 | |
93 | + * Copyright IBM Corp. 2002, 2018 | |
94 | */ | |
95 | ||
96 | #define KMSG_COMPONENT "zfcp" | |
97 | @@ -308,6 +308,27 @@ void zfcp_dbf_rec_trig(char *tag, struct | |
98 | spin_unlock_irqrestore(&dbf->rec_lock, flags); | |
99 | } | |
100 | ||
101 | +/** | |
102 | + * zfcp_dbf_rec_trig_lock - trace event related to triggered recovery with lock | |
103 | + * @tag: identifier for event | |
104 | + * @adapter: adapter on which the erp_action should run | |
105 | + * @port: remote port involved in the erp_action | |
106 | + * @sdev: scsi device involved in the erp_action | |
107 | + * @want: wanted erp_action | |
108 | + * @need: required erp_action | |
109 | + * | |
110 | + * The adapter->erp_lock must not be held. | |
111 | + */ | |
112 | +void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter, | |
113 | + struct zfcp_port *port, struct scsi_device *sdev, | |
114 | + u8 want, u8 need) | |
115 | +{ | |
116 | + unsigned long flags; | |
117 | + | |
118 | + read_lock_irqsave(&adapter->erp_lock, flags); | |
119 | + zfcp_dbf_rec_trig(tag, adapter, port, sdev, want, need); | |
120 | + read_unlock_irqrestore(&adapter->erp_lock, flags); | |
121 | +} | |
122 | ||
123 | /** | |
124 | * zfcp_dbf_rec_run_lvl - trace event related to running recovery | |
125 | --- a/drivers/s390/scsi/zfcp_ext.h | |
126 | +++ b/drivers/s390/scsi/zfcp_ext.h | |
127 | @@ -4,7 +4,7 @@ | |
128 | * | |
129 | * External function declarations. | |
130 | * | |
131 | - * Copyright IBM Corp. 2002, 2016 | |
132 | + * Copyright IBM Corp. 2002, 2018 | |
133 | */ | |
134 | ||
135 | #ifndef ZFCP_EXT_H | |
136 | @@ -35,6 +35,9 @@ extern int zfcp_dbf_adapter_register(str | |
137 | extern void zfcp_dbf_adapter_unregister(struct zfcp_adapter *); | |
138 | extern void zfcp_dbf_rec_trig(char *, struct zfcp_adapter *, | |
139 | struct zfcp_port *, struct scsi_device *, u8, u8); | |
140 | +extern void zfcp_dbf_rec_trig_lock(char *tag, struct zfcp_adapter *adapter, | |
141 | + struct zfcp_port *port, | |
142 | + struct scsi_device *sdev, u8 want, u8 need); | |
143 | extern void zfcp_dbf_rec_run(char *, struct zfcp_erp_action *); | |
144 | extern void zfcp_dbf_rec_run_lvl(int level, char *tag, | |
145 | struct zfcp_erp_action *erp); | |
146 | --- a/drivers/s390/scsi/zfcp_scsi.c | |
147 | +++ b/drivers/s390/scsi/zfcp_scsi.c | |
148 | @@ -4,7 +4,7 @@ | |
149 | * | |
150 | * Interface to Linux SCSI midlayer. | |
151 | * | |
152 | - * Copyright IBM Corp. 2002, 2017 | |
153 | + * Copyright IBM Corp. 2002, 2018 | |
154 | */ | |
155 | ||
156 | #define KMSG_COMPONENT "zfcp" | |
157 | @@ -618,9 +618,9 @@ static void zfcp_scsi_rport_register(str | |
158 | ids.port_id = port->d_id; | |
159 | ids.roles = FC_RPORT_ROLE_FCP_TARGET; | |
160 | ||
161 | - zfcp_dbf_rec_trig("scpaddy", port->adapter, port, NULL, | |
162 | - ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD, | |
163 | - ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD); | |
164 | + zfcp_dbf_rec_trig_lock("scpaddy", port->adapter, port, NULL, | |
165 | + ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD, | |
166 | + ZFCP_PSEUDO_ERP_ACTION_RPORT_ADD); | |
167 | rport = fc_remote_port_add(port->adapter->scsi_host, 0, &ids); | |
168 | if (!rport) { | |
169 | dev_err(&port->adapter->ccw_device->dev, | |
170 | @@ -642,9 +642,9 @@ static void zfcp_scsi_rport_block(struct | |
171 | struct fc_rport *rport = port->rport; | |
172 | ||
173 | if (rport) { | |
174 | - zfcp_dbf_rec_trig("scpdely", port->adapter, port, NULL, | |
175 | - ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL, | |
176 | - ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL); | |
177 | + zfcp_dbf_rec_trig_lock("scpdely", port->adapter, port, NULL, | |
178 | + ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL, | |
179 | + ZFCP_PSEUDO_ERP_ACTION_RPORT_DEL); | |
180 | fc_remote_port_delete(rport); | |
181 | port->rport = NULL; | |
182 | } |