]>
Commit | Line | Data |
---|---|---|
4e2b6cae GKH |
1 | From 1e560864159d002b453da42bd2c13a1805515a20 Mon Sep 17 00:00:00 2001 |
2 | From: Johan Hovold <johan+linaro@kernel.org> | |
3 | Date: Tue, 30 Jan 2024 11:02:43 +0100 | |
4 | Subject: PCI/ASPM: Fix deadlock when enabling ASPM | |
5 | ||
6 | From: Johan Hovold <johan+linaro@kernel.org> | |
7 | ||
8 | commit 1e560864159d002b453da42bd2c13a1805515a20 upstream. | |
9 | ||
10 | A last minute revert in 6.7-final introduced a potential deadlock when | |
11 | enabling ASPM during probe of Qualcomm PCIe controllers as reported by | |
12 | lockdep: | |
13 | ||
14 | ============================================ | |
15 | WARNING: possible recursive locking detected | |
16 | 6.7.0 #40 Not tainted | |
17 | -------------------------------------------- | |
18 | kworker/u16:5/90 is trying to acquire lock: | |
19 | ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc | |
20 | ||
21 | but task is already holding lock: | |
22 | ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc | |
23 | ||
24 | other info that might help us debug this: | |
25 | Possible unsafe locking scenario: | |
26 | ||
27 | CPU0 | |
28 | ---- | |
29 | lock(pci_bus_sem); | |
30 | lock(pci_bus_sem); | |
31 | ||
32 | *** DEADLOCK *** | |
33 | ||
34 | Call trace: | |
35 | print_deadlock_bug+0x25c/0x348 | |
36 | __lock_acquire+0x10a4/0x2064 | |
37 | lock_acquire+0x1e8/0x318 | |
38 | down_read+0x60/0x184 | |
39 | pcie_aspm_pm_state_change+0x58/0xdc | |
40 | pci_set_full_power_state+0xa8/0x114 | |
41 | pci_set_power_state+0xc4/0x120 | |
42 | qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] | |
43 | pci_walk_bus+0x64/0xbc | |
44 | qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] | |
45 | ||
46 | The deadlock can easily be reproduced on machines like the Lenovo ThinkPad | |
47 | X13s by adding a delay to increase the race window during asynchronous | |
48 | probe where another thread can take a write lock. | |
49 | ||
50 | Add a new pci_set_power_state_locked() and associated helper functions that | |
51 | can be called with the PCI bus semaphore held to avoid taking the read lock | |
52 | twice. | |
53 | ||
54 | Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com | |
55 | Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org | |
56 | Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") | |
57 | Signed-off-by: Johan Hovold <johan+linaro@kernel.org> | |
58 | Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> | |
59 | Cc: <stable@vger.kernel.org> # 6.7 | |
60 | [bhelgaas: backported to v6.6.y, which contains 8cc22ba3f77c ("Revert | |
61 | "PCI/ASPM: Remove pcie_aspm_pm_state_change()""), a backport of | |
62 | f93e71aea6c6. This omits the drivers/pci/controller/dwc/pcie-qcom.c hunk | |
63 | that updates qcom_pcie_enable_aspm(), which was added by 9f4f3dfad8cf | |
64 | ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops"), which is not | |
65 | present in v6.6.28.] | |
66 | Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> | |
67 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
68 | --- | |
69 | drivers/pci/bus.c | 49 +++++++++++++++++++----------- | |
70 | drivers/pci/pci.c | 78 ++++++++++++++++++++++++++++++++---------------- | |
71 | drivers/pci/pci.h | 4 +- | |
72 | drivers/pci/pcie/aspm.c | 13 +++++--- | |
73 | include/linux/pci.h | 5 +++ | |
74 | 5 files changed, 100 insertions(+), 49 deletions(-) | |
75 | ||
76 | --- a/drivers/pci/bus.c | |
77 | +++ b/drivers/pci/bus.c | |
78 | @@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pc | |
79 | } | |
80 | EXPORT_SYMBOL(pci_bus_add_devices); | |
81 | ||
82 | -/** pci_walk_bus - walk devices on/under bus, calling callback. | |
83 | - * @top bus whose devices should be walked | |
84 | - * @cb callback to be called for each device found | |
85 | - * @userdata arbitrary pointer to be passed to callback. | |
86 | - * | |
87 | - * Walk the given bus, including any bridged devices | |
88 | - * on buses under this bus. Call the provided callback | |
89 | - * on each device found. | |
90 | - * | |
91 | - * We check the return of @cb each time. If it returns anything | |
92 | - * other than 0, we break out. | |
93 | - * | |
94 | - */ | |
95 | -void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), | |
96 | - void *userdata) | |
97 | +static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), | |
98 | + void *userdata, bool locked) | |
99 | { | |
100 | struct pci_dev *dev; | |
101 | struct pci_bus *bus; | |
102 | @@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, i | |
103 | int retval; | |
104 | ||
105 | bus = top; | |
106 | - down_read(&pci_bus_sem); | |
107 | + if (!locked) | |
108 | + down_read(&pci_bus_sem); | |
109 | next = top->devices.next; | |
110 | for (;;) { | |
111 | if (next == &bus->devices) { | |
112 | @@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, i | |
113 | if (retval) | |
114 | break; | |
115 | } | |
116 | - up_read(&pci_bus_sem); | |
117 | + if (!locked) | |
118 | + up_read(&pci_bus_sem); | |
119 | +} | |
120 | + | |
121 | +/** | |
122 | + * pci_walk_bus - walk devices on/under bus, calling callback. | |
123 | + * @top: bus whose devices should be walked | |
124 | + * @cb: callback to be called for each device found | |
125 | + * @userdata: arbitrary pointer to be passed to callback | |
126 | + * | |
127 | + * Walk the given bus, including any bridged devices | |
128 | + * on buses under this bus. Call the provided callback | |
129 | + * on each device found. | |
130 | + * | |
131 | + * We check the return of @cb each time. If it returns anything | |
132 | + * other than 0, we break out. | |
133 | + */ | |
134 | +void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) | |
135 | +{ | |
136 | + __pci_walk_bus(top, cb, userdata, false); | |
137 | } | |
138 | EXPORT_SYMBOL_GPL(pci_walk_bus); | |
139 | ||
140 | +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata) | |
141 | +{ | |
142 | + lockdep_assert_held(&pci_bus_sem); | |
143 | + | |
144 | + __pci_walk_bus(top, cb, userdata, true); | |
145 | +} | |
146 | +EXPORT_SYMBOL_GPL(pci_walk_bus_locked); | |
147 | + | |
148 | struct pci_bus *pci_bus_get(struct pci_bus *bus) | |
149 | { | |
150 | if (bus) | |
151 | --- a/drivers/pci/pci.c | |
152 | +++ b/drivers/pci/pci.c | |
153 | @@ -1291,6 +1291,7 @@ end: | |
154 | /** | |
155 | * pci_set_full_power_state - Put a PCI device into D0 and update its state | |
156 | * @dev: PCI device to power up | |
157 | + * @locked: whether pci_bus_sem is held | |
158 | * | |
159 | * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register | |
160 | * to confirm the state change, restore its BARs if they might be lost and | |
161 | @@ -1300,7 +1301,7 @@ end: | |
162 | * to D0, it is more efficient to use pci_power_up() directly instead of this | |
163 | * function. | |
164 | */ | |
165 | -static int pci_set_full_power_state(struct pci_dev *dev) | |
166 | +static int pci_set_full_power_state(struct pci_dev *dev, bool locked) | |
167 | { | |
168 | u16 pmcsr; | |
169 | int ret; | |
170 | @@ -1336,7 +1337,7 @@ static int pci_set_full_power_state(stru | |
171 | } | |
172 | ||
173 | if (dev->bus->self) | |
174 | - pcie_aspm_pm_state_change(dev->bus->self); | |
175 | + pcie_aspm_pm_state_change(dev->bus->self, locked); | |
176 | ||
177 | return 0; | |
178 | } | |
179 | @@ -1365,10 +1366,22 @@ void pci_bus_set_current_state(struct pc | |
180 | pci_walk_bus(bus, __pci_dev_set_current_state, &state); | |
181 | } | |
182 | ||
183 | +static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked) | |
184 | +{ | |
185 | + if (!bus) | |
186 | + return; | |
187 | + | |
188 | + if (locked) | |
189 | + pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state); | |
190 | + else | |
191 | + pci_walk_bus(bus, __pci_dev_set_current_state, &state); | |
192 | +} | |
193 | + | |
194 | /** | |
195 | * pci_set_low_power_state - Put a PCI device into a low-power state. | |
196 | * @dev: PCI device to handle. | |
197 | * @state: PCI power state (D1, D2, D3hot) to put the device into. | |
198 | + * @locked: whether pci_bus_sem is held | |
199 | * | |
200 | * Use the device's PCI_PM_CTRL register to put it into a low-power state. | |
201 | * | |
202 | @@ -1379,7 +1392,7 @@ void pci_bus_set_current_state(struct pc | |
203 | * 0 if device already is in the requested state. | |
204 | * 0 if device's power state has been successfully changed. | |
205 | */ | |
206 | -static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) | |
207 | +static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked) | |
208 | { | |
209 | u16 pmcsr; | |
210 | ||
211 | @@ -1433,29 +1446,12 @@ static int pci_set_low_power_state(struc | |
212 | pci_power_name(state)); | |
213 | ||
214 | if (dev->bus->self) | |
215 | - pcie_aspm_pm_state_change(dev->bus->self); | |
216 | + pcie_aspm_pm_state_change(dev->bus->self, locked); | |
217 | ||
218 | return 0; | |
219 | } | |
220 | ||
221 | -/** | |
222 | - * pci_set_power_state - Set the power state of a PCI device | |
223 | - * @dev: PCI device to handle. | |
224 | - * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. | |
225 | - * | |
226 | - * Transition a device to a new power state, using the platform firmware and/or | |
227 | - * the device's PCI PM registers. | |
228 | - * | |
229 | - * RETURN VALUE: | |
230 | - * -EINVAL if the requested state is invalid. | |
231 | - * -EIO if device does not support PCI PM or its PM capabilities register has a | |
232 | - * wrong version, or device doesn't support the requested state. | |
233 | - * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. | |
234 | - * 0 if device already is in the requested state. | |
235 | - * 0 if the transition is to D3 but D3 is not supported. | |
236 | - * 0 if device's power state has been successfully changed. | |
237 | - */ | |
238 | -int pci_set_power_state(struct pci_dev *dev, pci_power_t state) | |
239 | +static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked) | |
240 | { | |
241 | int error; | |
242 | ||
243 | @@ -1479,7 +1475,7 @@ int pci_set_power_state(struct pci_dev * | |
244 | return 0; | |
245 | ||
246 | if (state == PCI_D0) | |
247 | - return pci_set_full_power_state(dev); | |
248 | + return pci_set_full_power_state(dev, locked); | |
249 | ||
250 | /* | |
251 | * This device is quirked not to be put into D3, so don't put it in | |
252 | @@ -1493,16 +1489,16 @@ int pci_set_power_state(struct pci_dev * | |
253 | * To put the device in D3cold, put it into D3hot in the native | |
254 | * way, then put it into D3cold using platform ops. | |
255 | */ | |
256 | - error = pci_set_low_power_state(dev, PCI_D3hot); | |
257 | + error = pci_set_low_power_state(dev, PCI_D3hot, locked); | |
258 | ||
259 | if (pci_platform_power_transition(dev, PCI_D3cold)) | |
260 | return error; | |
261 | ||
262 | /* Powering off a bridge may power off the whole hierarchy */ | |
263 | if (dev->current_state == PCI_D3cold) | |
264 | - pci_bus_set_current_state(dev->subordinate, PCI_D3cold); | |
265 | + __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked); | |
266 | } else { | |
267 | - error = pci_set_low_power_state(dev, state); | |
268 | + error = pci_set_low_power_state(dev, state, locked); | |
269 | ||
270 | if (pci_platform_power_transition(dev, state)) | |
271 | return error; | |
272 | @@ -1510,8 +1506,38 @@ int pci_set_power_state(struct pci_dev * | |
273 | ||
274 | return 0; | |
275 | } | |
276 | + | |
277 | +/** | |
278 | + * pci_set_power_state - Set the power state of a PCI device | |
279 | + * @dev: PCI device to handle. | |
280 | + * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. | |
281 | + * | |
282 | + * Transition a device to a new power state, using the platform firmware and/or | |
283 | + * the device's PCI PM registers. | |
284 | + * | |
285 | + * RETURN VALUE: | |
286 | + * -EINVAL if the requested state is invalid. | |
287 | + * -EIO if device does not support PCI PM or its PM capabilities register has a | |
288 | + * wrong version, or device doesn't support the requested state. | |
289 | + * 0 if the transition is to D1 or D2 but D1 and D2 are not supported. | |
290 | + * 0 if device already is in the requested state. | |
291 | + * 0 if the transition is to D3 but D3 is not supported. | |
292 | + * 0 if device's power state has been successfully changed. | |
293 | + */ | |
294 | +int pci_set_power_state(struct pci_dev *dev, pci_power_t state) | |
295 | +{ | |
296 | + return __pci_set_power_state(dev, state, false); | |
297 | +} | |
298 | EXPORT_SYMBOL(pci_set_power_state); | |
299 | ||
300 | +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) | |
301 | +{ | |
302 | + lockdep_assert_held(&pci_bus_sem); | |
303 | + | |
304 | + return __pci_set_power_state(dev, state, true); | |
305 | +} | |
306 | +EXPORT_SYMBOL(pci_set_power_state_locked); | |
307 | + | |
308 | #define PCI_EXP_SAVE_REGS 7 | |
309 | ||
310 | static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev, | |
311 | --- a/drivers/pci/pci.h | |
312 | +++ b/drivers/pci/pci.h | |
313 | @@ -561,12 +561,12 @@ int pcie_retrain_link(struct pci_dev *pd | |
314 | #ifdef CONFIG_PCIEASPM | |
315 | void pcie_aspm_init_link_state(struct pci_dev *pdev); | |
316 | void pcie_aspm_exit_link_state(struct pci_dev *pdev); | |
317 | -void pcie_aspm_pm_state_change(struct pci_dev *pdev); | |
318 | +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); | |
319 | void pcie_aspm_powersave_config_link(struct pci_dev *pdev); | |
320 | #else | |
321 | static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } | |
322 | static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } | |
323 | -static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } | |
324 | +static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } | |
325 | static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } | |
326 | #endif | |
327 | ||
328 | --- a/drivers/pci/pcie/aspm.c | |
329 | +++ b/drivers/pci/pcie/aspm.c | |
330 | @@ -1001,8 +1001,11 @@ void pcie_aspm_exit_link_state(struct pc | |
331 | up_read(&pci_bus_sem); | |
332 | } | |
333 | ||
334 | -/* @pdev: the root port or switch downstream port */ | |
335 | -void pcie_aspm_pm_state_change(struct pci_dev *pdev) | |
336 | +/* | |
337 | + * @pdev: the root port or switch downstream port | |
338 | + * @locked: whether pci_bus_sem is held | |
339 | + */ | |
340 | +void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) | |
341 | { | |
342 | struct pcie_link_state *link = pdev->link_state; | |
343 | ||
344 | @@ -1012,12 +1015,14 @@ void pcie_aspm_pm_state_change(struct pc | |
345 | * Devices changed PM state, we should recheck if latency | |
346 | * meets all functions' requirement | |
347 | */ | |
348 | - down_read(&pci_bus_sem); | |
349 | + if (!locked) | |
350 | + down_read(&pci_bus_sem); | |
351 | mutex_lock(&aspm_lock); | |
352 | pcie_update_aspm_capable(link->root); | |
353 | pcie_config_aspm_path(link); | |
354 | mutex_unlock(&aspm_lock); | |
355 | - up_read(&pci_bus_sem); | |
356 | + if (!locked) | |
357 | + up_read(&pci_bus_sem); | |
358 | } | |
359 | ||
360 | void pcie_aspm_powersave_config_link(struct pci_dev *pdev) | |
361 | --- a/include/linux/pci.h | |
362 | +++ b/include/linux/pci.h | |
363 | @@ -1391,6 +1391,7 @@ int pci_load_and_free_saved_state(struct | |
364 | struct pci_saved_state **state); | |
365 | int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); | |
366 | int pci_set_power_state(struct pci_dev *dev, pci_power_t state); | |
367 | +int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state); | |
368 | pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); | |
369 | bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); | |
370 | void pci_pme_active(struct pci_dev *dev, bool enable); | |
371 | @@ -1594,6 +1595,8 @@ int pci_scan_bridge(struct pci_bus *bus, | |
372 | ||
373 | void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), | |
374 | void *userdata); | |
375 | +void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), | |
376 | + void *userdata); | |
377 | int pci_cfg_space_size(struct pci_dev *dev); | |
378 | unsigned char pci_bus_max_busnr(struct pci_bus *bus); | |
379 | void pci_setup_bridge(struct pci_bus *bus); | |
380 | @@ -1990,6 +1993,8 @@ static inline int pci_save_state(struct | |
381 | static inline void pci_restore_state(struct pci_dev *dev) { } | |
382 | static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) | |
383 | { return 0; } | |
384 | +static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state) | |
385 | +{ return 0; } | |
386 | static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) | |
387 | { return 0; } | |
388 | static inline pci_power_t pci_choose_state(struct pci_dev *dev, |