]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blame - releases/4.14.111/pci-pme-fix-hotplug-sysfs-remove-deadlock-in-pcie_pm.patch
Fixes for 4.19
[thirdparty/kernel/stable-queue.git] / releases / 4.14.111 / pci-pme-fix-hotplug-sysfs-remove-deadlock-in-pcie_pm.patch
CommitLineData
04fd09d4
SL
1From 87a35aca6c761b392945d5f3b9d6fa94fbfa6527 Mon Sep 17 00:00:00 2001
2From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
3Date: Thu, 28 Feb 2019 13:56:27 -0600
4Subject: PCI/PME: Fix hotplug/sysfs remove deadlock in pcie_pme_remove()
5
6[ Upstream commit 95c80bc6952b6a5badc7b702d23e5bf14d251e7c ]
7
8Dongdong reported a deadlock triggered by a hotplug event during a sysfs
9"remove" operation:
10
11 pciehp 0000:00:0c.0:pcie004: Slot(0-1): Link Up
12 # echo 1 > 0000:00:0c.0/remove
13
14 PME and hotplug share an MSI/MSI-X vector. The sysfs "remove" side is:
15
16 remove_store
17 pci_stop_and_remove_bus_device_locked
18 pci_lock_rescan_remove
19 pci_stop_and_remove_bus_device
20 ...
21 pcie_pme_remove
22 pcie_pme_suspend
23 synchronize_irq # wait for hotplug IRQ handler
24 pci_unlock_rescan_remove
25
26 The hotplug side is:
27
28 pciehp_ist
29 pciehp_handle_presence_or_link_change
30 pciehp_configure_device
31 pci_lock_rescan_remove # wait for pci_unlock_rescan_remove()
32
33 INFO: task bash:10913 blocked for more than 120 seconds.
34
35 # ps -ax |grep D
36 PID TTY STAT TIME COMMAND
37 10913 ttyAMA0 Ds+ 0:00 -bash
38 14022 ? D 0:00 [irq/745-pciehp]
39
40 # cat /proc/14022/stack
41 __switch_to+0x94/0xd8
42 pci_lock_rescan_remove+0x20/0x28
43 pciehp_configure_device+0x30/0x140
44 pciehp_handle_presence_or_link_change+0x324/0x458
45 pciehp_ist+0x1dc/0x1e0
46
47 # cat /proc/10913/stack
48 __switch_to+0x94/0xd8
49 synchronize_irq+0x8c/0xc0
50 pcie_pme_suspend+0xa4/0x118
51 pcie_pme_remove+0x20/0x40
52 pcie_port_remove_service+0x3c/0x58
53 ...
54 pcie_port_device_remove+0x2c/0x48
55 pcie_portdrv_remove+0x68/0x78
56 pci_device_remove+0x48/0x120
57 ...
58 pci_stop_bus_device+0x84/0xc0
59 pci_stop_and_remove_bus_device_locked+0x24/0x40
60 remove_store+0xa4/0xb8
61 dev_attr_store+0x44/0x60
62 sysfs_kf_write+0x58/0x80
63
64It is incorrect to call pcie_pme_suspend() from pcie_pme_remove() for two
65reasons.
66
67First, pcie_pme_suspend() calls synchronize_irq(), which will wait for the
68native hotplug interrupt handler as well as for the PME one, because they
69share one IRQ (as per the spec). That may deadlock if hotplug is signaled
70while pcie_pme_remove() is running and the latter calls
71pci_lock_rescan_remove() before the former.
72
73Second, if pcie_pme_suspend() figures out that wakeup needs to be enabled
74for the port, it will return without disabling the interrupt as expected by
75pcie_pme_remove() which was overlooked by commit c7b5a4e6e8fb ("PCI / PM:
76Fix native PME handling during system suspend/resume").
77
78To fix that, rework pcie_pme_remove() to disable the PME interrupt, clear
79its status and prevent the PME worker function from re-enabling it before
80calling free_irq() on it, which should be sufficient.
81
82Fixes: c7b5a4e6e8fb ("PCI / PM: Fix native PME handling during system suspend/resume")
83Link: https://lore.kernel.org/linux-pci/c7697e7c-e1af-13e4-8491-0a3996e6ab5d@huawei.com
84Reported-by: Dongdong Liu <liudongdong3@huawei.com>
85Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
86[bhelgaas: add URL and deadlock details from Dongdong]
87Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
88Signed-off-by: Sasha Levin <sashal@kernel.org>
89---
90 drivers/pci/pcie/pme.c | 22 +++++++++++++++-------
91 1 file changed, 15 insertions(+), 7 deletions(-)
92
93diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c
94index df290aa58dce..c2e6e3d1073f 100644
95--- a/drivers/pci/pcie/pme.c
96+++ b/drivers/pci/pcie/pme.c
97@@ -367,6 +367,16 @@ static bool pcie_pme_check_wakeup(struct pci_bus *bus)
98 return false;
99 }
100
101+static void pcie_pme_disable_interrupt(struct pci_dev *port,
102+ struct pcie_pme_service_data *data)
103+{
104+ spin_lock_irq(&data->lock);
105+ pcie_pme_interrupt_enable(port, false);
106+ pcie_clear_root_pme_status(port);
107+ data->noirq = true;
108+ spin_unlock_irq(&data->lock);
109+}
110+
111 /**
112 * pcie_pme_suspend - Suspend PCIe PME service device.
113 * @srv: PCIe service device to suspend.
114@@ -391,11 +401,7 @@ static int pcie_pme_suspend(struct pcie_device *srv)
115 return 0;
116 }
117
118- spin_lock_irq(&data->lock);
119- pcie_pme_interrupt_enable(port, false);
120- pcie_clear_root_pme_status(port);
121- data->noirq = true;
122- spin_unlock_irq(&data->lock);
123+ pcie_pme_disable_interrupt(port, data);
124
125 synchronize_irq(srv->irq);
126
127@@ -431,9 +437,11 @@ static int pcie_pme_resume(struct pcie_device *srv)
128 */
129 static void pcie_pme_remove(struct pcie_device *srv)
130 {
131- pcie_pme_suspend(srv);
132+ struct pcie_pme_service_data *data = get_service_data(srv);
133+
134+ pcie_pme_disable_interrupt(srv->port, data);
135 free_irq(srv->irq, srv);
136- kfree(get_service_data(srv));
137+ kfree(data);
138 }
139
140 static struct pcie_port_service_driver pcie_pme_driver = {
141--
1422.19.1
143