1 From edeb304f659792fb5bab90d7d6f3408b4c7301fb Mon Sep 17 00:00:00 2001
2 From: Vaibhav Jain <vaibhav@linux.ibm.com>
3 Date: Tue, 29 Jan 2019 16:36:18 +0530
4 Subject: cxl: Wrap iterations over afu slices inside 'afu_list_lock'
6 From: Vaibhav Jain <vaibhav@linux.ibm.com>
8 commit edeb304f659792fb5bab90d7d6f3408b4c7301fb upstream.
10 Within the cxl module, iteration over the array 'adapter->afu' may be racy
11 at a few points, as the array might be read during an EEH while its
12 contents are being set to NULL because the driver is being unloaded or unbound
13 from the adapter. This might result in a NULL pointer to 'struct afu'
14 being dereferenced during an EEH, thereby causing a kernel oops.
16 This patch fixes the race by making sure that every access to the array
17 'adapter->afu' is wrapped within the context of the spin-lock
18 'adapter->afu_list_lock'.
20 Fixes: 9e8df8a21963 ("cxl: EEH support")
21 Cc: stable@vger.kernel.org # v4.3+
22 Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
23 Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
24 Acked-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
25 Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
26 Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
27 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
30 drivers/misc/cxl/guest.c | 2 ++
31 drivers/misc/cxl/pci.c | 39 ++++++++++++++++++++++++++++++---------
32 2 files changed, 32 insertions(+), 9 deletions(-)
34 --- a/drivers/misc/cxl/guest.c
35 +++ b/drivers/misc/cxl/guest.c
36 @@ -267,6 +267,7 @@ static int guest_reset(struct cxl *adapt
39 pr_devel("Adapter reset request\n");
40 + spin_lock(&adapter->afu_list_lock);
41 for (i = 0; i < adapter->slices; i++) {
42 if ((afu = adapter->afu[i])) {
43 pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
44 @@ -283,6 +284,7 @@ static int guest_reset(struct cxl *adapt
45 pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
48 + spin_unlock(&adapter->afu_list_lock);
52 --- a/drivers/misc/cxl/pci.c
53 +++ b/drivers/misc/cxl/pci.c
54 @@ -1807,7 +1807,7 @@ static pci_ers_result_t cxl_vphb_error_d
55 /* There should only be one entry, but go through the list
58 - if (afu->phb == NULL)
59 + if (afu == NULL || afu->phb == NULL)
62 list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
63 @@ -1834,7 +1834,8 @@ static pci_ers_result_t cxl_pci_error_de
65 struct cxl *adapter = pci_get_drvdata(pdev);
67 - pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET, afu_result;
68 + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
69 + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
72 /* At this point, we could still have an interrupt pending.
73 @@ -1845,6 +1846,7 @@ static pci_ers_result_t cxl_pci_error_de
75 /* If we're permanently dead, give up. */
76 if (state == pci_channel_io_perm_failure) {
77 + spin_lock(&adapter->afu_list_lock);
78 for (i = 0; i < adapter->slices; i++) {
79 afu = adapter->afu[i];
81 @@ -1853,6 +1855,7 @@ static pci_ers_result_t cxl_pci_error_de
83 cxl_vphb_error_detected(afu, state);
85 + spin_unlock(&adapter->afu_list_lock);
86 return PCI_ERS_RESULT_DISCONNECT;
89 @@ -1934,11 +1937,17 @@ static pci_ers_result_t cxl_pci_error_de
90 * * In slot_reset, free the old resources and allocate new ones.
91 * * In resume, clear the flag to allow things to start.
94 + /* Make sure no one else changes the afu list */
95 + spin_lock(&adapter->afu_list_lock);
97 for (i = 0; i < adapter->slices; i++) {
98 afu = adapter->afu[i];
100 - afu_result = cxl_vphb_error_detected(afu, state);
104 + afu_result = cxl_vphb_error_detected(afu, state);
105 cxl_context_detach_all(afu);
106 cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
107 pci_deconfigure_afu(afu);
108 @@ -1950,6 +1959,7 @@ static pci_ers_result_t cxl_pci_error_de
109 (result == PCI_ERS_RESULT_NEED_RESET))
110 result = PCI_ERS_RESULT_NONE;
112 + spin_unlock(&adapter->afu_list_lock);
114 /* should take the context lock here */
115 if (cxl_adapter_context_lock(adapter) != 0)
116 @@ -1982,14 +1992,18 @@ static pci_ers_result_t cxl_pci_slot_res
118 cxl_adapter_context_unlock(adapter);
120 + spin_lock(&adapter->afu_list_lock);
121 for (i = 0; i < adapter->slices; i++) {
122 afu = adapter->afu[i];
127 if (pci_configure_afu(afu, adapter, pdev))
131 if (cxl_afu_select_best_mode(afu))
135 if (afu->phb == NULL)
137 @@ -2001,16 +2015,16 @@ static pci_ers_result_t cxl_pci_slot_res
138 ctx = cxl_get_context(afu_dev);
140 if (ctx && cxl_release_context(ctx))
144 ctx = cxl_dev_context_init(afu_dev);
149 afu_dev->dev.archdata.cxl_ctx = ctx;
151 if (cxl_ops->afu_check_and_enable(afu))
155 afu_dev->error_state = pci_channel_io_normal;
157 @@ -2031,8 +2045,13 @@ static pci_ers_result_t cxl_pci_slot_res
158 result = PCI_ERS_RESULT_DISCONNECT;
162 + spin_unlock(&adapter->afu_list_lock);
166 + spin_unlock(&adapter->afu_list_lock);
169 /* All the bits that happen in both error_detected and cxl_remove
170 * should be idempotent, so we don't need to worry about leaving a mix
171 @@ -2053,10 +2072,11 @@ static void cxl_pci_resume(struct pci_de
172 * This is not the place to be checking if everything came back up
173 * properly, because there's no return value: do that in slot_reset.
175 + spin_lock(&adapter->afu_list_lock);
176 for (i = 0; i < adapter->slices; i++) {
177 afu = adapter->afu[i];
179 - if (afu->phb == NULL)
180 + if (afu == NULL || afu->phb == NULL)
183 list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
184 @@ -2065,6 +2085,7 @@ static void cxl_pci_resume(struct pci_de
185 afu_dev->driver->err_handler->resume(afu_dev);
188 + spin_unlock(&adapter->afu_list_lock);
191 static const struct pci_error_handlers cxl_err_handler = {