]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.8.5/cxl-prevent-adapter-reset-if-an-active-context-exists.patch
4.9-stable patches
[thirdparty/kernel/stable-queue.git] / releases / 4.8.5 / cxl-prevent-adapter-reset-if-an-active-context-exists.patch
1 From 70b565bbdb911023373e035225ab10077e4ab937 Mon Sep 17 00:00:00 2001
2 From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
3 Date: Fri, 14 Oct 2016 15:08:36 +0530
4 Subject: cxl: Prevent adapter reset if an active context exists
5
6 From: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
7
8 commit 70b565bbdb911023373e035225ab10077e4ab937 upstream.
9
10 This patch prevents resetting the cxl adapter via sysfs in the presence
11 of one or more active cxl_contexts on it. This protects against an
12 unrecoverable error caused by PSL owning a dirty cache line even after
13 reset and host tries to touch the same cache line. In case a force reset
14 of the card is required irrespective of any active contexts, the int
15 value -1 can be stored in the 'reset' sysfs attribute of the card.
16
17 The patch introduces a new atomic_t member named contexts_num inside
18 struct cxl that holds the number of active contexts attached to the
19 card, which is checked against '0' before proceeding with the reset. To
20 protect against a race condition where a context is activated just after
21 the reset check is performed, contexts_num is atomically set to '-1'
22 after the reset check to indicate that no more contexts can be activated
23 on the card.
24
25 Before activating a context we atomically test if contexts_num is
26 non-negative and if so, increment its value by one. In case the value of
27 contexts_num is negative then it indicates that the card is about to be
28 reset and context activation is error-ed out at that point.
29
30 Fixes: 62fa19d4b4fd ("cxl: Add ability to reset the card")
31 Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
32 Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
33 Signed-off-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
34 Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
35 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
36
37 ---
38 Documentation/ABI/testing/sysfs-class-cxl | 7 +++--
39 drivers/misc/cxl/api.c | 9 ++++++
40 drivers/misc/cxl/context.c | 3 ++
41 drivers/misc/cxl/cxl.h | 24 +++++++++++++++++
42 drivers/misc/cxl/file.c | 11 +++++++
43 drivers/misc/cxl/guest.c | 3 ++
44 drivers/misc/cxl/main.c | 42 +++++++++++++++++++++++++++++-
45 drivers/misc/cxl/pci.c | 2 +
46 drivers/misc/cxl/sysfs.c | 27 ++++++++++++++++---
47 9 files changed, 121 insertions(+), 7 deletions(-)
48
49 --- a/Documentation/ABI/testing/sysfs-class-cxl
50 +++ b/Documentation/ABI/testing/sysfs-class-cxl
51 @@ -220,8 +220,11 @@ What: /sys/class/cxl/<card>/re
52 Date: October 2014
53 Contact: linuxppc-dev@lists.ozlabs.org
54 Description: write only
55 - Writing 1 will issue a PERST to card which may cause the card
56 - to reload the FPGA depending on load_image_on_perst.
57 + Writing 1 will issue a PERST to card provided there are no
58 + contexts active on any one of the card AFUs. This may cause
59 + the card to reload the FPGA depending on load_image_on_perst.
60 + Writing -1 will do a force PERST irrespective of any active
61 + contexts on the card AFUs.
62 Users: https://github.com/ibm-capi/libcxl
63
64 What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
65 --- a/drivers/misc/cxl/api.c
66 +++ b/drivers/misc/cxl/api.c
67 @@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context
68 if (ctx->status == STARTED)
69 goto out; /* already started */
70
71 + /*
72 + * Increment the mapped context count for adapter. This also checks
73 + * if adapter_context_lock is taken.
74 + */
75 + rc = cxl_adapter_context_get(ctx->afu->adapter);
76 + if (rc)
77 + goto out;
78 +
79 if (task) {
80 ctx->pid = get_task_pid(task, PIDTYPE_PID);
81 ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
82 @@ -240,6 +248,7 @@ int cxl_start_context(struct cxl_context
83
84 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
85 put_pid(ctx->pid);
86 + cxl_adapter_context_put(ctx->afu->adapter);
87 cxl_ctx_put();
88 goto out;
89 }
90 --- a/drivers/misc/cxl/context.c
91 +++ b/drivers/misc/cxl/context.c
92 @@ -238,6 +238,9 @@ int __detach_context(struct cxl_context
93 put_pid(ctx->glpid);
94
95 cxl_ctx_put();
96 +
97 + /* Decrease the attached context count on the adapter */
98 + cxl_adapter_context_put(ctx->afu->adapter);
99 return 0;
100 }
101
102 --- a/drivers/misc/cxl/cxl.h
103 +++ b/drivers/misc/cxl/cxl.h
104 @@ -615,6 +615,14 @@ struct cxl {
105 bool perst_select_user;
106 bool perst_same_image;
107 bool psl_timebase_synced;
108 +
109 + /*
110 + * number of contexts mapped on to this card. Possible values are:
111 + * >0: Number of contexts mapped and new one can be mapped.
112 + * 0: No active contexts and new ones can be mapped.
113 + * -1: No contexts mapped and new ones cannot be mapped.
114 + */
115 + atomic_t contexts_num;
116 };
117
118 int cxl_pci_alloc_one_irq(struct cxl *adapter);
119 @@ -940,4 +948,20 @@ bool cxl_pci_is_vphb_device(struct pci_d
120
121 /* decode AFU error bits in the PSL register PSL_SERR_An */
122 void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
123 +
124 +/*
125 + * Increments the number of attached contexts on an adapter.
126 + * If the adapter context lock is taken, returns -EBUSY.
127 + */
128 +int cxl_adapter_context_get(struct cxl *adapter);
129 +
130 +/* Decrements the number of attached contexts on an adapter */
131 +void cxl_adapter_context_put(struct cxl *adapter);
132 +
133 +/* If no active contexts then prevents contexts from being attached */
134 +int cxl_adapter_context_lock(struct cxl *adapter);
135 +
136 +/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */
137 +void cxl_adapter_context_unlock(struct cxl *adapter);
138 +
139 #endif
140 --- a/drivers/misc/cxl/file.c
141 +++ b/drivers/misc/cxl/file.c
142 @@ -205,11 +205,22 @@ static long afu_ioctl_start_work(struct
143 ctx->pid = get_task_pid(current, PIDTYPE_PID);
144 ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
145
146 + /*
147 + * Increment the mapped context count for adapter. This also checks
148 + * if adapter_context_lock is taken.
149 + */
150 + rc = cxl_adapter_context_get(ctx->afu->adapter);
151 + if (rc) {
152 + afu_release_irqs(ctx, ctx);
153 + goto out;
154 + }
155 +
156 trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
157
158 if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
159 amr))) {
160 afu_release_irqs(ctx, ctx);
161 + cxl_adapter_context_put(ctx->afu->adapter);
162 goto out;
163 }
164
165 --- a/drivers/misc/cxl/guest.c
166 +++ b/drivers/misc/cxl/guest.c
167 @@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struc
168 if ((rc = cxl_sysfs_adapter_add(adapter)))
169 goto err_put1;
170
171 + /* release the context lock as the adapter is configured */
172 + cxl_adapter_context_unlock(adapter);
173 +
174 return adapter;
175
176 err_put1:
177 --- a/drivers/misc/cxl/main.c
178 +++ b/drivers/misc/cxl/main.c
179 @@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void)
180 if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
181 goto err2;
182
183 - return adapter;
184 + /* start with context lock taken */
185 + atomic_set(&adapter->contexts_num, -1);
186
187 + return adapter;
188 err2:
189 cxl_remove_adapter_nr(adapter);
190 err1:
191 @@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_
192 return 0;
193 }
194
195 +int cxl_adapter_context_get(struct cxl *adapter)
196 +{
197 + /* Returns non-zero if incremented, 0 if the count is negative (locked) */
198 + int rc = atomic_inc_unless_negative(&adapter->contexts_num);
199 +
200 + return rc ? 0 : -EBUSY;
201 +}
202 +
203 +void cxl_adapter_context_put(struct cxl *adapter)
204 +{
205 + atomic_dec_if_positive(&adapter->contexts_num); /* only decrements a count > 0; 0 and -1 (locked) are left as-is */
206 +}
207 +
208 +int cxl_adapter_context_lock(struct cxl *adapter)
209 +{
210 + int rc;
211 + /* Swap 0 -> -1; rc is the old value, non-zero means contexts active or already locked */
212 + rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1);
213 + return rc ? -EBUSY : 0;
214 +}
215 +
216 +void cxl_adapter_context_unlock(struct cxl *adapter)
217 +{
218 + int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0);
219 +
220 + /*
221 + * val is the counter's previous value; -1 means the lock was held
222 + * and has just been released. Any other value means unlock was
223 + * requested without a matching context_lock: warn and force the
224 + * counter back to 0.
225 + */
226 + if (val != -1) {
227 + atomic_set(&adapter->contexts_num, 0);
228 + WARN(1, "Adapter context unlocked with %d active contexts",
229 + val);
230 + }
231 +}
232 +
233 static int __init init_cxl(void)
234 {
235 int rc = 0;
236 --- a/drivers/misc/cxl/pci.c
237 +++ b/drivers/misc/cxl/pci.c
238 @@ -1484,6 +1484,8 @@ static int cxl_configure_adapter(struct
239 if ((rc = cxl_native_register_psl_err_irq(adapter)))
240 goto err;
241
242 + /* Release the context lock as adapter is configured */
243 + cxl_adapter_context_unlock(adapter);
244 return 0;
245
246 err:
247 --- a/drivers/misc/cxl/sysfs.c
248 +++ b/drivers/misc/cxl/sysfs.c
249 @@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struc
250 int val;
251
252 rc = sscanf(buf, "%i", &val);
253 - if ((rc != 1) || (val != 1))
254 + if ((rc != 1) || (val != 1 && val != -1))
255 return -EINVAL;
256
257 - if ((rc = cxl_ops->adapter_reset(adapter)))
258 - return rc;
259 - return count;
260 + /*
261 + * See if we can lock the context mapping that's only allowed
262 + * when there are no contexts attached to the adapter. Once
263 + * taken this will also prevent any context from getting activated.
264 + */
265 + if (val == 1) {
266 + rc = cxl_adapter_context_lock(adapter);
267 + if (rc)
268 + goto out;
269 +
270 + rc = cxl_ops->adapter_reset(adapter);
271 + /* In case reset failed release context lock */
272 + if (rc)
273 + cxl_adapter_context_unlock(adapter);
274 +
275 + } else if (val == -1) {
276 + /* Perform a forced adapter reset */
277 + rc = cxl_ops->adapter_reset(adapter);
278 + }
279 +
280 +out:
281 + return rc ? rc : count;
282 }
283
284 static ssize_t load_image_on_perst_show(struct device *device,