]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
30edc14b KRW |
2 | /* |
3 | * PCI Backend Operations - respond to PCI requests from Frontend | |
4 | * | |
5 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
6 | */ | |
283c0972 JP |
7 | |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
9 | ||
59aa56bf | 10 | #include <linux/moduleparam.h> |
30edc14b KRW |
11 | #include <linux/wait.h> |
12 | #include <linux/bitops.h> | |
13 | #include <xen/events.h> | |
14 | #include <linux/sched.h> | |
15 | #include "pciback.h" | |
16 | ||
17 | int verbose_request; /* non-zero: the MSI/MSI-X request handlers in this file emit KERN_DEBUG trace messages */ | |
18 | module_param(verbose_request, int, 0644); /* exposes verbose_request as an int module parameter; 0644 is the permission argument */ | |
19 | ||
a92336a1 KRW |
20 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); |
21 | ||
0513fe9e | 22 | /* Ensure a device is has the fake IRQ handler "turned on/off" and is |
a92336a1 | 23 | * ready to be exported. This MUST be run after xen_pcibk_reset_device |
0513fe9e KRW |
24 | * which does the actual PCI device enable/disable. |
25 | */ | |
a92336a1 | 26 | static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) |
0513fe9e | 27 | { |
a92336a1 | 28 | struct xen_pcibk_dev_data *dev_data; |
0513fe9e KRW |
29 | int rc; |
30 | int enable = 0; | |
31 | ||
32 | dev_data = pci_get_drvdata(dev); | |
33 | if (!dev_data) | |
34 | return; | |
35 | ||
36 | /* We don't deal with bridges */ | |
37 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) | |
38 | return; | |
39 | ||
40 | if (reset) { | |
41 | dev_data->enable_intx = 0; | |
42 | dev_data->ack_intr = 0; | |
43 | } | |
44 | enable = dev_data->enable_intx; | |
45 | ||
46 | /* Asked to disable, but ISR isn't runnig */ | |
47 | if (!enable && !dev_data->isr_on) | |
48 | return; | |
49 | ||
50 | /* Squirrel away the IRQs in the dev_data. We need this | |
51 | * b/c when device transitions to MSI, the dev->irq is | |
52 | * overwritten with the MSI vector. | |
53 | */ | |
54 | if (enable) | |
55 | dev_data->irq = dev->irq; | |
56 | ||
e17ab35f KRW |
57 | /* |
58 | * SR-IOV devices in all use MSI-X and have no legacy | |
59 | * interrupts, so inhibit creating a fake IRQ handler for them. | |
60 | */ | |
61 | if (dev_data->irq == 0) | |
62 | goto out; | |
63 | ||
0513fe9e KRW |
64 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", |
65 | dev_data->irq_name, | |
66 | dev_data->irq, | |
67 | pci_is_enabled(dev) ? "on" : "off", | |
68 | dev->msi_enabled ? "MSI" : "", | |
69 | dev->msix_enabled ? "MSI/X" : "", | |
70 | dev_data->isr_on ? "enable" : "disable", | |
71 | enable ? "enable" : "disable"); | |
72 | ||
73 | if (enable) { | |
a396f3a2 KRW |
74 | /* |
75 | * The MSI or MSI-X should not have an IRQ handler. Otherwise | |
76 | * if the guest terminates we BUG_ON in free_msi_irqs. | |
77 | */ | |
78 | if (dev->msi_enabled || dev->msix_enabled) | |
79 | goto out; | |
80 | ||
0513fe9e | 81 | rc = request_irq(dev_data->irq, |
a92336a1 | 82 | xen_pcibk_guest_interrupt, IRQF_SHARED, |
0513fe9e KRW |
83 | dev_data->irq_name, dev); |
84 | if (rc) { | |
85 | dev_err(&dev->dev, "%s: failed to install fake IRQ " \ | |
86 | "handler for IRQ %d! (rc:%d)\n", | |
87 | dev_data->irq_name, dev_data->irq, rc); | |
88 | goto out; | |
89 | } | |
90 | } else { | |
91 | free_irq(dev_data->irq, dev); | |
92 | dev_data->irq = 0; | |
93 | } | |
94 | dev_data->isr_on = enable; | |
95 | dev_data->ack_intr = enable; | |
96 | out: | |
97 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", | |
98 | dev_data->irq_name, | |
99 | dev_data->irq, | |
100 | pci_is_enabled(dev) ? "on" : "off", | |
101 | dev->msi_enabled ? "MSI" : "", | |
102 | dev->msix_enabled ? "MSI/X" : "", | |
103 | enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : | |
104 | (dev_data->isr_on ? "failed to disable" : "disabled")); | |
105 | } | |
106 | ||
30edc14b | 107 | /* Ensure a device is "turned off" and ready to be exported. |
a92336a1 | 108 | * (Also see xen_pcibk_config_reset to ensure virtual configuration space is |
30edc14b KRW |
109 | * ready to be re-exported) |
110 | */ | |
a92336a1 | 111 | void xen_pcibk_reset_device(struct pci_dev *dev) |
30edc14b KRW |
112 | { |
113 | u16 cmd; | |
114 | ||
a92336a1 | 115 | xen_pcibk_control_isr(dev, 1 /* reset device */); |
0513fe9e | 116 | |
30edc14b KRW |
117 | /* Disable devices (but not bridges) */ |
118 | if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
a2be65fd KRW |
119 | #ifdef CONFIG_PCI_MSI |
120 | /* The guest could have been abruptly killed without | |
121 | * disabling MSI/MSI-X interrupts.*/ | |
122 | if (dev->msix_enabled) | |
123 | pci_disable_msix(dev); | |
124 | if (dev->msi_enabled) | |
125 | pci_disable_msi(dev); | |
126 | #endif | |
bdc5c181 KRW |
127 | if (pci_is_enabled(dev)) |
128 | pci_disable_device(dev); | |
30edc14b KRW |
129 | |
130 | pci_write_config_word(dev, PCI_COMMAND, 0); | |
131 | ||
132 | dev->is_busmaster = 0; | |
133 | } else { | |
134 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
135 | if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
136 | cmd &= ~(PCI_COMMAND_INVALIDATE); | |
137 | pci_write_config_word(dev, PCI_COMMAND, cmd); | |
138 | ||
139 | dev->is_busmaster = 0; | |
140 | } | |
141 | } | |
142 | } | |
a92336a1 KRW |
143 | |
144 | #ifdef CONFIG_PCI_MSI | |
145 | static | |
146 | int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, | |
147 | struct pci_dev *dev, struct xen_pci_op *op) | |
148 | { | |
149 | struct xen_pcibk_dev_data *dev_data; | |
a92336a1 KRW |
150 | int status; |
151 | ||
152 | if (unlikely(verbose_request)) | |
153 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); | |
154 | ||
56441f3c KRW |
155 | if (dev->msi_enabled) |
156 | status = -EALREADY; | |
157 | else if (dev->msix_enabled) | |
158 | status = -ENXIO; | |
159 | else | |
160 | status = pci_enable_msi(dev); | |
a92336a1 KRW |
161 | |
162 | if (status) { | |
283c0972 | 163 | pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n", |
51ac8893 JB |
164 | pci_name(dev), pdev->xdev->otherend_id, |
165 | status); | |
a92336a1 KRW |
166 | op->value = 0; |
167 | return XEN_PCI_ERR_op_failed; | |
168 | } | |
169 | ||
170 | /* The value the guest needs is actually the IDT vector, not the | |
171 | * the local domain's IRQ number. */ | |
172 | ||
173 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
174 | if (unlikely(verbose_request)) | |
175 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
176 | op->value); | |
177 | ||
178 | dev_data = pci_get_drvdata(dev); | |
179 | if (dev_data) | |
180 | dev_data->ack_intr = 0; | |
181 | ||
182 | return 0; | |
183 | } | |
184 | ||
185 | static | |
186 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | |
187 | struct pci_dev *dev, struct xen_pci_op *op) | |
188 | { | |
a92336a1 KRW |
189 | if (unlikely(verbose_request)) |
190 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", | |
191 | pci_name(dev)); | |
a92336a1 | 192 | |
7cfb905b KRW |
193 | if (dev->msi_enabled) { |
194 | struct xen_pcibk_dev_data *dev_data; | |
195 | ||
196 | pci_disable_msi(dev); | |
197 | ||
198 | dev_data = pci_get_drvdata(dev); | |
199 | if (dev_data) | |
200 | dev_data->ack_intr = 1; | |
201 | } | |
a92336a1 KRW |
202 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; |
203 | if (unlikely(verbose_request)) | |
204 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
205 | op->value); | |
a92336a1 KRW |
206 | return 0; |
207 | } | |
208 | ||
209 | static | |
210 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | |
211 | struct pci_dev *dev, struct xen_pci_op *op) | |
212 | { | |
213 | struct xen_pcibk_dev_data *dev_data; | |
214 | int i, result; | |
215 | struct msix_entry *entries; | |
408fb0e5 | 216 | u16 cmd; |
a92336a1 KRW |
217 | |
218 | if (unlikely(verbose_request)) | |
219 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", | |
220 | pci_name(dev)); | |
5e0ce145 | 221 | |
a92336a1 KRW |
222 | if (op->value > SH_INFO_MAX_VEC) |
223 | return -EINVAL; | |
224 | ||
5e0ce145 KRW |
225 | if (dev->msix_enabled) |
226 | return -EALREADY; | |
227 | ||
408fb0e5 KRW |
228 | /* |
229 | * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able | |
230 | * to access the BARs where the MSI-X entries reside. | |
8d47065f | 231 | * But VF devices are unique in which the PF needs to be checked. |
408fb0e5 | 232 | */ |
8d47065f | 233 | pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd); |
408fb0e5 | 234 | if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) |
5e0ce145 KRW |
235 | return -ENXIO; |
236 | ||
a92336a1 KRW |
237 | entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); |
238 | if (entries == NULL) | |
239 | return -ENOMEM; | |
240 | ||
241 | for (i = 0; i < op->value; i++) { | |
242 | entries[i].entry = op->msix_entries[i].entry; | |
243 | entries[i].vector = op->msix_entries[i].vector; | |
244 | } | |
245 | ||
efdfa3ed | 246 | result = pci_enable_msix_exact(dev, entries, op->value); |
a92336a1 KRW |
247 | if (result == 0) { |
248 | for (i = 0; i < op->value; i++) { | |
249 | op->msix_entries[i].entry = entries[i].entry; | |
c0914e61 | 250 | if (entries[i].vector) { |
a92336a1 KRW |
251 | op->msix_entries[i].vector = |
252 | xen_pirq_from_irq(entries[i].vector); | |
253 | if (unlikely(verbose_request)) | |
254 | printk(KERN_DEBUG DRV_NAME ": %s: " \ | |
255 | "MSI-X[%d]: %d\n", | |
256 | pci_name(dev), i, | |
257 | op->msix_entries[i].vector); | |
c0914e61 | 258 | } |
a92336a1 | 259 | } |
51ac8893 | 260 | } else |
283c0972 | 261 | pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n", |
51ac8893 JB |
262 | pci_name(dev), pdev->xdev->otherend_id, |
263 | result); | |
a92336a1 KRW |
264 | kfree(entries); |
265 | ||
266 | op->value = result; | |
267 | dev_data = pci_get_drvdata(dev); | |
268 | if (dev_data) | |
269 | dev_data->ack_intr = 0; | |
270 | ||
0ee46eca | 271 | return result > 0 ? 0 : result; |
a92336a1 KRW |
272 | } |
273 | ||
274 | static | |
275 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | |
276 | struct pci_dev *dev, struct xen_pci_op *op) | |
277 | { | |
a92336a1 KRW |
278 | if (unlikely(verbose_request)) |
279 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", | |
280 | pci_name(dev)); | |
a92336a1 | 281 | |
7cfb905b KRW |
282 | if (dev->msix_enabled) { |
283 | struct xen_pcibk_dev_data *dev_data; | |
284 | ||
285 | pci_disable_msix(dev); | |
286 | ||
287 | dev_data = pci_get_drvdata(dev); | |
288 | if (dev_data) | |
289 | dev_data->ack_intr = 1; | |
290 | } | |
a92336a1 KRW |
291 | /* |
292 | * SR-IOV devices (which don't have any legacy IRQ) have | |
293 | * an undefined IRQ value of zero. | |
294 | */ | |
295 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
296 | if (unlikely(verbose_request)) | |
7cfb905b KRW |
297 | printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", |
298 | pci_name(dev), op->value); | |
a92336a1 KRW |
299 | return 0; |
300 | } | |
301 | #endif | |
30edc14b KRW |
302 | /* |
303 | * Now the same evtchn is used for both pcifront conf_read_write request | |
304 | * as well as pcie aer front end ack. We use a new work_queue to schedule | |
a92336a1 | 305 | * xen_pcibk conf_read_write service for avoiding confict with aer_core |
30edc14b KRW |
306 | * do_recovery job which also use the system default work_queue |
307 | */ | |
a92336a1 | 308 | void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) |
30edc14b KRW |
309 | { |
310 | /* Check that frontend is requesting an operation and that we are not | |
311 | * already processing a request */ | |
312 | if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) | |
313 | && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { | |
429eafe6 | 314 | schedule_work(&pdev->op_work); |
30edc14b KRW |
315 | } |
316 | /*_XEN_PCIB_active should have been cleared by pcifront. And also make | |
a92336a1 | 317 | sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ |
30edc14b KRW |
318 | if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
319 | && test_bit(_PCIB_op_pending, &pdev->flags)) { | |
a92336a1 | 320 | wake_up(&xen_pcibk_aer_wait_queue); |
30edc14b KRW |
321 | } |
322 | } | |
323 | ||
324 | /* Performing the configuration space reads/writes must not be done in atomic | |
325 | * context because some of the pci_* functions can sleep (mostly due to ACPI | |
326 | * use of semaphores). This function is intended to be called from a work | |
a92336a1 | 327 | * queue in process context taking a struct xen_pcibk_device as a parameter */ |
30edc14b | 328 | |
a92336a1 | 329 | void xen_pcibk_do_op(struct work_struct *data) |
30edc14b | 330 | { |
a92336a1 KRW |
331 | struct xen_pcibk_device *pdev = |
332 | container_of(data, struct xen_pcibk_device, op_work); | |
30edc14b | 333 | struct pci_dev *dev; |
a92336a1 | 334 | struct xen_pcibk_dev_data *dev_data = NULL; |
8135cf8b | 335 | struct xen_pci_op *op = &pdev->op; |
0513fe9e | 336 | int test_intx = 0; |
d159457b KRW |
337 | #ifdef CONFIG_PCI_MSI |
338 | unsigned int nr = 0; | |
339 | #endif | |
30edc14b | 340 | |
8135cf8b KRW |
341 | *op = pdev->sh_info->op; |
342 | barrier(); | |
a92336a1 | 343 | dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); |
30edc14b KRW |
344 | |
345 | if (dev == NULL) | |
346 | op->err = XEN_PCI_ERR_dev_not_found; | |
347 | else { | |
0513fe9e KRW |
348 | dev_data = pci_get_drvdata(dev); |
349 | if (dev_data) | |
350 | test_intx = dev_data->enable_intx; | |
30edc14b KRW |
351 | switch (op->cmd) { |
352 | case XEN_PCI_OP_conf_read: | |
a92336a1 | 353 | op->err = xen_pcibk_config_read(dev, |
30edc14b KRW |
354 | op->offset, op->size, &op->value); |
355 | break; | |
356 | case XEN_PCI_OP_conf_write: | |
a92336a1 | 357 | op->err = xen_pcibk_config_write(dev, |
30edc14b KRW |
358 | op->offset, op->size, op->value); |
359 | break; | |
360 | #ifdef CONFIG_PCI_MSI | |
361 | case XEN_PCI_OP_enable_msi: | |
a92336a1 | 362 | op->err = xen_pcibk_enable_msi(pdev, dev, op); |
30edc14b KRW |
363 | break; |
364 | case XEN_PCI_OP_disable_msi: | |
a92336a1 | 365 | op->err = xen_pcibk_disable_msi(pdev, dev, op); |
30edc14b KRW |
366 | break; |
367 | case XEN_PCI_OP_enable_msix: | |
d159457b | 368 | nr = op->value; |
a92336a1 | 369 | op->err = xen_pcibk_enable_msix(pdev, dev, op); |
30edc14b KRW |
370 | break; |
371 | case XEN_PCI_OP_disable_msix: | |
a92336a1 | 372 | op->err = xen_pcibk_disable_msix(pdev, dev, op); |
30edc14b KRW |
373 | break; |
374 | #endif | |
375 | default: | |
376 | op->err = XEN_PCI_ERR_not_implemented; | |
377 | break; | |
378 | } | |
379 | } | |
0513fe9e KRW |
380 | if (!op->err && dev && dev_data) { |
381 | /* Transition detected */ | |
382 | if ((dev_data->enable_intx != test_intx)) | |
a92336a1 | 383 | xen_pcibk_control_isr(dev, 0 /* no reset */); |
0513fe9e | 384 | } |
8135cf8b KRW |
385 | pdev->sh_info->op.err = op->err; |
386 | pdev->sh_info->op.value = op->value; | |
387 | #ifdef CONFIG_PCI_MSI | |
388 | if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { | |
389 | unsigned int i; | |
390 | ||
d159457b | 391 | for (i = 0; i < nr; i++) |
8135cf8b KRW |
392 | pdev->sh_info->op.msix_entries[i].vector = |
393 | op->msix_entries[i].vector; | |
394 | } | |
395 | #endif | |
30edc14b KRW |
396 | /* Tell the driver domain that we're done. */ |
397 | wmb(); | |
398 | clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
399 | notify_remote_via_irq(pdev->evtchn_irq); | |
400 | ||
401 | /* Mark that we're done. */ | |
4e857c58 | 402 | smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ |
30edc14b | 403 | clear_bit(_PDEVF_op_active, &pdev->flags); |
4e857c58 | 404 | smp_mb__after_atomic(); /* /before/ final check for work */ |
30edc14b KRW |
405 | |
406 | /* Check to see if the driver domain tried to start another request in | |
407 | * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. | |
408 | */ | |
a92336a1 | 409 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
410 | } |
411 | ||
a92336a1 | 412 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) |
30edc14b | 413 | { |
a92336a1 | 414 | struct xen_pcibk_device *pdev = dev_id; |
30edc14b | 415 | |
a92336a1 | 416 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
417 | |
418 | return IRQ_HANDLED; | |
419 | } | |
a92336a1 | 420 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) |
0513fe9e KRW |
421 | { |
422 | struct pci_dev *dev = (struct pci_dev *)dev_id; | |
a92336a1 | 423 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); |
0513fe9e KRW |
424 | |
425 | if (dev_data->isr_on && dev_data->ack_intr) { | |
426 | dev_data->handled++; | |
427 | if ((dev_data->handled % 1000) == 0) { | |
428 | if (xen_test_irq_shared(irq)) { | |
283c0972 | 429 | pr_info("%s IRQ line is not shared " |
0513fe9e KRW |
430 | "with other domains. Turning ISR off\n", |
431 | dev_data->irq_name); | |
432 | dev_data->ack_intr = 0; | |
433 | } | |
434 | } | |
435 | return IRQ_HANDLED; | |
436 | } | |
437 | return IRQ_NONE; | |
438 | } |