// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>

#include "vfio_pci_private.h"

#define DRIVER_VERSION	"0.2"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO PCI - User Level meta-driver"

static char ids[1024] __initdata;
module_param_string(ids, ids, sizeof(ids), 0);
MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");

static bool nointxmask;
module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
		 "Disable support for PCI 2.3 style INTx masking.  If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");

#ifdef CONFIG_VFIO_PCI_VGA
static bool disable_vga;
module_param(disable_vga, bool, S_IRUGO);
MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
#endif

static bool disable_idle_d3;
module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_idle_d3,
		 "Disable using the PCI D3 low power state for idle, unused devices");

static bool enable_sriov;
#ifdef CONFIG_PCI_IOV
module_param(enable_sriov, bool, 0644);
MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration.  Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF.");
#endif

static inline bool vfio_vga_disabled(void)
{
#ifdef CONFIG_VFIO_PCI_VGA
	return disable_vga;
#else
	return true;
#endif
}

/*
 * Our VGA arbiter participation is limited since we don't know anything
 * about the device itself.  However, if the device is the only VGA device
 * downstream of a bridge and VFIO VGA support is disabled, then we can
 * safely return legacy VGA IO and memory as not decoded since the user
 * has no way to get to it and routing can be disabled externally at the
 * bridge.
 */
static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
{
	struct vfio_pci_device *vdev = opaque;
	struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
	unsigned char max_busnr;
	unsigned int decodes;

	if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
		       VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;

	max_busnr = pci_bus_max_busnr(pdev->bus);
	decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

	while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
		if (tmp == pdev ||
		    pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
		    pci_is_root_bus(tmp->bus))
			continue;

		if (tmp->bus->number >= pdev->bus->number &&
		    tmp->bus->number <= max_busnr) {
			pci_dev_put(tmp);
			decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
			break;
		}
	}

	return decodes;
}

static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
{
	return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
}

static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
{
	struct resource *res;
	int i;
	struct vfio_pci_dummy_resource *dummy_res;

	INIT_LIST_HEAD(&vdev->dummy_resources_list);

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		int bar = i + PCI_STD_RESOURCES;

		res = &vdev->pdev->resource[bar];

		if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
			goto no_mmap;

		if (!(res->flags & IORESOURCE_MEM))
			goto no_mmap;

		/*
		 * The PCI core shouldn't set up a resource with a
		 * type but zero size.  But there may be bugs that
		 * cause us to do that.
		 */
		if (!resource_size(res))
			goto no_mmap;

		if (resource_size(res) >= PAGE_SIZE) {
			vdev->bar_mmap_supported[bar] = true;
			continue;
		}

		if (!(res->start & ~PAGE_MASK)) {
			/*
			 * Add a dummy resource to reserve the remainder
			 * of the exclusive page in case that hot-add
			 * device's bar is assigned into it.
			 */
			dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
			if (dummy_res == NULL)
				goto no_mmap;

			dummy_res->resource.name = "vfio sub-page reserved";
			dummy_res->resource.start = res->end + 1;
			dummy_res->resource.end = res->start + PAGE_SIZE - 1;
			dummy_res->resource.flags = res->flags;
			if (request_resource(res->parent,
						&dummy_res->resource)) {
				kfree(dummy_res);
				goto no_mmap;
			}
			dummy_res->index = bar;
			list_add(&dummy_res->res_next,
					&vdev->dummy_resources_list);
			vdev->bar_mmap_supported[bar] = true;
			continue;
		}
		/*
		 * Here we don't handle the case when the BAR is not page
		 * aligned because we can't expect the BAR will be
		 * assigned into the same location in a page in guest
		 * when we passthrough the BAR.  And it's hard to access
		 * this BAR in userspace because we have no way to get
		 * the BAR's location in a page.
		 */
no_mmap:
		vdev->bar_mmap_supported[bar] = false;
	}
}
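
/*
 * Worked example (illustrative, added for clarity): with 4KB pages, a
 * 1KB BAR whose resource starts on a page boundary satisfies both
 * checks above; the dummy resource then claims [res->end + 1,
 * res->start + PAGE_SIZE - 1], i.e. the remaining 3KB of the page, so
 * no hot-added BAR can later be assigned into the page the user mmaps.
 */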

static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static void vfio_pci_disable(struct vfio_pci_device *vdev);
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);

/*
 * INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
 * _and_ the ability to detect when the device is asserting INTx via PCI_STATUS.
 * If a device implements the former but not the latter we would typically
 * expect broken_intx_masking to be set and require an exclusive interrupt.
 * However since we do have control of the device's ability to assert INTx,
 * we can instead pretend that the device does not implement INTx, virtualizing
 * the pin register to report zero and maintaining DisINTx set on the host.
 */
static bool vfio_pci_nointx(struct pci_dev *pdev)
{
	switch (pdev->vendor) {
	case PCI_VENDOR_ID_INTEL:
		switch (pdev->device) {
		/* All i40e (XL710/X710/XXV710) 10/20/25/40GbE NICs */
		case 0x1572:
		case 0x1574:
		case 0x1580 ... 0x1581:
		case 0x1583 ... 0x158b:
		case 0x37d0 ... 0x37d2:
			return true;
		default:
			return false;
		}
	}

	return false;
}

static void vfio_pci_probe_power_state(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	u16 pmcsr;

	if (!pdev->pm_cap)
		return;

	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);

	vdev->needs_pm_restore = !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET);
}

/*
 * pci_set_power_state() wrapper handling devices which perform a soft reset on
 * D3->D0 transition.  Save state prior to D0/1/2->D3, stash it on the vdev,
 * restore when returned to D0.  Saved separately from pci_saved_state for use
 * by PM capability emulation and separately from pci_dev internal saved state
 * to avoid it being overwritten and consumed around other resets.
 */
int vfio_pci_set_power_state(struct vfio_pci_device *vdev, pci_power_t state)
{
	struct pci_dev *pdev = vdev->pdev;
	bool needs_restore = false, needs_save = false;
	int ret;

	if (vdev->needs_pm_restore) {
		if (pdev->current_state < PCI_D3hot && state >= PCI_D3hot) {
			pci_save_state(pdev);
			needs_save = true;
		}

		if (pdev->current_state >= PCI_D3hot && state <= PCI_D0)
			needs_restore = true;
	}

	ret = pci_set_power_state(pdev, state);

	if (!ret) {
		/* D3 might be unsupported via quirk, skip unless in D3 */
		if (needs_save && pdev->current_state >= PCI_D3hot) {
			vdev->pm_save = pci_store_saved_state(pdev);
		} else if (needs_restore) {
			pci_load_and_free_saved_state(pdev, &vdev->pm_save);
			pci_restore_state(pdev);
		}
	}

	return ret;
}

static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	u16 cmd;
	u8 msix_pos;

	vfio_pci_set_power_state(vdev, PCI_D0);

	/* Don't allow our initial saved state to include busmaster */
	pci_clear_master(pdev);

	ret = pci_enable_device(pdev);
	if (ret)
		return ret;

	/* If reset fails because of the device lock, fail this path entirely */
	ret = pci_try_reset_function(pdev);
	if (ret == -EAGAIN) {
		pci_disable_device(pdev);
		return ret;
	}

	vdev->reset_works = !ret;
	pci_save_state(pdev);
	vdev->pci_saved_state = pci_store_saved_state(pdev);
	if (!vdev->pci_saved_state)
		pci_dbg(pdev, "%s: Couldn't store saved state\n", __func__);

	if (likely(!nointxmask)) {
		if (vfio_pci_nointx(pdev)) {
			pci_info(pdev, "Masking broken INTx support\n");
			vdev->nointx = true;
			pci_intx(pdev, 0);
		} else
			vdev->pci_2_3 = pci_intx_mask_supported(pdev);
	}

	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	if (vdev->pci_2_3 && (cmd & PCI_COMMAND_INTX_DISABLE)) {
		cmd &= ~PCI_COMMAND_INTX_DISABLE;
		pci_write_config_word(pdev, PCI_COMMAND, cmd);
	}

	ret = vfio_config_init(vdev);
	if (ret) {
		kfree(vdev->pci_saved_state);
		vdev->pci_saved_state = NULL;
		pci_disable_device(pdev);
		return ret;
	}

	msix_pos = pdev->msix_cap;
	if (msix_pos) {
		u16 flags;
		u32 table;

		pci_read_config_word(pdev, msix_pos + PCI_MSIX_FLAGS, &flags);
		pci_read_config_dword(pdev, msix_pos + PCI_MSIX_TABLE, &table);

		vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
		vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
		vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
	} else
		vdev->msix_bar = 0xFF;

	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
		vdev->has_vga = true;

	if (vfio_pci_is_vga(pdev) &&
	    pdev->vendor == PCI_VENDOR_ID_INTEL &&
	    IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
		ret = vfio_pci_igd_init(vdev);
		if (ret) {
			pci_warn(pdev, "Failed to setup Intel IGD regions\n");
			goto disable_exit;
		}
	}

	if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
		ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
		if (ret && ret != -ENODEV) {
			pci_warn(pdev, "Failed to setup NVIDIA NV2 RAM region\n");
			goto disable_exit;
		}
	}

	if (pdev->vendor == PCI_VENDOR_ID_IBM &&
	    IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
		ret = vfio_pci_ibm_npu2_init(vdev);
		if (ret && ret != -ENODEV) {
			pci_warn(pdev, "Failed to setup NVIDIA NV2 ATSD region\n");
			goto disable_exit;
		}
	}

	vfio_pci_probe_mmaps(vdev);

	return 0;

disable_exit:
	vfio_pci_disable(vdev);
	return ret;
}

static void vfio_pci_disable(struct vfio_pci_device *vdev)
{
	struct pci_dev *pdev = vdev->pdev;
	struct vfio_pci_dummy_resource *dummy_res, *tmp;
	struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
	int i, bar;

	/* Stop the device from further DMA */
	pci_clear_master(pdev);

	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
				VFIO_IRQ_SET_ACTION_TRIGGER,
				vdev->irq_type, 0, 0, NULL);

	/* Device closed, don't need mutex here */
	list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
				 &vdev->ioeventfds_list, next) {
		vfio_virqfd_disable(&ioeventfd->virqfd);
		list_del(&ioeventfd->next);
		kfree(ioeventfd);
	}
	vdev->ioeventfds_nr = 0;

	vdev->virq_disabled = false;

	for (i = 0; i < vdev->num_regions; i++)
		vdev->region[i].ops->release(vdev, &vdev->region[i]);

	vdev->num_regions = 0;
	kfree(vdev->region);
	vdev->region = NULL; /* don't krealloc a freed pointer */

	vfio_config_free(vdev);

	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
		bar = i + PCI_STD_RESOURCES;
		if (!vdev->barmap[bar])
			continue;
		pci_iounmap(pdev, vdev->barmap[bar]);
		pci_release_selected_regions(pdev, 1 << bar);
		vdev->barmap[bar] = NULL;
	}

	list_for_each_entry_safe(dummy_res, tmp,
				 &vdev->dummy_resources_list, res_next) {
		list_del(&dummy_res->res_next);
		release_resource(&dummy_res->resource);
		kfree(dummy_res);
	}

	vdev->needs_reset = true;

	/*
	 * If we have saved state, restore it.  If we can reset the device,
	 * even better.  Resetting with current state seems better than
	 * nothing, but saving and restoring current state without reset
	 * is just busy work.
	 */
	if (pci_load_and_free_saved_state(pdev, &vdev->pci_saved_state)) {
		pci_info(pdev, "%s: Couldn't reload saved state\n", __func__);

		if (!vdev->reset_works)
			goto out;

		pci_save_state(pdev);
	}

	/*
	 * Disable INTx and MSI, presumably to avoid spurious interrupts
	 * during reset.  Stolen from pci_reset_function()
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	/*
	 * Try to get the locks ourselves to prevent a deadlock.  The
	 * success of this is dependent on being able to lock the device,
	 * which is not always possible.
	 * We can not use the "try" reset interface here, which will
	 * overwrite the previously restored configuration information.
	 */
	if (vdev->reset_works && pci_cfg_access_trylock(pdev)) {
		if (device_trylock(&pdev->dev)) {
			if (!__pci_reset_function_locked(pdev))
				vdev->needs_reset = false;
			device_unlock(&pdev->dev);
		}
		pci_cfg_access_unlock(pdev);
	}

	pci_restore_state(pdev);
out:
	pci_disable_device(pdev);

	vfio_pci_try_bus_reset(vdev);

	if (!disable_idle_d3)
		vfio_pci_set_power_state(vdev, PCI_D3hot);
}

static struct pci_driver vfio_pci_driver;

static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
					   struct vfio_device **pf_dev)
{
	struct pci_dev *physfn = pci_physfn(vdev->pdev);

	if (!vdev->pdev->is_virtfn)
		return NULL;

	*pf_dev = vfio_device_get_from_dev(&physfn->dev);
	if (!*pf_dev)
		return NULL;

	if (pci_dev_driver(physfn) != &vfio_pci_driver) {
		vfio_device_put(*pf_dev);
		return NULL;
	}

	return vfio_device_data(*pf_dev);
}

static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
{
	struct vfio_device *pf_dev;
	struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);

	if (!pf_vdev)
		return;

	mutex_lock(&pf_vdev->vf_token->lock);
	pf_vdev->vf_token->users += val;
	WARN_ON(pf_vdev->vf_token->users < 0);
	mutex_unlock(&pf_vdev->vf_token->lock);

	vfio_device_put(pf_dev);
}

static void vfio_pci_release(void *device_data)
{
	struct vfio_pci_device *vdev = device_data;

	mutex_lock(&vdev->reflck->lock);

	if (!(--vdev->refcnt)) {
		vfio_pci_vf_token_user_add(vdev, -1);
		vfio_spapr_pci_eeh_release(vdev->pdev);
		vfio_pci_disable(vdev);
		if (vdev->err_trigger)
			eventfd_ctx_put(vdev->err_trigger);
		if (vdev->req_trigger)
			eventfd_ctx_put(vdev->req_trigger);
	}

	mutex_unlock(&vdev->reflck->lock);

	module_put(THIS_MODULE);
}

static int vfio_pci_open(void *device_data)
{
	struct vfio_pci_device *vdev = device_data;
	int ret = 0;

	if (!try_module_get(THIS_MODULE))
		return -ENODEV;

	mutex_lock(&vdev->reflck->lock);

	if (!vdev->refcnt) {
		ret = vfio_pci_enable(vdev);
		if (ret)
			goto error;

		vfio_spapr_pci_eeh_open(vdev->pdev);
		vfio_pci_vf_token_user_add(vdev, 1);
	}
	vdev->refcnt++;
error:
	mutex_unlock(&vdev->reflck->lock);
	if (ret)
		module_put(THIS_MODULE);
	return ret;
}

static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
{
	if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
		u8 pin;

		if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
		    vdev->nointx || vdev->pdev->is_virtfn)
			return 0;

		pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);

		return pin ? 1 : 0;
	} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
		u8 pos;
		u16 flags;

		pos = vdev->pdev->msi_cap;
		if (pos) {
			pci_read_config_word(vdev->pdev,
					     pos + PCI_MSI_FLAGS, &flags);
			return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
		}
	} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
		u8 pos;
		u16 flags;

		pos = vdev->pdev->msix_cap;
		if (pos) {
			pci_read_config_word(vdev->pdev,
					     pos + PCI_MSIX_FLAGS, &flags);

			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
		}
	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
		if (pci_is_pcie(vdev->pdev))
			return 1;
	} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
		return 1;
	}

	return 0;
}
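
/*
 * Worked example (illustrative, added for clarity): PCI_MSI_FLAGS_QMASK
 * covers the Multiple Message Capable field in bits 3:1 of the MSI
 * flags, encoded as a power of two, so an MMC encoding of 3 yields
 * 1 << 3 = 8 vectors.  MSI-X stores the table size directly as N - 1:
 * a QSIZE field of 7 means 7 + 1 = 8 table entries.
 */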

static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
{
	(*(int *)data)++;
	return 0;
}

struct vfio_pci_fill_info {
	int max;
	int cur;
	struct vfio_pci_dependent_device *devices;
};

static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_fill_info *fill = data;
	struct iommu_group *iommu_group;

	if (fill->cur == fill->max)
		return -EAGAIN; /* Something changed, try again */

	iommu_group = iommu_group_get(&pdev->dev);
	if (!iommu_group)
		return -EPERM; /* Cannot reset non-isolated devices */

	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
	fill->devices[fill->cur].bus = pdev->bus->number;
	fill->devices[fill->cur].devfn = pdev->devfn;
	fill->cur++;
	iommu_group_put(iommu_group);
	return 0;
}

struct vfio_pci_group_entry {
	struct vfio_group *group;
	int id;
};

struct vfio_pci_group_info {
	int count;
	struct vfio_pci_group_entry *groups;
};

static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_group_info *info = data;
	struct iommu_group *group;
	int id, i;

	group = iommu_group_get(&pdev->dev);
	if (!group)
		return -EPERM;

	id = iommu_group_id(group);

	for (i = 0; i < info->count; i++)
		if (info->groups[i].id == id)
			break;

	iommu_group_put(group);

	return (i == info->count) ? -EINVAL : 0;
}

static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
{
	for (; pdev; pdev = pdev->bus->self)
		if (pdev->bus == slot->bus)
			return (pdev->slot == slot);
	return false;
}

struct vfio_pci_walk_info {
	int (*fn)(struct pci_dev *, void *data);
	void *data;
	struct pci_dev *pdev;
	bool slot;
	int ret;
};

static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_walk_info *walk = data;

	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
		walk->ret = walk->fn(pdev, walk->data);

	return walk->ret;
}

static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
					 int (*fn)(struct pci_dev *,
						   void *data), void *data,
					 bool slot)
{
	struct vfio_pci_walk_info walk = {
		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
	};

	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);

	return walk.ret;
}
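
/*
 * Callback contract (descriptive comment added for clarity): fn() is
 * invoked for every device on the bus, or only for devices below the
 * same slot when @slot is set; a nonzero return from fn() is latched
 * in walk.ret and terminates the walk inside pci_walk_bus().
 */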

static int msix_mmappable_cap(struct vfio_pci_device *vdev,
			      struct vfio_info_cap *caps)
{
	struct vfio_info_cap_header header = {
		.id = VFIO_REGION_INFO_CAP_MSIX_MAPPABLE,
		.version = 1
	};

	return vfio_info_add_capability(caps, &header, sizeof(header));
}

int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
				 unsigned int type, unsigned int subtype,
				 const struct vfio_pci_regops *ops,
				 size_t size, u32 flags, void *data)
{
	struct vfio_pci_region *region;

	region = krealloc(vdev->region,
			  (vdev->num_regions + 1) * sizeof(*region),
			  GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	vdev->region = region;
	vdev->region[vdev->num_regions].type = type;
	vdev->region[vdev->num_regions].subtype = subtype;
	vdev->region[vdev->num_regions].ops = ops;
	vdev->region[vdev->num_regions].size = size;
	vdev->region[vdev->num_regions].flags = flags;
	vdev->region[vdev->num_regions].data = data;

	vdev->num_regions++;

	return 0;
}

struct vfio_devices {
	struct vfio_device **devices;
	int cur_index;
	int max_index;
};

static long vfio_pci_ioctl(void *device_data,
			   unsigned int cmd, unsigned long arg)
{
	struct vfio_pci_device *vdev = device_data;
	unsigned long minsz;

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;

		if (vdev->reset_works)
			info.flags |= VFIO_DEVICE_FLAGS_RESET;

		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct pci_dev *pdev = vdev->pdev;
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = pdev->cfg_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = pci_resource_len(pdev, info.index);
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			if (vdev->bar_mmap_supported[info.index]) {
				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
				if (info.index == vdev->msix_bar) {
					ret = msix_mmappable_cap(vdev, &caps);
					if (ret)
						return ret;
				}
			}

			break;
		case VFIO_PCI_ROM_REGION_INDEX:
		{
			void __iomem *io;
			size_t size;
			u16 cmd;

			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = 0;

			/* Report the BAR size, not the ROM size */
			info.size = pci_resource_len(pdev, info.index);
			if (!info.size) {
				/* Shadow ROMs appear as PCI option ROMs */
				if (pdev->resource[PCI_ROM_RESOURCE].flags &
							IORESOURCE_ROM_SHADOW)
					info.size = 0x20000;
				else
					break;
			}

			/*
			 * Is it really there?  Enable memory decode for
			 * implicit access in pci_map_rom().
			 */
			cmd = vfio_pci_memory_lock_and_enable(vdev);
			io = pci_map_rom(pdev, &size);
			if (io) {
				info.flags = VFIO_REGION_INFO_FLAG_READ;
				pci_unmap_rom(pdev, io);
			} else {
				info.size = 0;
			}
			vfio_pci_memory_unlock_and_restore(vdev, cmd);

			break;
		}
		case VFIO_PCI_VGA_REGION_INDEX:
			if (!vdev->has_vga)
				return -EINVAL;

			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0xc0000;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;

			break;
		default:
		{
			struct vfio_region_info_cap_type cap_type = {
					.header.id = VFIO_REGION_INFO_CAP_TYPE,
					.header.version = 1 };

			if (info.index >=
			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
				return -EINVAL;
			info.index = array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vdev->num_regions);

			i = info.index - VFIO_PCI_NUM_REGIONS;

			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vdev->region[i].size;
			info.flags = vdev->region[i].flags;

			cap_type.type = vdev->region[i].type;
			cap_type.subtype = vdev->region[i].subtype;

			ret = vfio_info_add_capability(&caps, &cap_type.header,
						       sizeof(cap_type));
			if (ret)
				return ret;

			if (vdev->region[i].ops->add_capability) {
				ret = vdev->region[i].ops->add_capability(vdev,
						&vdev->region[i], &caps);
				if (ret)
					return ret;
			}
		}
		}

		if (caps.size) {
			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
		case VFIO_PCI_REQ_IRQ_INDEX:
			break;
		case VFIO_PCI_ERR_IRQ_INDEX:
			if (pci_is_pcie(vdev->pdev))
				break;
		/* fall through */
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = vfio_pci_get_irq_count(vdev, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int max, ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		max = vfio_pci_get_irq_count(vdev, hdr.index);

		ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
		if (ret)
			return ret;

		if (data_size) {
			data = memdup_user((void __user *)(arg + minsz),
					    data_size);
			if (IS_ERR(data))
				return PTR_ERR(data);
		}

		mutex_lock(&vdev->igate);

		ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
					      hdr.start, hdr.count, data);

		mutex_unlock(&vdev->igate);
		kfree(data);

		return ret;

	} else if (cmd == VFIO_DEVICE_RESET) {
		int ret;

		if (!vdev->reset_works)
			return -EINVAL;

		vfio_pci_zap_and_down_write_memory_lock(vdev);
		ret = pci_try_reset_function(vdev->pdev);
		up_write(&vdev->memory_lock);

		return ret;

	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
		struct vfio_pci_hot_reset_info hdr;
		struct vfio_pci_fill_info fill = { 0 };
		struct vfio_pci_dependent_device *devices = NULL;
		bool slot = false;
		int ret = 0;

		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (hdr.argsz < minsz)
			return -EINVAL;

		hdr.flags = 0;

		/* Can we do a slot or bus reset or neither? */
		if (!pci_probe_reset_slot(vdev->pdev->slot))
			slot = true;
		else if (pci_probe_reset_bus(vdev->pdev->bus))
			return -ENODEV;

		/* How many devices are affected? */
		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_count_devs,
						    &fill.max, slot);
		if (ret)
			return ret;

		WARN_ON(!fill.max); /* Should always be at least one */

		/*
		 * If there's enough space, fill it now, otherwise return
		 * -ENOSPC and the number of devices affected.
		 */
		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
			ret = -ENOSPC;
			hdr.count = fill.max;
			goto reset_info_exit;
		}

		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
		if (!devices)
			return -ENOMEM;

		fill.devices = devices;

		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_fill_devs,
						    &fill, slot);

		/*
		 * If a device was removed between counting and filling,
		 * we may come up short of fill.max.  If a device was
		 * added, we'll have a return of -EAGAIN above.
		 */
		if (!ret)
			hdr.count = fill.cur;

reset_info_exit:
		if (copy_to_user((void __user *)arg, &hdr, minsz))
			ret = -EFAULT;

		if (!ret) {
			if (copy_to_user((void __user *)(arg + minsz), devices,
					 hdr.count * sizeof(*devices)))
				ret = -EFAULT;
		}

		kfree(devices);
		return ret;

	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
		struct vfio_pci_hot_reset hdr;
		int32_t *group_fds;
		struct vfio_pci_group_entry *groups;
		struct vfio_pci_group_info info;
		struct vfio_devices devs = { .cur_index = 0 };
		bool slot = false;
		int i, group_idx, mem_idx = 0, count = 0, ret = 0;

		minsz = offsetofend(struct vfio_pci_hot_reset, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (hdr.argsz < minsz || hdr.flags)
			return -EINVAL;

		/* Can we do a slot or bus reset or neither? */
		if (!pci_probe_reset_slot(vdev->pdev->slot))
			slot = true;
		else if (pci_probe_reset_bus(vdev->pdev->bus))
			return -ENODEV;

		/*
		 * We can't let userspace give us an arbitrarily large
		 * buffer to copy, so verify how many we think there
		 * could be.  Note groups can have multiple devices so
		 * one group per device is the max.
		 */
		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_count_devs,
						    &count, slot);
		if (ret)
			return ret;

		/* Somewhere between 1 and count is OK */
		if (!hdr.count || hdr.count > count)
			return -EINVAL;

		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
		if (!group_fds || !groups) {
			kfree(group_fds);
			kfree(groups);
			return -ENOMEM;
		}

		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
				   hdr.count * sizeof(*group_fds))) {
			kfree(group_fds);
			kfree(groups);
			return -EFAULT;
		}

		/*
		 * For each group_fd, get the group through the vfio external
		 * user interface and store the group and iommu ID.  This
		 * ensures the group is held across the reset.
		 */
		for (group_idx = 0; group_idx < hdr.count; group_idx++) {
			struct vfio_group *group;
			struct fd f = fdget(group_fds[group_idx]);
			if (!f.file) {
				ret = -EBADF;
				break;
			}

			group = vfio_group_get_external_user(f.file);
			fdput(f);
			if (IS_ERR(group)) {
				ret = PTR_ERR(group);
				break;
			}

			groups[group_idx].group = group;
			groups[group_idx].id =
					vfio_external_user_iommu_id(group);
		}

		kfree(group_fds);

		/* release reference to groups on error */
		if (ret)
			goto hot_reset_release;

		info.count = hdr.count;
		info.groups = groups;

		/*
		 * Test whether all the affected devices are contained
		 * by the set of groups provided by the user.
		 */
		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
						    vfio_pci_validate_devs,
						    &info, slot);
		if (ret)
			goto hot_reset_release;

		devs.max_index = count;
		devs.devices = kcalloc(count, sizeof(struct vfio_device *),
				       GFP_KERNEL);
		if (!devs.devices) {
			ret = -ENOMEM;
			goto hot_reset_release;
		}

		/*
		 * We need to get memory_lock for each device, but devices
		 * can share mmap_lock, therefore we need to zap and hold
		 * the vma_lock for each device, and only then get each
		 * memory_lock.
		 */
		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
					    vfio_pci_try_zap_and_vma_lock_cb,
					    &devs, slot);
		if (ret)
			goto hot_reset_release;

		for (; mem_idx < devs.cur_index; mem_idx++) {
			struct vfio_pci_device *tmp;

			tmp = vfio_device_data(devs.devices[mem_idx]);

			ret = down_write_trylock(&tmp->memory_lock);
			if (!ret) {
				ret = -EBUSY;
				goto hot_reset_release;
			}
			mutex_unlock(&tmp->vma_lock);
		}

		/* User has access, do the reset */
		ret = pci_reset_bus(vdev->pdev);

hot_reset_release:
		for (i = 0; i < devs.cur_index; i++) {
			struct vfio_device *device;
			struct vfio_pci_device *tmp;

			device = devs.devices[i];
			tmp = vfio_device_data(device);

			if (i < mem_idx)
				up_write(&tmp->memory_lock);
			else
				mutex_unlock(&tmp->vma_lock);
			vfio_device_put(device);
		}
		kfree(devs.devices);

		for (group_idx--; group_idx >= 0; group_idx--)
			vfio_group_put_external_user(groups[group_idx].group);

		kfree(groups);
		return ret;
	} else if (cmd == VFIO_DEVICE_IOEVENTFD) {
		struct vfio_device_ioeventfd ioeventfd;
		int count;

		minsz = offsetofend(struct vfio_device_ioeventfd, fd);

		if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
			return -EFAULT;

		if (ioeventfd.argsz < minsz)
			return -EINVAL;

		if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
			return -EINVAL;

		count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;

		if (hweight8(count) != 1 || ioeventfd.fd < -1)
			return -EINVAL;

		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
					  ioeventfd.data, count, ioeventfd.fd);
	} else if (cmd == VFIO_DEVICE_FEATURE) {
		struct vfio_device_feature feature;
		uuid_t uuid;

		minsz = offsetofend(struct vfio_device_feature, flags);

		if (copy_from_user(&feature, (void __user *)arg, minsz))
			return -EFAULT;

		if (feature.argsz < minsz)
			return -EINVAL;

		/* Check unknown flags */
		if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
				      VFIO_DEVICE_FEATURE_SET |
				      VFIO_DEVICE_FEATURE_GET |
				      VFIO_DEVICE_FEATURE_PROBE))
			return -EINVAL;

		/* GET & SET are mutually exclusive except with PROBE */
		if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
		    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
		    (feature.flags & VFIO_DEVICE_FEATURE_GET))
			return -EINVAL;

		switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
		case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
			if (!vdev->vf_token)
				return -ENOTTY;

			/*
			 * We do not support GET of the VF Token UUID as this
			 * could expose the token of the previous device user.
			 */
			if (feature.flags & VFIO_DEVICE_FEATURE_GET)
				return -EINVAL;

			if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
				return 0;

			/* Don't SET unless told to do so */
			if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
				return -EINVAL;

			if (feature.argsz < minsz + sizeof(uuid))
				return -EINVAL;

			if (copy_from_user(&uuid, (void __user *)(arg + minsz),
					   sizeof(uuid)))
				return -EFAULT;

			mutex_lock(&vdev->vf_token->lock);
			uuid_copy(&vdev->vf_token->uuid, &uuid);
			mutex_unlock(&vdev->vf_token->lock);

			return 0;
		default:
			return -ENOTTY;
		}
	}

	return -ENOTTY;
}
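
/*
 * Illustrative userspace sketch (not part of the driver): the ioctls
 * above follow the usual vfio argsz/flags pattern, e.g.:
 *
 *	struct vfio_device_info info = { .argsz = sizeof(info) };
 *
 *	if (!ioctl(device_fd, VFIO_DEVICE_GET_INFO, &info))
 *		printf("regions %u irqs %u\n",
 *		       info.num_regions, info.num_irqs);
 *
 * where device_fd is a vfio device fd obtained through
 * VFIO_GROUP_GET_DEVICE_FD.
 */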

static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
			   size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	struct vfio_pci_device *vdev = device_data;

	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
		return -EINVAL;

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);

	case VFIO_PCI_ROM_REGION_INDEX:
		if (iswrite)
			return -EINVAL;
		return vfio_pci_bar_rw(vdev, buf, count, ppos, false);

	case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
		return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);

	case VFIO_PCI_VGA_REGION_INDEX:
		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
	default:
		index -= VFIO_PCI_NUM_REGIONS;
		return vdev->region[index].ops->rw(vdev, buf,
						   count, ppos, iswrite);
	}

	return -EINVAL;
}

static ssize_t vfio_pci_read(void *device_data, char __user *buf,
			     size_t count, loff_t *ppos)
{
	if (!count)
		return 0;

	return vfio_pci_rw(device_data, buf, count, ppos, false);
}

static ssize_t vfio_pci_write(void *device_data, const char __user *buf,
			      size_t count, loff_t *ppos)
{
	if (!count)
		return 0;

	return vfio_pci_rw(device_data, (char __user *)buf, count, ppos, true);
}

/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */
static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
{
	struct vfio_pci_mmap_vma *mmap_vma, *tmp;

	/*
	 * Lock ordering:
	 * vma_lock is nested under mmap_lock for vm_ops callback paths.
	 * The memory_lock semaphore is used by both code paths calling
	 * into this function to zap vmas and the vm_ops.fault callback
	 * to protect the memory enable state of the device.
	 *
	 * When zapping vmas we need to maintain the mmap_lock => vma_lock
	 * ordering, which requires using vma_lock to walk vma_list to
	 * acquire an mm, then dropping vma_lock to get the mmap_lock and
	 * reacquiring vma_lock.  This logic is derived from similar
	 * requirements in uverbs_user_mmap_disassociate().
	 *
	 * mmap_lock must always be the top-level lock when it is taken.
	 * Therefore we can only hold the memory_lock write lock when
	 * vma_list is empty, as we'd need to take mmap_lock to clear
	 * entries.  vma_list can only be guaranteed empty when holding
	 * vma_lock, thus memory_lock is nested under vma_lock.
	 *
	 * This enables the vm_ops.fault callback to acquire vma_lock,
	 * followed by memory_lock read lock, while already holding
	 * mmap_lock without risk of deadlock.
	 */
	while (1) {
		struct mm_struct *mm = NULL;

		if (try) {
			if (!mutex_trylock(&vdev->vma_lock))
				return 0;
		} else {
			mutex_lock(&vdev->vma_lock);
		}
		while (!list_empty(&vdev->vma_list)) {
			mmap_vma = list_first_entry(&vdev->vma_list,
						    struct vfio_pci_mmap_vma,
						    vma_next);
			mm = mmap_vma->vma->vm_mm;
			if (mmget_not_zero(mm))
				break;

			list_del(&mmap_vma->vma_next);
			kfree(mmap_vma);
			mm = NULL;
		}
		if (!mm)
			return 1;
		mutex_unlock(&vdev->vma_lock);

		if (try) {
			if (!mmap_read_trylock(mm)) {
				mmput(mm);
				return 0;
			}
		} else {
			mmap_read_lock(mm);
		}
		if (mmget_still_valid(mm)) {
			if (try) {
				if (!mutex_trylock(&vdev->vma_lock)) {
					mmap_read_unlock(mm);
					mmput(mm);
					return 0;
				}
			} else {
				mutex_lock(&vdev->vma_lock);
			}
			list_for_each_entry_safe(mmap_vma, tmp,
						 &vdev->vma_list, vma_next) {
				struct vm_area_struct *vma = mmap_vma->vma;

				if (vma->vm_mm != mm)
					continue;

				list_del(&mmap_vma->vma_next);
				kfree(mmap_vma);

				zap_vma_ptes(vma, vma->vm_start,
					     vma->vm_end - vma->vm_start);
			}
			mutex_unlock(&vdev->vma_lock);
		}
		mmap_read_unlock(mm);
		mmput(mm);
	}
}

void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_device *vdev)
{
	vfio_pci_zap_and_vma_lock(vdev, false);
	down_write(&vdev->memory_lock);
	mutex_unlock(&vdev->vma_lock);
}

u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_device *vdev)
{
	u16 cmd;

	down_write(&vdev->memory_lock);
	pci_read_config_word(vdev->pdev, PCI_COMMAND, &cmd);
	if (!(cmd & PCI_COMMAND_MEMORY))
		pci_write_config_word(vdev->pdev, PCI_COMMAND,
				      cmd | PCI_COMMAND_MEMORY);

	return cmd;
}

void vfio_pci_memory_unlock_and_restore(struct vfio_pci_device *vdev, u16 cmd)
{
	pci_write_config_word(vdev->pdev, PCI_COMMAND, cmd);
	up_write(&vdev->memory_lock);
}
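
/*
 * Usage note (descriptive comment added for clarity): callers pair
 * vfio_pci_memory_lock_and_enable() with
 * vfio_pci_memory_unlock_and_restore(), as the ROM probing in
 * VFIO_DEVICE_GET_REGION_INFO above does, so the pre-existing
 * PCI_COMMAND value is put back even if memory decode was
 * force-enabled in between.
 */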

/* Caller holds vma_lock */
static int __vfio_pci_add_vma(struct vfio_pci_device *vdev,
			      struct vm_area_struct *vma)
{
	struct vfio_pci_mmap_vma *mmap_vma;

	mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL);
	if (!mmap_vma)
		return -ENOMEM;

	mmap_vma->vma = vma;
	list_add(&mmap_vma->vma_next, &vdev->vma_list);

	return 0;
}

/*
 * Zap mmaps on open so that we can fault them in on access and therefore
 * our vma_list only tracks mappings accessed since last zap.
 */
static void vfio_pci_mmap_open(struct vm_area_struct *vma)
{
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

static void vfio_pci_mmap_close(struct vm_area_struct *vma)
{
	struct vfio_pci_device *vdev = vma->vm_private_data;
	struct vfio_pci_mmap_vma *mmap_vma;

	mutex_lock(&vdev->vma_lock);
	list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
		if (mmap_vma->vma == vma) {
			list_del(&mmap_vma->vma_next);
			kfree(mmap_vma);
			break;
		}
	}
	mutex_unlock(&vdev->vma_lock);
}

static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct vfio_pci_device *vdev = vma->vm_private_data;
	vm_fault_t ret = VM_FAULT_NOPAGE;

	mutex_lock(&vdev->vma_lock);
	down_read(&vdev->memory_lock);

	if (!__vfio_pci_memory_enabled(vdev)) {
		ret = VM_FAULT_SIGBUS;
		mutex_unlock(&vdev->vma_lock);
		goto up_out;
	}

	if (__vfio_pci_add_vma(vdev, vma)) {
		ret = VM_FAULT_OOM;
		mutex_unlock(&vdev->vma_lock);
		goto up_out;
	}

	mutex_unlock(&vdev->vma_lock);

	if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
			    vma->vm_end - vma->vm_start, vma->vm_page_prot))
		ret = VM_FAULT_SIGBUS;

up_out:
	up_read(&vdev->memory_lock);
	return ret;
}

static const struct vm_operations_struct vfio_pci_mmap_ops = {
	.open = vfio_pci_mmap_open,
	.close = vfio_pci_mmap_close,
	.fault = vfio_pci_mmap_fault,
};

static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
{
	struct vfio_pci_device *vdev = device_data;
	struct pci_dev *pdev = vdev->pdev;
	unsigned int index;
	u64 phys_len, req_len, pgoff, req_start;
	int ret;

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index >= VFIO_PCI_NUM_REGIONS) {
		int regnum = index - VFIO_PCI_NUM_REGIONS;
		struct vfio_pci_region *region = vdev->region + regnum;

		if (region && region->ops && region->ops->mmap &&
		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
			return region->ops->mmap(vdev, region, vma);
		return -EINVAL;
	}
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;
	if (!vdev->bar_mmap_supported[index])
		return -EINVAL;

	phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
	req_len = vma->vm_end - vma->vm_start;
	pgoff = vma->vm_pgoff &
		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (req_start + req_len > phys_len)
		return -EINVAL;

	/*
	 * Even though we don't make use of the barmap for the mmap,
	 * we need to request the region and the barmap tracks that.
	 */
	if (!vdev->barmap[index]) {
		ret = pci_request_selected_regions(pdev,
						   1 << index, "vfio-pci");
		if (ret)
			return ret;

		vdev->barmap[index] = pci_iomap(pdev, index, 0);
		if (!vdev->barmap[index]) {
			pci_release_selected_regions(pdev, 1 << index);
			return -ENOMEM;
		}
	}

	vma->vm_private_data = vdev;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;

	/*
	 * See remap_pfn_range(), called from vfio_pci_fault() but we can't
	 * change vm_flags within the fault handler.  Set them now.
	 */
	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &vfio_pci_mmap_ops;

	return 0;
}
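
/*
 * Worked example (illustrative, added for clarity): with the
 * VFIO_PCI_OFFSET_SHIFT of 40 from vfio_pci_private.h, userspace mmaps
 * BAR2 at file offset 2ULL << 40; the index computed above from
 * vm_pgoff recovers "2", and pgoff keeps only the page offset within
 * that BAR.
 */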

static void vfio_pci_request(void *device_data, unsigned int count)
{
	struct vfio_pci_device *vdev = device_data;
	struct pci_dev *pdev = vdev->pdev;

	mutex_lock(&vdev->igate);

	if (vdev->req_trigger) {
		if (!(count % 10))
			pci_notice_ratelimited(pdev,
				"Relaying device request to user (#%u)\n",
				count);
		eventfd_signal(vdev->req_trigger, 1);
	} else if (count == 0) {
		pci_warn(pdev,
			"No device request channel registered, blocked until released by user\n");
	}

	mutex_unlock(&vdev->igate);
}

static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
				      bool vf_token, uuid_t *uuid)
{
	/*
	 * There's always some degree of trust or collaboration between SR-IOV
	 * PF and VFs, even if just that the PF hosts the SR-IOV capability and
	 * can disrupt VFs with a reset, but often the PF has more explicit
	 * access to deny service to the VF or access data passed through the
	 * VF.  We therefore require an opt-in via a shared VF token (UUID) to
	 * represent this trust.  This both prevents that a VF driver might
	 * assume the PF driver is a trusted, in-kernel driver, and also that
	 * a PF driver might be replaced with a rogue driver, unknown to in-use
	 * VF drivers.
	 *
	 * Therefore when presented with a VF, if the PF is a vfio device and
	 * it is bound to the vfio-pci driver, the user needs to provide a VF
	 * token to access the device, in the form of appending a vf_token to
	 * the device name, for example:
	 *
	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
	 *
	 * When presented with a PF which has VFs in use, the user must also
	 * provide the current VF token to prove collaboration with existing
	 * VF users.  If VFs are not in use, the VF token provided for the PF
	 * device will act to set the VF token.
	 *
	 * If the VF token is provided but unused, an error is generated.
	 */
	if (!vdev->pdev->is_virtfn && !vdev->vf_token && !vf_token)
		return 0; /* No VF token provided or required */

	if (vdev->pdev->is_virtfn) {
		struct vfio_device *pf_dev;
		struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
		bool match;

		if (!pf_vdev) {
			if (!vf_token)
				return 0; /* PF is not vfio-pci, no VF token */

			pci_info_ratelimited(vdev->pdev,
				"VF token incorrectly provided, PF not bound to vfio-pci\n");
			return -EINVAL;
		}

		if (!vf_token) {
			vfio_device_put(pf_dev);
			pci_info_ratelimited(vdev->pdev,
				"VF token required to access device\n");
			return -EACCES;
		}

		mutex_lock(&pf_vdev->vf_token->lock);
		match = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
		mutex_unlock(&pf_vdev->vf_token->lock);

		vfio_device_put(pf_dev);

		if (!match) {
			pci_info_ratelimited(vdev->pdev,
				"Incorrect VF token provided for device\n");
			return -EACCES;
		}
	} else if (vdev->vf_token) {
		mutex_lock(&vdev->vf_token->lock);
		if (vdev->vf_token->users) {
			if (!vf_token) {
				mutex_unlock(&vdev->vf_token->lock);
				pci_info_ratelimited(vdev->pdev,
					"VF token required to access device\n");
				return -EACCES;
			}

			if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
				mutex_unlock(&vdev->vf_token->lock);
				pci_info_ratelimited(vdev->pdev,
					"Incorrect VF token provided for device\n");
				return -EACCES;
			}
		} else if (vf_token) {
			uuid_copy(&vdev->vf_token->uuid, uuid);
		}

		mutex_unlock(&vdev->vf_token->lock);
	} else if (vf_token) {
		pci_info_ratelimited(vdev->pdev,
			"VF token incorrectly provided, not a PF or VF\n");
		return -EINVAL;
	}

	return 0;
}

#define VF_TOKEN_ARG "vf_token="

static int vfio_pci_match(void *device_data, char *buf)
{
	struct vfio_pci_device *vdev = device_data;
	bool vf_token = false;
	uuid_t uuid;
	int ret;

	if (strncmp(pci_name(vdev->pdev), buf, strlen(pci_name(vdev->pdev))))
		return 0; /* No match */

	if (strlen(buf) > strlen(pci_name(vdev->pdev))) {
		buf += strlen(pci_name(vdev->pdev));

		if (*buf != ' ')
			return 0; /* No match: non-whitespace after name */

		while (*buf) {
			if (*buf == ' ') {
				buf++;
				continue;
			}

			if (!vf_token && !strncmp(buf, VF_TOKEN_ARG,
						  strlen(VF_TOKEN_ARG))) {
				buf += strlen(VF_TOKEN_ARG);

				if (strlen(buf) < UUID_STRING_LEN)
					return -EINVAL;

				ret = uuid_parse(buf, &uuid);
				if (ret)
					return ret;

				vf_token = true;
				buf += UUID_STRING_LEN;
			} else {
				/* Unknown/duplicate option */
				return -EINVAL;
			}
		}
	}

	ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);
	if (ret)
		return ret;

	return 1; /* Match */
}

static const struct vfio_device_ops vfio_pci_ops = {
	.name		= "vfio-pci",
	.open		= vfio_pci_open,
	.release	= vfio_pci_release,
	.ioctl		= vfio_pci_ioctl,
	.read		= vfio_pci_read,
	.write		= vfio_pci_write,
	.mmap		= vfio_pci_mmap,
	.request	= vfio_pci_request,
	.match		= vfio_pci_match,
};

static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
static struct pci_driver vfio_pci_driver;

static int vfio_pci_bus_notifier(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct vfio_pci_device *vdev = container_of(nb,
						    struct vfio_pci_device, nb);
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev *physfn = pci_physfn(pdev);

	if (action == BUS_NOTIFY_ADD_DEVICE &&
	    pdev->is_virtfn && physfn == vdev->pdev) {
		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
			 pci_name(pdev));
		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
						  vfio_pci_ops.name);
	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
		   pdev->is_virtfn && physfn == vdev->pdev) {
		struct pci_driver *drv = pci_dev_driver(pdev);

		if (drv && drv != &vfio_pci_driver)
			pci_warn(vdev->pdev,
				 "VF %s bound to driver %s while PF bound to vfio-pci\n",
				 pci_name(pdev), drv->name);
	}

	return 0;
}

static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct vfio_pci_device *vdev;
	struct iommu_group *group;
	int ret;

	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
		return -EINVAL;

	/*
	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
	 * by the host or other users.  We cannot capture the VFs if they
	 * already exist, nor can we track VF users.  Disabling SR-IOV here
	 * would initiate removing the VFs, which would unbind the driver,
	 * which is prone to blocking if that VF is also in use by vfio-pci.
	 * Just reject these PFs and let the user sort it out.
	 */
	if (pci_num_vf(pdev)) {
		pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
		return -EBUSY;
	}

	group = vfio_iommu_group_get(&pdev->dev);
	if (!group)
		return -EINVAL;

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev) {
		ret = -ENOMEM;
		goto out_group_put;
	}

	vdev->pdev = pdev;
	vdev->irq_type = VFIO_PCI_NUM_IRQS;
	mutex_init(&vdev->igate);
	spin_lock_init(&vdev->irqlock);
	mutex_init(&vdev->ioeventfds_lock);
	INIT_LIST_HEAD(&vdev->ioeventfds_list);
	mutex_init(&vdev->vma_lock);
	INIT_LIST_HEAD(&vdev->vma_list);
	init_rwsem(&vdev->memory_lock);

	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
	if (ret)
		goto out_free;

	ret = vfio_pci_reflck_attach(vdev);
	if (ret)
		goto out_del_group_dev;

	if (pdev->is_physfn) {
		vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
		if (!vdev->vf_token) {
			ret = -ENOMEM;
			goto out_reflck;
		}

		mutex_init(&vdev->vf_token->lock);
		uuid_gen(&vdev->vf_token->uuid);

		vdev->nb.notifier_call = vfio_pci_bus_notifier;
		ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
		if (ret)
			goto out_vf_token;
	}

	if (vfio_pci_is_vga(pdev)) {
		vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
		vga_set_legacy_decoding(pdev,
					vfio_pci_set_vga_decode(vdev, false));
	}

	vfio_pci_probe_power_state(vdev);

	if (!disable_idle_d3) {
		/*
		 * pci-core sets the device power state to an unknown value at
		 * bootup and after being removed from a driver.  The only
		 * transition it allows from this unknown state is to D0, which
		 * typically happens when a driver calls pci_enable_device().
		 * We're not ready to enable the device yet, but we do want to
		 * be able to get to D3.  Therefore first do a D0 transition
		 * before going to D3.
		 */
		vfio_pci_set_power_state(vdev, PCI_D0);
		vfio_pci_set_power_state(vdev, PCI_D3hot);
	}

	return ret;

out_vf_token:
	kfree(vdev->vf_token);
out_reflck:
	vfio_pci_reflck_put(vdev->reflck);
out_del_group_dev:
	vfio_del_group_dev(&pdev->dev);
out_free:
	kfree(vdev);
out_group_put:
	vfio_iommu_group_put(group, &pdev->dev);
	return ret;
}

static void vfio_pci_remove(struct pci_dev *pdev)
{
	struct vfio_pci_device *vdev;

	pci_disable_sriov(pdev);

	vdev = vfio_del_group_dev(&pdev->dev);
	if (!vdev)
		return;

	if (vdev->vf_token) {
		WARN_ON(vdev->vf_token->users);
		mutex_destroy(&vdev->vf_token->lock);
		kfree(vdev->vf_token);
	}

	if (vdev->nb.notifier_call)
		bus_unregister_notifier(&pci_bus_type, &vdev->nb);

	vfio_pci_reflck_put(vdev->reflck);

	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
	kfree(vdev->region);
	mutex_destroy(&vdev->ioeventfds_lock);

	if (!disable_idle_d3)
		vfio_pci_set_power_state(vdev, PCI_D0);

	kfree(vdev->pm_save);
	kfree(vdev);

	if (vfio_pci_is_vga(pdev)) {
		vga_client_register(pdev, NULL, NULL, NULL);
		vga_set_legacy_decoding(pdev,
				VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
				VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
	}
}

static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
						  pci_channel_state_t state)
{
	struct vfio_pci_device *vdev;
	struct vfio_device *device;

	device = vfio_device_get_from_dev(&pdev->dev);
	if (device == NULL)
		return PCI_ERS_RESULT_DISCONNECT;

	vdev = vfio_device_data(device);
	if (vdev == NULL) {
		vfio_device_put(device);
		return PCI_ERS_RESULT_DISCONNECT;
	}

	mutex_lock(&vdev->igate);

	if (vdev->err_trigger)
		eventfd_signal(vdev->err_trigger, 1);

	mutex_unlock(&vdev->igate);

	vfio_device_put(device);

	return PCI_ERS_RESULT_CAN_RECOVER;
}

static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
	struct vfio_pci_device *vdev;
	struct vfio_device *device;
	int ret = 0;

	might_sleep();

	if (!enable_sriov)
		return -ENOENT;

	device = vfio_device_get_from_dev(&pdev->dev);
	if (!device)
		return -ENODEV;

	vdev = vfio_device_data(device);
	if (!vdev) {
		vfio_device_put(device);
		return -ENODEV;
	}

	if (nr_virtfn == 0)
		pci_disable_sriov(pdev);
	else
		ret = pci_enable_sriov(pdev, nr_virtfn);

	vfio_device_put(device);

	return ret < 0 ? ret : nr_virtfn;
}

static const struct pci_error_handlers vfio_err_handlers = {
	.error_detected = vfio_pci_aer_err_detected,
};

static struct pci_driver vfio_pci_driver = {
	.name			= "vfio-pci",
	.id_table		= NULL, /* only dynamic ids */
	.probe			= vfio_pci_probe,
	.remove			= vfio_pci_remove,
	.sriov_configure	= vfio_pci_sriov_configure,
	.err_handler		= &vfio_err_handlers,
};
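
/*
 * Usage note (illustrative, not from the original source): besides the
 * "ids" module parameter, devices can be bound at runtime through the
 * dynamic ids interface registered for this driver, e.g.:
 *
 *	echo 10de 1db4 > /sys/bus/pci/drivers/vfio-pci/new_id
 *
 * which reaches the same pci_add_dynid() machinery used by
 * vfio_pci_fill_ids() below.
 */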

static DEFINE_MUTEX(reflck_lock);

static struct vfio_pci_reflck *vfio_pci_reflck_alloc(void)
{
	struct vfio_pci_reflck *reflck;

	reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
	if (!reflck)
		return ERR_PTR(-ENOMEM);

	kref_init(&reflck->kref);
	mutex_init(&reflck->lock);

	return reflck;
}

static void vfio_pci_reflck_get(struct vfio_pci_reflck *reflck)
{
	kref_get(&reflck->kref);
}

static int vfio_pci_reflck_find(struct pci_dev *pdev, void *data)
{
	struct vfio_pci_reflck **preflck = data;
	struct vfio_device *device;
	struct vfio_pci_device *vdev;

	device = vfio_device_get_from_dev(&pdev->dev);
	if (!device)
		return 0;

	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
		vfio_device_put(device);
		return 0;
	}

	vdev = vfio_device_data(device);

	if (vdev->reflck) {
		vfio_pci_reflck_get(vdev->reflck);
		*preflck = vdev->reflck;
		vfio_device_put(device);
		return 1;
	}

	vfio_device_put(device);
	return 0;
}

static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev)
{
	bool slot = !pci_probe_reset_slot(vdev->pdev->slot);

	mutex_lock(&reflck_lock);

	if (pci_is_root_bus(vdev->pdev->bus) ||
	    vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_reflck_find,
					  &vdev->reflck, slot) <= 0)
		vdev->reflck = vfio_pci_reflck_alloc();

	mutex_unlock(&reflck_lock);

	return PTR_ERR_OR_ZERO(vdev->reflck);
}

static void vfio_pci_reflck_release(struct kref *kref)
{
	struct vfio_pci_reflck *reflck = container_of(kref,
						      struct vfio_pci_reflck,
						      kref);

	kfree(reflck);
	mutex_unlock(&reflck_lock);
}

static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck)
{
	kref_put_mutex(&reflck->kref, vfio_pci_reflck_release, &reflck_lock);
}

static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
{
	struct vfio_devices *devs = data;
	struct vfio_device *device;
	struct vfio_pci_device *vdev;

	if (devs->cur_index == devs->max_index)
		return -ENOSPC;

	device = vfio_device_get_from_dev(&pdev->dev);
	if (!device)
		return -EINVAL;

	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
		vfio_device_put(device);
		return -EBUSY;
	}

	vdev = vfio_device_data(device);

	/* Fault if the device is not unused */
	if (vdev->refcnt) {
		vfio_device_put(device);
		return -EBUSY;
	}

	devs->devices[devs->cur_index++] = device;
	return 0;
}

static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
{
	struct vfio_devices *devs = data;
	struct vfio_device *device;
	struct vfio_pci_device *vdev;

	if (devs->cur_index == devs->max_index)
		return -ENOSPC;

	device = vfio_device_get_from_dev(&pdev->dev);
	if (!device)
		return -EINVAL;

	if (pci_dev_driver(pdev) != &vfio_pci_driver) {
		vfio_device_put(device);
		return -EBUSY;
	}

	vdev = vfio_device_data(device);

	/*
	 * Locking multiple devices is prone to deadlock, runaway and
	 * unwind if we hit contention.
	 */
	if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
		vfio_device_put(device);
		return -EBUSY;
	}

	devs->devices[devs->cur_index++] = device;
	return 0;
}

/*
 * If a bus or slot reset is available for the provided device and:
 *  - All of the devices affected by that bus or slot reset are unused
 *    (!refcnt)
 *  - At least one of the affected devices is marked dirty via
 *    needs_reset (such as by lack of FLR support)
 * Then attempt to perform that bus or slot reset.  Callers are required
 * to hold vdev->reflck->lock, protecting the bus/slot reset group from
 * concurrent opens.  A vfio_device reference is acquired for each device
 * to prevent unbinds during the reset operation.
 *
 * NB: vfio-core considers a group to be viable even if some devices are
 * bound to drivers like pci-stub or pcieport.  Here we require all devices
 * to be bound to vfio_pci since that's the only way we can be sure they
 * stay put.
 */
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
{
	struct vfio_devices devs = { .cur_index = 0 };
	int i = 0, ret = -EINVAL;
	bool slot = false;
	struct vfio_pci_device *tmp;

	if (!pci_probe_reset_slot(vdev->pdev->slot))
		slot = true;
	else if (pci_probe_reset_bus(vdev->pdev->bus))
		return;

	if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
					  &i, slot) || !i)
		return;

	devs.max_index = i;
	devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
	if (!devs.devices)
		return;

	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
					  vfio_pci_get_unused_devs,
					  &devs, slot))
		goto put_devs;

	/* Does at least one need a reset? */
	for (i = 0; i < devs.cur_index; i++) {
		tmp = vfio_device_data(devs.devices[i]);
		if (tmp->needs_reset) {
			ret = pci_reset_bus(vdev->pdev);
			break;
		}
	}

put_devs:
	for (i = 0; i < devs.cur_index; i++) {
		tmp = vfio_device_data(devs.devices[i]);

		/*
		 * If reset was successful, affected devices no longer need
		 * a reset and we should return all the collateral devices
		 * to low power.  If not successful, we either didn't reset
		 * the bus or timed out waiting for it, so let's not touch
		 * the power state.
		 */
		if (!ret) {
			tmp->needs_reset = false;

			if (tmp != vdev && !disable_idle_d3)
				vfio_pci_set_power_state(tmp, PCI_D3hot);
		}

		vfio_device_put(devs.devices[i]);
	}

	kfree(devs.devices);
}

static void __exit vfio_pci_cleanup(void)
{
	pci_unregister_driver(&vfio_pci_driver);
	vfio_pci_uninit_perm_bits();
}

static void __init vfio_pci_fill_ids(void)
{
	char *p, *id;
	int rc;

	/* no ids passed actually */
	if (ids[0] == '\0')
		return;

	/* add ids specified in the module parameter */
	p = ids;
	while ((id = strsep(&p, ","))) {
		unsigned int vendor, device, subvendor = PCI_ANY_ID,
			subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
		int fields;

		if (!strlen(id))
			continue;

		fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
				&vendor, &device, &subvendor, &subdevice,
				&class, &class_mask);

		if (fields < 2) {
			pr_warn("invalid id string \"%s\"\n", id);
			continue;
		}

		rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
				   subvendor, subdevice, class, class_mask, 0);
		if (rc)
			pr_warn("failed to add dynamic id [%04x:%04x[%04x:%04x]] class %#08x/%08x (%d)\n",
				vendor, device, subvendor, subdevice,
				class, class_mask, rc);
		else
			pr_info("add [%04x:%04x[%04x:%04x]] class %#08x/%08x\n",
				vendor, device, subvendor, subdevice,
				class, class_mask);
	}
}

static int __init vfio_pci_init(void)
{
	int ret;

	/* Allocate shared config space permission data used by all devices */
	ret = vfio_pci_init_perm_bits();
	if (ret)
		return ret;

	/* Register and scan for devices */
	ret = pci_register_driver(&vfio_pci_driver);
	if (ret)
		goto out_driver;

	vfio_pci_fill_ids();

	return 0;

out_driver:
	vfio_pci_uninit_perm_bits();
	return ret;
}

module_init(vfio_pci_init);
module_exit(vfio_pci_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);