1 // SPDX-License-Identifier: GPL-2.0
3 * Volume Management Device driver
4 * Copyright (c) 2015, Intel Corporation.
7 #include <linux/device.h>
8 #include <linux/interrupt.h>
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <linux/msi.h>
13 #include <linux/pci.h>
14 #include <linux/pci-acpi.h>
15 #include <linux/pci-ecam.h>
16 #include <linux/srcu.h>
17 #include <linux/rculist.h>
18 #include <linux/rcupdate.h>
20 #include <asm/irqdomain.h>
/*
 * VMD config-space register offsets and field accessors.
 *
 * The accessor macros parenthesize their argument so that an expression
 * argument (e.g. "a | b") is evaluated as a whole before masking/shifting.
 */
#define PCI_REG_VMCAP		0x40
/* Bit 0 of VMCAP: device supports bus-number restrictions. */
#define BUS_RESTRICT_CAP(vmcap)	((vmcap) & 0x1)
#define PCI_REG_VMCONFIG	0x44
/* Bits 9:8 of VMCONFIG: selected bus-number restriction. */
#define BUS_RESTRICT_CFG(vmcfg)	(((vmcfg) >> 8) & 0x3)
/*
 * VMCONFIG bit toggled by vmd_set_msi_remapping(): that function clears the
 * bit to enable MSI remapping and sets it to disable remapping.
 */
#define VMCONFIG_MSI_REMAP	0x2
#define PCI_REG_VMLOCK		0x70
/* Bit 1 of VMLOCK: MEMBAR2 shadow registers are enabled. */
#define MB2_SHADOW_EN(vmlock)	((vmlock) & 0x2)

/* Location and size (in bytes) of the shadow registers inside MEMBAR2. */
#define MB2_SHADOW_OFFSET	0x2000
#define MB2_SHADOW_SIZE		16
/*
 * Per-device feature bits, carried in pci_device_id.driver_data and passed
 * around as an unsigned long bitmask.
 *
 * NOTE(review): the closing words of the BUS_RESTRICTIONS and
 * CAN_BYPASS_MSI_REMAP comments were restored from context - confirm.
 */
enum vmd_features {
	/*
	 * Device may contain registers which hint the physical location of the
	 * membars, in order to allow proper address translation during
	 * resource assignment to enable guest virtualization
	 */
	VMD_FEAT_HAS_MEMBAR_SHADOW		= (1 << 0),

	/*
	 * Device may provide root port configuration information which limits
	 * bus numbering
	 */
	VMD_FEAT_HAS_BUS_RESTRICTIONS		= (1 << 1),

	/*
	 * Device contains physical location shadow registers in
	 * vendor-specific capability space
	 */
	VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP	= (1 << 2),

	/*
	 * Device may use MSI-X vector 0 for software triggering and will not
	 * be used for MSI remapping
	 */
	VMD_FEAT_OFFSET_FIRST_VECTOR		= (1 << 3),

	/*
	 * Device can bypass remapping MSI-X transactions into its MSI-X table,
	 * avoiding the requirement of a VMD MSI domain for child device
	 * interrupt handling.
	 */
	VMD_FEAT_CAN_BYPASS_MSI_REMAP		= (1 << 4),

	/*
	 * Enable ASPM on the PCIE root ports and set the default LTR of the
	 * storage devices on platforms where these values are not configured by
	 * BIOS. This is needed for laptops, which require these settings for
	 * proper power management of the SoC.
	 */
	VMD_FEAT_BIOS_PM_QUIRK			= (1 << 5),
};

/* Default LTR value written by the BIOS PM quirk. */
#define VMD_BIOS_PM_QUIRK_LTR	0x1003 /* 3145728 ns */

/* Feature set shared by the client-platform device IDs. */
#define VMD_FEATS_CLIENT	(VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |	\
				 VMD_FEAT_HAS_BUS_RESTRICTIONS |	\
				 VMD_FEAT_OFFSET_FIRST_VECTOR |		\
				 VMD_FEAT_BIOS_PM_QUIRK)
/* ID allocator handing out the per-device instance number (vmd->instance). */
static DEFINE_IDA(vmd_instance_ida);
/*
 * Lock for manipulating VMD IRQ lists.
 *
 * Taken with interrupts disabled around every add/remove on a vector's
 * irq_list and around updates of the per-vector child count.
 */
static DEFINE_RAW_SPINLOCK(list_lock);
94 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
95 * @node: list item for parent traversal.
96 * @irq: back pointer to parent.
97 * @enabled: true if driver enabled IRQ
98 * @virq: the virtual IRQ value provided to the requesting driver.
100 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
101 * a VMD IRQ using this structure.
104 struct list_head node
;
105 struct vmd_irq_list
*irq
;
111 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
112 * @irq_list: the list of irq's the VMD one demuxes to.
113 * @srcu: SRCU struct for local synchronization.
114 * @count: number of child IRQs assigned to this vector; used to track
116 * @virq: The underlying VMD Linux interrupt number
118 struct vmd_irq_list
{
119 struct list_head irq_list
;
120 struct srcu_struct srcu
;
129 void __iomem
*cfgbar
;
132 struct vmd_irq_list
*irqs
;
134 struct pci_sysdata sysdata
;
135 struct resource resources
[3];
136 struct irq_domain
*irq_domain
;
144 static inline struct vmd_dev
*vmd_from_bus(struct pci_bus
*bus
)
146 return container_of(bus
->sysdata
, struct vmd_dev
, sysdata
);
149 static inline unsigned int index_from_irqs(struct vmd_dev
*vmd
,
150 struct vmd_irq_list
*irqs
)
152 return irqs
- vmd
->irqs
;
156 * Drivers managing a device in a VMD domain allocate their own IRQs as before,
157 * but the MSI entry for the hardware it's driving will be programmed with a
158 * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its
159 * domain into one of its own, and the VMD driver de-muxes these for the
160 * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations
161 * and irq_chip to set this up.
163 static void vmd_compose_msi_msg(struct irq_data
*data
, struct msi_msg
*msg
)
165 struct vmd_irq
*vmdirq
= data
->chip_data
;
166 struct vmd_irq_list
*irq
= vmdirq
->irq
;
167 struct vmd_dev
*vmd
= irq_data_get_irq_handler_data(data
);
169 memset(msg
, 0, sizeof(*msg
));
170 msg
->address_hi
= X86_MSI_BASE_ADDRESS_HIGH
;
171 msg
->arch_addr_lo
.base_address
= X86_MSI_BASE_ADDRESS_LOW
;
172 msg
->arch_addr_lo
.destid_0_7
= index_from_irqs(vmd
, irq
);
176 * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
178 static void vmd_irq_enable(struct irq_data
*data
)
180 struct vmd_irq
*vmdirq
= data
->chip_data
;
183 raw_spin_lock_irqsave(&list_lock
, flags
);
184 WARN_ON(vmdirq
->enabled
);
185 list_add_tail_rcu(&vmdirq
->node
, &vmdirq
->irq
->irq_list
);
186 vmdirq
->enabled
= true;
187 raw_spin_unlock_irqrestore(&list_lock
, flags
);
189 data
->chip
->irq_unmask(data
);
192 static void vmd_irq_disable(struct irq_data
*data
)
194 struct vmd_irq
*vmdirq
= data
->chip_data
;
197 data
->chip
->irq_mask(data
);
199 raw_spin_lock_irqsave(&list_lock
, flags
);
200 if (vmdirq
->enabled
) {
201 list_del_rcu(&vmdirq
->node
);
202 vmdirq
->enabled
= false;
204 raw_spin_unlock_irqrestore(&list_lock
, flags
);
208 * XXX: Stubbed until we develop acceptable way to not create conflicts with
209 * other devices sharing the same vector.
211 static int vmd_irq_set_affinity(struct irq_data
*data
,
212 const struct cpumask
*dest
, bool force
)
217 static struct irq_chip vmd_msi_controller
= {
219 .irq_enable
= vmd_irq_enable
,
220 .irq_disable
= vmd_irq_disable
,
221 .irq_compose_msi_msg
= vmd_compose_msi_msg
,
222 .irq_set_affinity
= vmd_irq_set_affinity
,
225 static irq_hw_number_t
vmd_get_hwirq(struct msi_domain_info
*info
,
226 msi_alloc_info_t
*arg
)
232 * XXX: We can be even smarter selecting the best IRQ once we solve the
235 static struct vmd_irq_list
*vmd_next_irq(struct vmd_dev
*vmd
, struct msi_desc
*desc
)
240 if (vmd
->msix_count
== 1 + vmd
->first_vec
)
241 return &vmd
->irqs
[vmd
->first_vec
];
244 * White list for fast-interrupt handlers. All others will share the
245 * "slow" interrupt vector.
247 switch (msi_desc_to_pci_dev(desc
)->class) {
248 case PCI_CLASS_STORAGE_EXPRESS
:
251 return &vmd
->irqs
[vmd
->first_vec
];
254 raw_spin_lock_irqsave(&list_lock
, flags
);
255 best
= vmd
->first_vec
+ 1;
256 for (i
= best
; i
< vmd
->msix_count
; i
++)
257 if (vmd
->irqs
[i
].count
< vmd
->irqs
[best
].count
)
259 vmd
->irqs
[best
].count
++;
260 raw_spin_unlock_irqrestore(&list_lock
, flags
);
262 return &vmd
->irqs
[best
];
265 static int vmd_msi_init(struct irq_domain
*domain
, struct msi_domain_info
*info
,
266 unsigned int virq
, irq_hw_number_t hwirq
,
267 msi_alloc_info_t
*arg
)
269 struct msi_desc
*desc
= arg
->desc
;
270 struct vmd_dev
*vmd
= vmd_from_bus(msi_desc_to_pci_dev(desc
)->bus
);
271 struct vmd_irq
*vmdirq
= kzalloc(sizeof(*vmdirq
), GFP_KERNEL
);
276 INIT_LIST_HEAD(&vmdirq
->node
);
277 vmdirq
->irq
= vmd_next_irq(vmd
, desc
);
280 irq_domain_set_info(domain
, virq
, vmdirq
->irq
->virq
, info
->chip
, vmdirq
,
281 handle_untracked_irq
, vmd
, NULL
);
285 static void vmd_msi_free(struct irq_domain
*domain
,
286 struct msi_domain_info
*info
, unsigned int virq
)
288 struct vmd_irq
*vmdirq
= irq_get_chip_data(virq
);
291 synchronize_srcu(&vmdirq
->irq
->srcu
);
293 /* XXX: Potential optimization to rebalance */
294 raw_spin_lock_irqsave(&list_lock
, flags
);
295 vmdirq
->irq
->count
--;
296 raw_spin_unlock_irqrestore(&list_lock
, flags
);
301 static int vmd_msi_prepare(struct irq_domain
*domain
, struct device
*dev
,
302 int nvec
, msi_alloc_info_t
*arg
)
304 struct pci_dev
*pdev
= to_pci_dev(dev
);
305 struct vmd_dev
*vmd
= vmd_from_bus(pdev
->bus
);
307 if (nvec
> vmd
->msix_count
)
308 return vmd
->msix_count
;
310 memset(arg
, 0, sizeof(*arg
));
314 static void vmd_set_desc(msi_alloc_info_t
*arg
, struct msi_desc
*desc
)
319 static struct msi_domain_ops vmd_msi_domain_ops
= {
320 .get_hwirq
= vmd_get_hwirq
,
321 .msi_init
= vmd_msi_init
,
322 .msi_free
= vmd_msi_free
,
323 .msi_prepare
= vmd_msi_prepare
,
324 .set_desc
= vmd_set_desc
,
327 static struct msi_domain_info vmd_msi_domain_info
= {
328 .flags
= MSI_FLAG_USE_DEF_DOM_OPS
| MSI_FLAG_USE_DEF_CHIP_OPS
|
330 .ops
= &vmd_msi_domain_ops
,
331 .chip
= &vmd_msi_controller
,
334 static void vmd_set_msi_remapping(struct vmd_dev
*vmd
, bool enable
)
338 pci_read_config_word(vmd
->dev
, PCI_REG_VMCONFIG
, ®
);
339 reg
= enable
? (reg
& ~VMCONFIG_MSI_REMAP
) :
340 (reg
| VMCONFIG_MSI_REMAP
);
341 pci_write_config_word(vmd
->dev
, PCI_REG_VMCONFIG
, reg
);
344 static int vmd_create_irq_domain(struct vmd_dev
*vmd
)
346 struct fwnode_handle
*fn
;
348 fn
= irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd
->sysdata
.domain
);
352 vmd
->irq_domain
= pci_msi_create_irq_domain(fn
, &vmd_msi_domain_info
, NULL
);
353 if (!vmd
->irq_domain
) {
354 irq_domain_free_fwnode(fn
);
361 static void vmd_remove_irq_domain(struct vmd_dev
*vmd
)
364 * Some production BIOS won't enable remapping between soft reboots.
365 * Ensure remapping is restored before unloading the driver.
367 if (!vmd
->msix_count
)
368 vmd_set_msi_remapping(vmd
, true);
370 if (vmd
->irq_domain
) {
371 struct fwnode_handle
*fn
= vmd
->irq_domain
->fwnode
;
373 irq_domain_remove(vmd
->irq_domain
);
374 irq_domain_free_fwnode(fn
);
378 static void __iomem
*vmd_cfg_addr(struct vmd_dev
*vmd
, struct pci_bus
*bus
,
379 unsigned int devfn
, int reg
, int len
)
381 unsigned int busnr_ecam
= bus
->number
- vmd
->busn_start
;
382 u32 offset
= PCIE_ECAM_OFFSET(busnr_ecam
, devfn
, reg
);
384 if (offset
+ len
>= resource_size(&vmd
->dev
->resource
[VMD_CFGBAR
]))
387 return vmd
->cfgbar
+ offset
;
391 * CPU may deadlock if config space is not serialized on some versions of this
392 * hardware, so all config space access is done under a spinlock.
394 static int vmd_pci_read(struct pci_bus
*bus
, unsigned int devfn
, int reg
,
397 struct vmd_dev
*vmd
= vmd_from_bus(bus
);
398 void __iomem
*addr
= vmd_cfg_addr(vmd
, bus
, devfn
, reg
, len
);
405 spin_lock_irqsave(&vmd
->cfg_lock
, flags
);
408 *value
= readb(addr
);
411 *value
= readw(addr
);
414 *value
= readl(addr
);
420 spin_unlock_irqrestore(&vmd
->cfg_lock
, flags
);
425 * VMD h/w converts non-posted config writes to posted memory writes. The
426 * read-back in this function forces the completion so it returns only after
427 * the config space was written, as expected.
429 static int vmd_pci_write(struct pci_bus
*bus
, unsigned int devfn
, int reg
,
432 struct vmd_dev
*vmd
= vmd_from_bus(bus
);
433 void __iomem
*addr
= vmd_cfg_addr(vmd
, bus
, devfn
, reg
, len
);
440 spin_lock_irqsave(&vmd
->cfg_lock
, flags
);
458 spin_unlock_irqrestore(&vmd
->cfg_lock
, flags
);
462 static struct pci_ops vmd_ops
= {
463 .read
= vmd_pci_read
,
464 .write
= vmd_pci_write
,
468 static struct acpi_device
*vmd_acpi_find_companion(struct pci_dev
*pci_dev
)
470 struct pci_host_bridge
*bridge
;
473 if (pci_dev
->bus
->ops
!= &vmd_ops
)
476 bridge
= pci_find_host_bridge(pci_dev
->bus
);
477 busnr
= pci_dev
->bus
->number
- bridge
->bus
->number
;
479 * The address computation below is only applicable to relative bus
485 addr
= (busnr
<< 24) | ((u32
)pci_dev
->devfn
<< 16) | 0x8000FFFFU
;
487 dev_dbg(&pci_dev
->dev
, "Looking for ACPI companion (address 0x%x)\n",
490 return acpi_find_child_device(ACPI_COMPANION(bridge
->dev
.parent
), addr
,
494 static bool hook_installed
;
496 static void vmd_acpi_begin(void)
498 if (pci_acpi_set_companion_lookup_hook(vmd_acpi_find_companion
))
501 hook_installed
= true;
504 static void vmd_acpi_end(void)
509 pci_acpi_clear_companion_lookup_hook();
510 hook_installed
= false;
513 static inline void vmd_acpi_begin(void) { }
514 static inline void vmd_acpi_end(void) { }
515 #endif /* CONFIG_ACPI */
517 static void vmd_domain_reset(struct vmd_dev
*vmd
)
519 u16 bus
, max_buses
= resource_size(&vmd
->resources
[0]);
520 u8 dev
, functions
, fn
, hdr_type
;
523 for (bus
= 0; bus
< max_buses
; bus
++) {
524 for (dev
= 0; dev
< 32; dev
++) {
525 base
= vmd
->cfgbar
+ PCIE_ECAM_OFFSET(bus
,
526 PCI_DEVFN(dev
, 0), 0);
528 hdr_type
= readb(base
+ PCI_HEADER_TYPE
);
530 functions
= (hdr_type
& PCI_HEADER_TYPE_MFD
) ? 8 : 1;
531 for (fn
= 0; fn
< functions
; fn
++) {
532 base
= vmd
->cfgbar
+ PCIE_ECAM_OFFSET(bus
,
533 PCI_DEVFN(dev
, fn
), 0);
535 hdr_type
= readb(base
+ PCI_HEADER_TYPE
) &
536 PCI_HEADER_TYPE_MASK
;
538 if (hdr_type
!= PCI_HEADER_TYPE_BRIDGE
||
539 (readw(base
+ PCI_CLASS_DEVICE
) !=
540 PCI_CLASS_BRIDGE_PCI
))
544 * Temporarily disable the I/O range before updating
547 writel(0x0000ffff, base
+ PCI_IO_BASE_UPPER16
);
548 /* Update lower 16 bits of I/O base/limit */
549 writew(0x00f0, base
+ PCI_IO_BASE
);
550 /* Update upper 16 bits of I/O base/limit */
551 writel(0, base
+ PCI_IO_BASE_UPPER16
);
553 /* MMIO Base/Limit */
554 writel(0x0000fff0, base
+ PCI_MEMORY_BASE
);
556 /* Prefetchable MMIO Base/Limit */
557 writel(0, base
+ PCI_PREF_LIMIT_UPPER32
);
558 writel(0x0000fff0, base
+ PCI_PREF_MEMORY_BASE
);
559 writel(0xffffffff, base
+ PCI_PREF_BASE_UPPER32
);
565 static void vmd_attach_resources(struct vmd_dev
*vmd
)
567 vmd
->dev
->resource
[VMD_MEMBAR1
].child
= &vmd
->resources
[1];
568 vmd
->dev
->resource
[VMD_MEMBAR2
].child
= &vmd
->resources
[2];
571 static void vmd_detach_resources(struct vmd_dev
*vmd
)
573 vmd
->dev
->resource
[VMD_MEMBAR1
].child
= NULL
;
574 vmd
->dev
->resource
[VMD_MEMBAR2
].child
= NULL
;
578 * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
579 * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower
580 * 16 bits are the PCI Segment Group (domain) number. Other bits are
581 * currently reserved.
583 static int vmd_find_free_domain(void)
586 struct pci_bus
*bus
= NULL
;
588 while ((bus
= pci_find_next_bus(bus
)) != NULL
)
589 domain
= max_t(int, domain
, pci_domain_nr(bus
));
593 static int vmd_get_phys_offsets(struct vmd_dev
*vmd
, bool native_hint
,
594 resource_size_t
*offset1
,
595 resource_size_t
*offset2
)
597 struct pci_dev
*dev
= vmd
->dev
;
604 ret
= pci_read_config_dword(dev
, PCI_REG_VMLOCK
, &vmlock
);
605 if (ret
|| PCI_POSSIBLE_ERROR(vmlock
))
608 if (MB2_SHADOW_EN(vmlock
)) {
609 void __iomem
*membar2
;
611 membar2
= pci_iomap(dev
, VMD_MEMBAR2
, 0);
614 phys1
= readq(membar2
+ MB2_SHADOW_OFFSET
);
615 phys2
= readq(membar2
+ MB2_SHADOW_OFFSET
+ 8);
616 pci_iounmap(dev
, membar2
);
620 /* Hypervisor-Emulated Vendor-Specific Capability */
621 int pos
= pci_find_capability(dev
, PCI_CAP_ID_VNDR
);
624 pci_read_config_dword(dev
, pos
+ 4, ®
);
627 if (pos
&& reg
== 0x53484457) {
628 pci_read_config_dword(dev
, pos
+ 8, ®
);
629 pci_read_config_dword(dev
, pos
+ 12, ®u
);
630 phys1
= (u64
) regu
<< 32 | reg
;
632 pci_read_config_dword(dev
, pos
+ 16, ®
);
633 pci_read_config_dword(dev
, pos
+ 20, ®u
);
634 phys2
= (u64
) regu
<< 32 | reg
;
639 *offset1
= dev
->resource
[VMD_MEMBAR1
].start
-
640 (phys1
& PCI_BASE_ADDRESS_MEM_MASK
);
641 *offset2
= dev
->resource
[VMD_MEMBAR2
].start
-
642 (phys2
& PCI_BASE_ADDRESS_MEM_MASK
);
647 static int vmd_get_bus_number_start(struct vmd_dev
*vmd
)
649 struct pci_dev
*dev
= vmd
->dev
;
652 pci_read_config_word(dev
, PCI_REG_VMCAP
, ®
);
653 if (BUS_RESTRICT_CAP(reg
)) {
654 pci_read_config_word(dev
, PCI_REG_VMCONFIG
, ®
);
656 switch (BUS_RESTRICT_CFG(reg
)) {
661 vmd
->busn_start
= 128;
664 vmd
->busn_start
= 224;
667 pci_err(dev
, "Unknown Bus Offset Setting (%d)\n",
668 BUS_RESTRICT_CFG(reg
));
676 static irqreturn_t
vmd_irq(int irq
, void *data
)
678 struct vmd_irq_list
*irqs
= data
;
679 struct vmd_irq
*vmdirq
;
682 idx
= srcu_read_lock(&irqs
->srcu
);
683 list_for_each_entry_rcu(vmdirq
, &irqs
->irq_list
, node
)
684 generic_handle_irq(vmdirq
->virq
);
685 srcu_read_unlock(&irqs
->srcu
, idx
);
690 static int vmd_alloc_irqs(struct vmd_dev
*vmd
)
692 struct pci_dev
*dev
= vmd
->dev
;
695 vmd
->msix_count
= pci_msix_vec_count(dev
);
696 if (vmd
->msix_count
< 0)
699 vmd
->msix_count
= pci_alloc_irq_vectors(dev
, vmd
->first_vec
+ 1,
700 vmd
->msix_count
, PCI_IRQ_MSIX
);
701 if (vmd
->msix_count
< 0)
702 return vmd
->msix_count
;
704 vmd
->irqs
= devm_kcalloc(&dev
->dev
, vmd
->msix_count
, sizeof(*vmd
->irqs
),
709 for (i
= 0; i
< vmd
->msix_count
; i
++) {
710 err
= init_srcu_struct(&vmd
->irqs
[i
].srcu
);
714 INIT_LIST_HEAD(&vmd
->irqs
[i
].irq_list
);
715 vmd
->irqs
[i
].virq
= pci_irq_vector(dev
, i
);
716 err
= devm_request_irq(&dev
->dev
, vmd
->irqs
[i
].virq
,
717 vmd_irq
, IRQF_NO_THREAD
,
718 vmd
->name
, &vmd
->irqs
[i
]);
727 * Since VMD is an aperture to regular PCIe root ports, only allow it to
728 * control features that the OS is allowed to control on the physical PCI bus.
730 static void vmd_copy_host_bridge_flags(struct pci_host_bridge
*root_bridge
,
731 struct pci_host_bridge
*vmd_bridge
)
733 vmd_bridge
->native_pcie_hotplug
= root_bridge
->native_pcie_hotplug
;
734 vmd_bridge
->native_shpc_hotplug
= root_bridge
->native_shpc_hotplug
;
735 vmd_bridge
->native_aer
= root_bridge
->native_aer
;
736 vmd_bridge
->native_pme
= root_bridge
->native_pme
;
737 vmd_bridge
->native_ltr
= root_bridge
->native_ltr
;
738 vmd_bridge
->native_dpc
= root_bridge
->native_dpc
;
742 * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
744 static int vmd_pm_enable_quirk(struct pci_dev
*pdev
, void *userdata
)
746 unsigned long features
= *(unsigned long *)userdata
;
747 u16 ltr
= VMD_BIOS_PM_QUIRK_LTR
;
751 if (!(features
& VMD_FEAT_BIOS_PM_QUIRK
))
754 pci_enable_link_state_locked(pdev
, PCIE_LINK_STATE_ALL
);
756 pos
= pci_find_ext_capability(pdev
, PCI_EXT_CAP_ID_LTR
);
761 * Skip if the max snoop LTR is non-zero, indicating BIOS has set it
762 * so the LTR quirk is not needed.
764 pci_read_config_dword(pdev
, pos
+ PCI_LTR_MAX_SNOOP_LAT
, <r_reg
);
765 if (!!(ltr_reg
& (PCI_LTR_VALUE_MASK
| PCI_LTR_SCALE_MASK
)))
769 * Set the default values to the maximum required by the platform to
770 * allow the deepest power management savings. Write as a DWORD where
771 * the lower word is the max snoop latency and the upper word is the
772 * max non-snoop latency.
774 ltr_reg
= (ltr
<< 16) | ltr
;
775 pci_write_config_dword(pdev
, pos
+ PCI_LTR_MAX_SNOOP_LAT
, ltr_reg
);
776 pci_info(pdev
, "VMD: Default LTR value set by driver\n");
781 static int vmd_enable_domain(struct vmd_dev
*vmd
, unsigned long features
)
783 struct pci_sysdata
*sd
= &vmd
->sysdata
;
784 struct resource
*res
;
787 LIST_HEAD(resources
);
788 resource_size_t offset
[2] = {0};
789 resource_size_t membar2_offset
= 0x2000;
790 struct pci_bus
*child
;
795 * Shadow registers may exist in certain VMD device ids which allow
796 * guests to correctly assign host physical addresses to the root ports
797 * and child devices. These registers will either return the host value
798 * or 0, depending on an enable bit in the VMD device.
800 if (features
& VMD_FEAT_HAS_MEMBAR_SHADOW
) {
801 membar2_offset
= MB2_SHADOW_OFFSET
+ MB2_SHADOW_SIZE
;
802 ret
= vmd_get_phys_offsets(vmd
, true, &offset
[0], &offset
[1]);
805 } else if (features
& VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP
) {
806 ret
= vmd_get_phys_offsets(vmd
, false, &offset
[0], &offset
[1]);
812 * Certain VMD devices may have a root port configuration option which
813 * limits the bus range to between 0-127, 128-255, or 224-255
815 if (features
& VMD_FEAT_HAS_BUS_RESTRICTIONS
) {
816 ret
= vmd_get_bus_number_start(vmd
);
821 res
= &vmd
->dev
->resource
[VMD_CFGBAR
];
822 vmd
->resources
[0] = (struct resource
) {
823 .name
= "VMD CFGBAR",
824 .start
= vmd
->busn_start
,
825 .end
= vmd
->busn_start
+ (resource_size(res
) >> 20) - 1,
826 .flags
= IORESOURCE_BUS
| IORESOURCE_PCI_FIXED
,
830 * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
831 * put 32-bit resources in the window.
833 * There's no hardware reason why a 64-bit window *couldn't*
834 * contain a 32-bit resource, but pbus_size_mem() computes the
835 * bridge window size assuming a 64-bit window will contain no
836 * 32-bit resources. __pci_assign_resource() enforces that
837 * artificial restriction to make sure everything will fit.
839 * The only way we could use a 64-bit non-prefetchable MEMBAR is
840 * if its address is <4GB so that we can convert it to a 32-bit
841 * resource. To be visible to the host OS, all VMD endpoints must
842 * be initially configured by platform BIOS, which includes setting
843 * up these resources. We can assume the device is configured
844 * according to the platform needs.
846 res
= &vmd
->dev
->resource
[VMD_MEMBAR1
];
847 upper_bits
= upper_32_bits(res
->end
);
848 flags
= res
->flags
& ~IORESOURCE_SIZEALIGN
;
850 flags
&= ~IORESOURCE_MEM_64
;
851 vmd
->resources
[1] = (struct resource
) {
852 .name
= "VMD MEMBAR1",
859 res
= &vmd
->dev
->resource
[VMD_MEMBAR2
];
860 upper_bits
= upper_32_bits(res
->end
);
861 flags
= res
->flags
& ~IORESOURCE_SIZEALIGN
;
863 flags
&= ~IORESOURCE_MEM_64
;
864 vmd
->resources
[2] = (struct resource
) {
865 .name
= "VMD MEMBAR2",
866 .start
= res
->start
+ membar2_offset
,
872 sd
->vmd_dev
= vmd
->dev
;
873 sd
->domain
= vmd_find_free_domain();
877 sd
->node
= pcibus_to_node(vmd
->dev
->bus
);
880 * Currently MSI remapping must be enabled in guest passthrough mode
881 * due to some missing interrupt remapping plumbing. This is probably
882 * acceptable because the guest is usually CPU-limited and MSI
883 * remapping doesn't become a performance bottleneck.
885 if (!(features
& VMD_FEAT_CAN_BYPASS_MSI_REMAP
) ||
886 offset
[0] || offset
[1]) {
887 ret
= vmd_alloc_irqs(vmd
);
891 vmd_set_msi_remapping(vmd
, true);
893 ret
= vmd_create_irq_domain(vmd
);
898 * Override the IRQ domain bus token so the domain can be
899 * distinguished from a regular PCI/MSI domain.
901 irq_domain_update_bus_token(vmd
->irq_domain
, DOMAIN_BUS_VMD_MSI
);
903 vmd_set_msi_remapping(vmd
, false);
906 pci_add_resource(&resources
, &vmd
->resources
[0]);
907 pci_add_resource_offset(&resources
, &vmd
->resources
[1], offset
[0]);
908 pci_add_resource_offset(&resources
, &vmd
->resources
[2], offset
[1]);
910 vmd
->bus
= pci_create_root_bus(&vmd
->dev
->dev
, vmd
->busn_start
,
911 &vmd_ops
, sd
, &resources
);
913 pci_free_resource_list(&resources
);
914 vmd_remove_irq_domain(vmd
);
918 vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd
->dev
->bus
),
919 to_pci_host_bridge(vmd
->bus
->bridge
));
921 vmd_attach_resources(vmd
);
923 dev_set_msi_domain(&vmd
->bus
->dev
, vmd
->irq_domain
);
925 dev_set_msi_domain(&vmd
->bus
->dev
,
926 dev_get_msi_domain(&vmd
->dev
->dev
));
930 pci_scan_child_bus(vmd
->bus
);
931 vmd_domain_reset(vmd
);
933 /* When Intel VMD is enabled, the OS does not discover the Root Ports
934 * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies
935 * a reset to the parent of the PCI device supplied as argument. This
936 * is why we pass a child device, so the reset can be triggered at
937 * the Intel bridge level and propagated to all the children in the
940 list_for_each_entry(child
, &vmd
->bus
->children
, node
) {
941 if (!list_empty(&child
->devices
)) {
942 dev
= list_first_entry(&child
->devices
,
943 struct pci_dev
, bus_list
);
944 ret
= pci_reset_bus(dev
);
946 pci_warn(dev
, "can't reset device: %d\n", ret
);
952 pci_assign_unassigned_bus_resources(vmd
->bus
);
954 pci_walk_bus(vmd
->bus
, vmd_pm_enable_quirk
, &features
);
957 * VMD root buses are virtual and don't return true on pci_is_pcie()
958 * and will fail pcie_bus_configure_settings() early. It can instead be
959 * run on each of the real root ports.
961 list_for_each_entry(child
, &vmd
->bus
->children
, node
)
962 pcie_bus_configure_settings(child
);
964 pci_bus_add_devices(vmd
->bus
);
968 WARN(sysfs_create_link(&vmd
->dev
->dev
.kobj
, &vmd
->bus
->dev
.kobj
,
969 "domain"), "Can't create symlink to domain\n");
973 static int vmd_probe(struct pci_dev
*dev
, const struct pci_device_id
*id
)
975 unsigned long features
= (unsigned long) id
->driver_data
;
979 if (resource_size(&dev
->resource
[VMD_CFGBAR
]) < (1 << 20))
982 vmd
= devm_kzalloc(&dev
->dev
, sizeof(*vmd
), GFP_KERNEL
);
987 vmd
->instance
= ida_simple_get(&vmd_instance_ida
, 0, 0, GFP_KERNEL
);
988 if (vmd
->instance
< 0)
989 return vmd
->instance
;
991 vmd
->name
= devm_kasprintf(&dev
->dev
, GFP_KERNEL
, "vmd%d",
995 goto out_release_instance
;
998 err
= pcim_enable_device(dev
);
1000 goto out_release_instance
;
1002 vmd
->cfgbar
= pcim_iomap(dev
, VMD_CFGBAR
, 0);
1005 goto out_release_instance
;
1008 pci_set_master(dev
);
1009 if (dma_set_mask_and_coherent(&dev
->dev
, DMA_BIT_MASK(64)) &&
1010 dma_set_mask_and_coherent(&dev
->dev
, DMA_BIT_MASK(32))) {
1012 goto out_release_instance
;
1015 if (features
& VMD_FEAT_OFFSET_FIRST_VECTOR
)
1018 spin_lock_init(&vmd
->cfg_lock
);
1019 pci_set_drvdata(dev
, vmd
);
1020 err
= vmd_enable_domain(vmd
, features
);
1022 goto out_release_instance
;
1024 dev_info(&vmd
->dev
->dev
, "Bound to PCI domain %04x\n",
1025 vmd
->sysdata
.domain
);
1028 out_release_instance
:
1029 ida_simple_remove(&vmd_instance_ida
, vmd
->instance
);
1033 static void vmd_cleanup_srcu(struct vmd_dev
*vmd
)
1037 for (i
= 0; i
< vmd
->msix_count
; i
++)
1038 cleanup_srcu_struct(&vmd
->irqs
[i
].srcu
);
1041 static void vmd_remove(struct pci_dev
*dev
)
1043 struct vmd_dev
*vmd
= pci_get_drvdata(dev
);
1045 sysfs_remove_link(&vmd
->dev
->dev
.kobj
, "domain");
1046 pci_stop_root_bus(vmd
->bus
);
1047 pci_remove_root_bus(vmd
->bus
);
1048 vmd_cleanup_srcu(vmd
);
1049 vmd_detach_resources(vmd
);
1050 vmd_remove_irq_domain(vmd
);
1051 ida_simple_remove(&vmd_instance_ida
, vmd
->instance
);
/*
 * vmd_shutdown - PCI driver shutdown callback.
 * @dev: the VMD endpoint.
 *
 * Only the IRQ domain teardown is performed; the bus is left in place.
 */
static void vmd_shutdown(struct pci_dev *dev)
{
	struct vmd_dev *vmd = pci_get_drvdata(dev);

	vmd_remove_irq_domain(vmd);
}
1061 #ifdef CONFIG_PM_SLEEP
1062 static int vmd_suspend(struct device
*dev
)
1064 struct pci_dev
*pdev
= to_pci_dev(dev
);
1065 struct vmd_dev
*vmd
= pci_get_drvdata(pdev
);
1068 for (i
= 0; i
< vmd
->msix_count
; i
++)
1069 devm_free_irq(dev
, vmd
->irqs
[i
].virq
, &vmd
->irqs
[i
]);
1074 static int vmd_resume(struct device
*dev
)
1076 struct pci_dev
*pdev
= to_pci_dev(dev
);
1077 struct vmd_dev
*vmd
= pci_get_drvdata(pdev
);
1080 vmd_set_msi_remapping(vmd
, !!vmd
->irq_domain
);
1082 for (i
= 0; i
< vmd
->msix_count
; i
++) {
1083 err
= devm_request_irq(dev
, vmd
->irqs
[i
].virq
,
1084 vmd_irq
, IRQF_NO_THREAD
,
1085 vmd
->name
, &vmd
->irqs
[i
]);
/* System sleep callbacks: vmd_suspend() on suspend, vmd_resume() on resume. */
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);
1095 static const struct pci_device_id vmd_ids
[] = {
1096 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_VMD_201D
),
1097 .driver_data
= VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP
,},
1098 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_VMD_28C0
),
1099 .driver_data
= VMD_FEAT_HAS_MEMBAR_SHADOW
|
1100 VMD_FEAT_HAS_BUS_RESTRICTIONS
|
1101 VMD_FEAT_CAN_BYPASS_MSI_REMAP
,},
1102 {PCI_VDEVICE(INTEL
, 0x467f),
1103 .driver_data
= VMD_FEATS_CLIENT
,},
1104 {PCI_VDEVICE(INTEL
, 0x4c3d),
1105 .driver_data
= VMD_FEATS_CLIENT
,},
1106 {PCI_VDEVICE(INTEL
, 0xa77f),
1107 .driver_data
= VMD_FEATS_CLIENT
,},
1108 {PCI_VDEVICE(INTEL
, 0x7d0b),
1109 .driver_data
= VMD_FEATS_CLIENT
,},
1110 {PCI_VDEVICE(INTEL
, 0xad0b),
1111 .driver_data
= VMD_FEATS_CLIENT
,},
1112 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_VMD_9A0B
),
1113 .driver_data
= VMD_FEATS_CLIENT
,},
/* Export the PCI ID table for module autoloading. */
MODULE_DEVICE_TABLE(pci, vmd_ids);
1118 static struct pci_driver vmd_drv
= {
1120 .id_table
= vmd_ids
,
1122 .remove
= vmd_remove
,
1123 .shutdown
= vmd_shutdown
,
1125 .pm
= &vmd_dev_pm_ops
,
1128 module_pci_driver(vmd_drv
);
1130 MODULE_AUTHOR("Intel Corporation");
1131 MODULE_LICENSE("GPL v2");
1132 MODULE_VERSION("0.6");