1 From: Suresh Siddha <suresh.b.siddha@intel.com>
2 Subject: x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure
3 References: fate #303948 and fate #303984
4 Patch-Mainline: queued for .28
5 Commit-ID: 75c46fa61bc5b4ccd20a168ff325c58771248fcd
7 Signed-off-by: Thomas Renninger <trenn@suse.de>
9 MSI and MSI-X support for interrupt remapping infrastructure.
11 The MSI address register will be programmed with the interrupt-remapping table
12 entry (IRTE) index, and the IRTE will contain information about the vector,
15 For MSI-X, all the IRTEs will be consecutively allocated in the table,
16 and the address registers will contain the starting index to the block
17 and the data register will contain the subindex within that block.
19 This also introduces a new irq_chip for cleaner irq migration (in process
20 context, as opposed to the current irq migration in the context of an interrupt).
21 The interrupt-remapping infrastructure will help us achieve this.
23 As MSI is edge triggered, irq migration is a simple atomic update (of vector
24 and cpu destination) of the IRTE, followed by flushing the hardware cache.
26 Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
27 Cc: akpm@linux-foundation.org
28 Cc: arjan@linux.intel.com
29 Cc: andi@firstfloor.org
30 Cc: ebiederm@xmission.com
31 Cc: jbarnes@virtuousgeek.org
33 Signed-off-by: Ingo Molnar <mingo@elte.hu>
36 arch/x86/kernel/io_apic_64.c | 230 +++++++++++++++++++++++++++++++++++++++++--
37 drivers/pci/intr_remapping.c | 11 ++
38 include/asm-x86/msidef.h | 4
39 include/linux/dmar.h | 1
40 4 files changed, 238 insertions(+), 8 deletions(-)
42 Index: linux-2.6.26/arch/x86/kernel/io_apic_64.c
43 ===================================================================
44 --- linux-2.6.26.orig/arch/x86/kernel/io_apic_64.c
45 +++ linux-2.6.26/arch/x86/kernel/io_apic_64.c
46 @@ -2295,6 +2295,9 @@ void destroy_irq(unsigned int irq)
48 dynamic_irq_cleanup(irq);
50 +#ifdef CONFIG_INTR_REMAP
53 spin_lock_irqsave(&vector_lock, flags);
54 __clear_irq_vector(irq);
55 spin_unlock_irqrestore(&vector_lock, flags);
56 @@ -2313,11 +2316,42 @@ static int msi_compose_msg(struct pci_de
59 err = assign_irq_vector(irq, tmp);
61 - cpus_and(tmp, cfg->domain, tmp);
62 - dest = cpu_mask_to_apicid(tmp);
66 + cpus_and(tmp, cfg->domain, tmp);
67 + dest = cpu_mask_to_apicid(tmp);
69 +#ifdef CONFIG_INTR_REMAP
70 + if (irq_remapped(irq)) {
75 + ir_index = map_irq_to_irte_handle(irq, &sub_handle);
76 + BUG_ON(ir_index == -1);
78 + memset (&irte, 0, sizeof(irte));
81 + irte.dst_mode = INT_DEST_MODE;
82 + irte.trigger_mode = 0; /* edge */
83 + irte.dlvry_mode = INT_DELIVERY_MODE;
84 + irte.vector = cfg->vector;
85 + irte.dest_id = IRTE_DEST(dest);
87 + modify_irte(irq, &irte);
89 msg->address_hi = MSI_ADDR_BASE_HI;
90 + msg->data = sub_handle;
91 + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
93 + MSI_ADDR_IR_INDEX1(ir_index) |
94 + MSI_ADDR_IR_INDEX2(ir_index);
98 + msg->address_hi = MSI_ADDR_BASE_HI;
101 ((INT_DEST_MODE == 0) ?
102 @@ -2367,6 +2401,55 @@ static void set_msi_irq_affinity(unsigne
103 write_msi_msg(irq, &msg);
104 irq_desc[irq].affinity = mask;
107 +#ifdef CONFIG_INTR_REMAP
109 + * Migrate the MSI irq to another cpumask. This migration is
110 + * done in the process context using interrupt-remapping hardware.
112 +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
114 + struct irq_cfg *cfg = irq_cfg + irq;
116 + cpumask_t tmp, cleanup_mask;
119 + cpus_and(tmp, mask, cpu_online_map);
120 + if (cpus_empty(tmp))
123 + if (get_irte(irq, &irte))
126 + if (assign_irq_vector(irq, mask))
129 + cpus_and(tmp, cfg->domain, mask);
130 + dest = cpu_mask_to_apicid(tmp);
132 + irte.vector = cfg->vector;
133 + irte.dest_id = IRTE_DEST(dest);
136 + * atomically update the IRTE with the new destination and vector.
138 + modify_irte(irq, &irte);
141 + * After this point, all the interrupts will start arriving
142 + * at the new destination. So, time to cleanup the previous
143 + * vector allocation.
145 + if (cfg->move_in_progress) {
146 + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
147 + cfg->move_cleanup_count = cpus_weight(cleanup_mask);
148 + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
149 + cfg->move_in_progress = 0;
152 + irq_desc[irq].affinity = mask;
155 #endif /* CONFIG_SMP */
158 @@ -2384,26 +2467,157 @@ static struct irq_chip msi_chip = {
159 .retrigger = ioapic_retrigger_irq,
162 -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
163 +#ifdef CONFIG_INTR_REMAP
164 +static struct irq_chip msi_ir_chip = {
165 + .name = "IR-PCI-MSI",
166 + .unmask = unmask_msi_irq,
167 + .mask = mask_msi_irq,
168 + .ack = ack_x2apic_edge,
170 + .set_affinity = ir_set_msi_irq_affinity,
172 + .retrigger = ioapic_retrigger_irq,
176 + * Map the PCI dev to the corresponding remapping hardware unit
177 + * and allocate 'nvec' consecutive interrupt-remapping table entries
180 +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
182 + struct intel_iommu *iommu;
185 + iommu = map_dev_to_ir(dev);
188 + "Unable to map PCI %s to iommu\n", pci_name(dev));
192 + index = alloc_irte(iommu, irq, nvec);
195 + "Unable to allocate %d IRTE for PCI %s\n", nvec,
203 +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
208 + ret = msi_compose_msg(dev, irq, &msg);
212 + set_irq_msi(irq, desc);
213 + write_msi_msg(irq, &msg);
215 +#ifdef CONFIG_INTR_REMAP
216 + if (irq_remapped(irq)) {
217 + struct irq_desc *desc = irq_desc + irq;
219 + * irq migration in process context
221 + desc->status |= IRQ_MOVE_PCNTXT;
222 + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
225 + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
230 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
238 - ret = msi_compose_msg(dev, irq, &msg);
239 +#ifdef CONFIG_INTR_REMAP
240 + if (!intr_remapping_enabled)
243 + ret = msi_alloc_irte(dev, irq, 1);
248 + ret = setup_msi_irq(dev, desc, irq);
255 - set_irq_msi(irq, desc);
256 - write_msi_msg(irq, &msg);
257 +#ifdef CONFIG_INTR_REMAP
264 - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
265 +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
267 + int irq, ret, sub_handle;
268 + struct msi_desc *desc;
269 +#ifdef CONFIG_INTR_REMAP
270 + struct intel_iommu *iommu = 0;
275 + list_for_each_entry(desc, &dev->msi_list, list) {
276 + irq = create_irq();
279 +#ifdef CONFIG_INTR_REMAP
280 + if (!intr_remapping_enabled)
285 + * allocate the consecutive block of IRTE's
288 + index = msi_alloc_irte(dev, irq, nvec);
294 + iommu = map_dev_to_ir(dev);
300 + * setup the mapping between the irq and the IRTE
301 + * base index, the sub_handle pointing to the
302 + * appropriate interrupt remap table entry.
304 + set_irte_irq(irq, iommu, index, sub_handle);
308 + ret = setup_msi_irq(dev, desc, irq);
320 void arch_teardown_msi_irq(unsigned int irq)
321 Index: linux-2.6.26/drivers/pci/intr_remapping.c
322 ===================================================================
323 --- linux-2.6.26.orig/drivers/pci/intr_remapping.c
324 +++ linux-2.6.26/drivers/pci/intr_remapping.c
325 @@ -230,6 +230,17 @@ struct intel_iommu *map_ioapic_to_ir(int
329 +struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
331 + struct dmar_drhd_unit *drhd;
333 + drhd = dmar_find_matched_drhd_unit(dev);
337 + return drhd->iommu;
340 int free_irte(int irq)
343 Index: linux-2.6.26/include/asm-x86/msidef.h
344 ===================================================================
345 --- linux-2.6.26.orig/include/asm-x86/msidef.h
346 +++ linux-2.6.26/include/asm-x86/msidef.h
348 #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
349 MSI_ADDR_DEST_ID_MASK)
351 +#define MSI_ADDR_IR_EXT_INT (1 << 4)
352 +#define MSI_ADDR_IR_SHV (1 << 3)
353 +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
354 +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
355 #endif /* ASM_MSIDEF_H */
356 Index: linux-2.6.26/include/linux/dmar.h
357 ===================================================================
358 --- linux-2.6.26.orig/include/linux/dmar.h
359 +++ linux-2.6.26/include/linux/dmar.h
360 @@ -109,6 +109,7 @@ extern int flush_irte(int irq);
361 extern int free_irte(int irq);
363 extern int irq_remapped(int irq);
364 +extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
365 extern struct intel_iommu *map_ioapic_to_ir(int apic);
367 #define irq_remapped(irq) (0)