]> git.ipfire.org Git - ipfire-2.x.git/blame - src/patches/suse-2.6.27.31/patches.arch/x2APIC_PATCH_23_of_41_89027d35aa5b8f45ce0f7fa0911db85b46563da0
Add a patch to fix Intel E100 wake-on-lan problems.
[ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.arch / x2APIC_PATCH_23_of_41_89027d35aa5b8f45ce0f7fa0911db85b46563da0
CommitLineData
2cb7cef9
BS
1From: Suresh Siddha <suresh.b.siddha@intel.com>
2Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
3References: fate #303948 and fate #303984
4Patch-Mainline: queued for .28
5Commit-ID: 89027d35aa5b8f45ce0f7fa0911db85b46563da0
6
7Signed-off-by: Thomas Renninger <trenn@suse.de>
8
9IO-APIC support in the presence of interrupt-remapping infrastructure.
10
11IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
12index and the IRTE will contain information about the vector, cpu destination,
13trigger mode etc, which traditionally was present in the IO-APIC RTE.
14
15Introduce a new irq_chip for cleaner irq migration (in the process
16context, as opposed to the current irq migration in the context of an interrupt;
17the interrupt-remapping infrastructure will help us achieve this cleanly).
18
19For edge triggered, irq migration is a simple atomic update (of vector
20and cpu destination) of the IRTE, followed by a flush of the hardware cache.
21
22For level triggered, we need to modify the io-apic RTE as well with the updated
23vector information, along with modifying IRTE with vector and cpu destination.
24So irq migration for level triggered is a little bit more complex compared to
25edge triggered migration. But the good news is, we use the same algorithm
26for level triggered migration as we have today, only difference being,
27we now initiate the irq migration from process context instead of the
28interrupt context.
29
30In future, when we do a directed EOI (combined with cpu EOI broadcast
31suppression) to the IO-APIC, level triggered irq migration will also be
32as simple as edge triggered migration and we can do the irq migration
33with a simple atomic update to IO-APIC RTE.
34
35TBD: some tests/changes needed in the presence of fixup_irqs() for
36level triggered irq migration.
37
38Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
39Cc: akpm@linux-foundation.org
40Cc: arjan@linux.intel.com
41Cc: andi@firstfloor.org
42Cc: ebiederm@xmission.com
43Cc: jbarnes@virtuousgeek.org
44Cc: steiner@sgi.com
45Signed-off-by: Ingo Molnar <mingo@elte.hu>
46
47---
48 arch/x86/kernel/apic_64.c | 1
49 arch/x86/kernel/io_apic_64.c | 300 +++++++++++++++++++++++++++++++++++++---
50 drivers/pci/intr_remapping.c | 10 +
51 include/asm-x86/apic.h | 8 +
52 include/asm-x86/io_apic.h | 14 +
53 include/asm-x86/irq_remapping.h | 8 +
54 include/linux/dmar.h | 1
55 7 files changed, 320 insertions(+), 22 deletions(-)
56
57Index: linux-2.6.26/arch/x86/kernel/apic_64.c
58===================================================================
59--- linux-2.6.26.orig/arch/x86/kernel/apic_64.c
60+++ linux-2.6.26/arch/x86/kernel/apic_64.c
61@@ -46,6 +46,7 @@
62 static int disable_apic_timer __cpuinitdata;
63 static int apic_calibrate_pmtmr __initdata;
64 int disable_apic;
65+int x2apic;
66
67 /* Local APIC timer works in C2 */
68 int local_apic_timer_c2_ok;
69Index: linux-2.6.26/arch/x86/kernel/io_apic_64.c
70===================================================================
71--- linux-2.6.26.orig/arch/x86/kernel/io_apic_64.c
72+++ linux-2.6.26/arch/x86/kernel/io_apic_64.c
73@@ -37,6 +37,7 @@
74 #include <acpi/acpi_bus.h>
75 #endif
76 #include <linux/bootmem.h>
77+#include <linux/dmar.h>
78
79 #include <asm/idle.h>
80 #include <asm/io.h>
81@@ -49,6 +50,7 @@
82 #include <asm/nmi.h>
83 #include <asm/msidef.h>
84 #include <asm/hypertransport.h>
85+#include <asm/irq_remapping.h>
86
87 #include <mach_ipi.h>
88 #include <mach_apic.h>
89@@ -306,7 +308,12 @@ static void __target_IO_APIC_irq(unsigne
90 pin = entry->pin;
91 if (pin == -1)
92 break;
93- io_apic_write(apic, 0x11 + pin*2, dest);
94+ /*
95+ * With interrupt-remapping, destination information comes
96+ * from interrupt-remapping table entry.
97+ */
98+ if (!irq_remapped(irq))
99+ io_apic_write(apic, 0x11 + pin*2, dest);
100 reg = io_apic_read(apic, 0x10 + pin*2);
101 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
102 reg |= vector;
103@@ -905,18 +912,98 @@ void __setup_vector_irq(int cpu)
104 }
105
106 static struct irq_chip ioapic_chip;
107+#ifdef CONFIG_INTR_REMAP
108+static struct irq_chip ir_ioapic_chip;
109+#endif
110
111 static void ioapic_register_intr(int irq, unsigned long trigger)
112 {
113- if (trigger) {
114+ if (trigger)
115 irq_desc[irq].status |= IRQ_LEVEL;
116- set_irq_chip_and_handler_name(irq, &ioapic_chip,
117- handle_fasteoi_irq, "fasteoi");
118- } else {
119+ else
120 irq_desc[irq].status &= ~IRQ_LEVEL;
121+
122+#ifdef CONFIG_INTR_REMAP
123+ if (irq_remapped(irq)) {
124+ irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
125+ if (trigger)
126+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
127+ handle_fasteoi_irq,
128+ "fasteoi");
129+ else
130+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
131+ handle_edge_irq, "edge");
132+ return;
133+ }
134+#endif
135+ if (trigger)
136+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
137+ handle_fasteoi_irq,
138+ "fasteoi");
139+ else
140 set_irq_chip_and_handler_name(irq, &ioapic_chip,
141 handle_edge_irq, "edge");
142+}
143+
144+static int setup_ioapic_entry(int apic, int irq,
145+ struct IO_APIC_route_entry *entry,
146+ unsigned int destination, int trigger,
147+ int polarity, int vector)
148+{
149+ /*
150+ * add it to the IO-APIC irq-routing table:
151+ */
152+ memset(entry,0,sizeof(*entry));
153+
154+#ifdef CONFIG_INTR_REMAP
155+ if (intr_remapping_enabled) {
156+ struct intel_iommu *iommu = map_ioapic_to_ir(apic);
157+ struct irte irte;
158+ struct IR_IO_APIC_route_entry *ir_entry =
159+ (struct IR_IO_APIC_route_entry *) entry;
160+ int index;
161+
162+ if (!iommu)
163+ panic("No mapping iommu for ioapic %d\n", apic);
164+
165+ index = alloc_irte(iommu, irq, 1);
166+ if (index < 0)
167+ panic("Failed to allocate IRTE for ioapic %d\n", apic);
168+
169+ memset(&irte, 0, sizeof(irte));
170+
171+ irte.present = 1;
172+ irte.dst_mode = INT_DEST_MODE;
173+ irte.trigger_mode = trigger;
174+ irte.dlvry_mode = INT_DELIVERY_MODE;
175+ irte.vector = vector;
176+ irte.dest_id = IRTE_DEST(destination);
177+
178+ modify_irte(irq, &irte);
179+
180+ ir_entry->index2 = (index >> 15) & 0x1;
181+ ir_entry->zero = 0;
182+ ir_entry->format = 1;
183+ ir_entry->index = (index & 0x7fff);
184+ } else
185+#endif
186+ {
187+ entry->delivery_mode = INT_DELIVERY_MODE;
188+ entry->dest_mode = INT_DEST_MODE;
189+ entry->dest = destination;
190 }
191+
192+ entry->mask = 0; /* enable IRQ */
193+ entry->trigger = trigger;
194+ entry->polarity = polarity;
195+ entry->vector = vector;
196+
197+ /* Mask level triggered irqs.
198+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
199+ */
200+ if (trigger)
201+ entry->mask = 1;
202+ return 0;
203 }
204
205 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
206@@ -941,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic,
207 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
208 irq, trigger, polarity);
209
210- /*
211- * add it to the IO-APIC irq-routing table:
212- */
213- memset(&entry,0,sizeof(entry));
214
215- entry.delivery_mode = INT_DELIVERY_MODE;
216- entry.dest_mode = INT_DEST_MODE;
217- entry.dest = cpu_mask_to_apicid(mask);
218- entry.mask = 0; /* enable IRQ */
219- entry.trigger = trigger;
220- entry.polarity = polarity;
221- entry.vector = cfg->vector;
222-
223- /* Mask level triggered irqs.
224- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
225- */
226- if (trigger)
227- entry.mask = 1;
228+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
229+ cpu_mask_to_apicid(mask), trigger, polarity,
230+ cfg->vector)) {
231+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
232+ mp_ioapics[apic].mp_apicid, pin);
233+ __clear_irq_vector(irq);
234+ return;
235+ }
236
237 ioapic_register_intr(irq, trigger);
238 if (irq < 16)
239@@ -1010,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(
240 {
241 struct IO_APIC_route_entry entry;
242
243+ if (intr_remapping_enabled)
244+ return;
245+
246 memset(&entry, 0, sizeof(entry));
247
248 /*
249@@ -1463,6 +1544,147 @@ static int ioapic_retrigger_irq(unsigned
250 */
251
252 #ifdef CONFIG_SMP
253+
254+#ifdef CONFIG_INTR_REMAP
255+static void ir_irq_migration(struct work_struct *work);
256+
257+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
258+
259+/*
260+ * Migrate the IO-APIC irq in the presence of intr-remapping.
261+ *
262+ * For edge triggered, irq migration is a simple atomic update(of vector
263+ * and cpu destination) of IRTE and flush the hardware cache.
264+ *
265+ * For level triggered, we need to modify the io-apic RTE as well with the updated
266+ * vector information, along with modifying IRTE with vector and destination.
267+ * So irq migration for level triggered is a little bit more complex compared to
268+ * edge triggered migration. But the good news is, we use the same algorithm
269+ * for level triggered migration as we have today, only difference being,
270+ * we now initiate the irq migration from process context instead of the
271+ * interrupt context.
272+ *
273+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
274+ * suppression) to the IO-APIC, level triggered irq migration will also be
275+ * as simple as edge triggered migration and we can do the irq migration
276+ * with a simple atomic update to IO-APIC RTE.
277+ */
278+static void migrate_ioapic_irq(int irq, cpumask_t mask)
279+{
280+ struct irq_cfg *cfg = irq_cfg + irq;
281+ struct irq_desc *desc = irq_desc + irq;
282+ cpumask_t tmp, cleanup_mask;
283+ struct irte irte;
284+ int modify_ioapic_rte = desc->status & IRQ_LEVEL;
285+ unsigned int dest;
286+ unsigned long flags;
287+
288+ cpus_and(tmp, mask, cpu_online_map);
289+ if (cpus_empty(tmp))
290+ return;
291+
292+ if (get_irte(irq, &irte))
293+ return;
294+
295+ if (assign_irq_vector(irq, mask))
296+ return;
297+
298+ cpus_and(tmp, cfg->domain, mask);
299+ dest = cpu_mask_to_apicid(tmp);
300+
301+ if (modify_ioapic_rte) {
302+ spin_lock_irqsave(&ioapic_lock, flags);
303+ __target_IO_APIC_irq(irq, dest, cfg->vector);
304+ spin_unlock_irqrestore(&ioapic_lock, flags);
305+ }
306+
307+ irte.vector = cfg->vector;
308+ irte.dest_id = IRTE_DEST(dest);
309+
310+ /*
311+ * Modified the IRTE and flushes the Interrupt entry cache.
312+ */
313+ modify_irte(irq, &irte);
314+
315+ if (cfg->move_in_progress) {
316+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
317+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
318+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
319+ cfg->move_in_progress = 0;
320+ }
321+
322+ irq_desc[irq].affinity = mask;
323+}
324+
325+static int migrate_irq_remapped_level(int irq)
326+{
327+ int ret = -1;
328+
329+ mask_IO_APIC_irq(irq);
330+
331+ if (io_apic_level_ack_pending(irq)) {
332+ /*
333+ * Interrupt in progress. Migrating irq now will change the
334+ * vector information in the IO-APIC RTE and that will confuse
335+ * the EOI broadcast performed by cpu.
336+ * So, delay the irq migration to the next instance.
337+ */
338+ schedule_delayed_work(&ir_migration_work, 1);
339+ goto unmask;
340+ }
341+
342+ /* everthing is clear. we have right of way */
343+ migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
344+
345+ ret = 0;
346+ irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
347+ cpus_clear(irq_desc[irq].pending_mask);
348+
349+unmask:
350+ unmask_IO_APIC_irq(irq);
351+ return ret;
352+}
353+
354+static void ir_irq_migration(struct work_struct *work)
355+{
356+ int irq;
357+
358+ for (irq = 0; irq < NR_IRQS; irq++) {
359+ struct irq_desc *desc = irq_desc + irq;
360+ if (desc->status & IRQ_MOVE_PENDING) {
361+ unsigned long flags;
362+
363+ spin_lock_irqsave(&desc->lock, flags);
364+ if (!desc->chip->set_affinity ||
365+ !(desc->status & IRQ_MOVE_PENDING)) {
366+ desc->status &= ~IRQ_MOVE_PENDING;
367+ spin_unlock_irqrestore(&desc->lock, flags);
368+ continue;
369+ }
370+
371+ desc->chip->set_affinity(irq,
372+ irq_desc[irq].pending_mask);
373+ spin_unlock_irqrestore(&desc->lock, flags);
374+ }
375+ }
376+}
377+
378+/*
379+ * Migrates the IRQ destination in the process context.
380+ */
381+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
382+{
383+ if (irq_desc[irq].status & IRQ_LEVEL) {
384+ irq_desc[irq].status |= IRQ_MOVE_PENDING;
385+ irq_desc[irq].pending_mask = mask;
386+ migrate_irq_remapped_level(irq);
387+ return;
388+ }
389+
390+ migrate_ioapic_irq(irq, mask);
391+}
392+#endif
393+
394 asmlinkage void smp_irq_move_cleanup_interrupt(void)
395 {
396 unsigned vector, me;
397@@ -1519,6 +1741,17 @@ static void irq_complete_move(unsigned i
398 #else
399 static inline void irq_complete_move(unsigned int irq) {}
400 #endif
401+#ifdef CONFIG_INTR_REMAP
402+static void ack_x2apic_level(unsigned int irq)
403+{
404+ ack_x2APIC_irq();
405+}
406+
407+static void ack_x2apic_edge(unsigned int irq)
408+{
409+ ack_x2APIC_irq();
410+}
411+#endif
412
413 static void ack_apic_edge(unsigned int irq)
414 {
415@@ -1593,6 +1826,21 @@ static struct irq_chip ioapic_chip __rea
416 .retrigger = ioapic_retrigger_irq,
417 };
418
419+#ifdef CONFIG_INTR_REMAP
420+static struct irq_chip ir_ioapic_chip __read_mostly = {
421+ .name = "IR-IO-APIC",
422+ .startup = startup_ioapic_irq,
423+ .mask = mask_IO_APIC_irq,
424+ .unmask = unmask_IO_APIC_irq,
425+ .ack = ack_x2apic_edge,
426+ .eoi = ack_x2apic_level,
427+#ifdef CONFIG_SMP
428+ .set_affinity = set_ir_ioapic_affinity_irq,
429+#endif
430+ .retrigger = ioapic_retrigger_irq,
431+};
432+#endif
433+
434 static inline void init_IO_APIC_traps(void)
435 {
436 int irq;
437@@ -1778,6 +2026,8 @@ static inline void __init check_timer(vo
438 * 8259A.
439 */
440 if (pin1 == -1) {
441+ if (intr_remapping_enabled)
442+ panic("BIOS bug: timer not connected to IO-APIC");
443 pin1 = pin2;
444 apic1 = apic2;
445 no_pin1 = 1;
446@@ -1804,6 +2054,8 @@ static inline void __init check_timer(vo
447 clear_IO_APIC_pin(0, pin1);
448 goto out;
449 }
450+ if (intr_remapping_enabled)
451+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
452 clear_IO_APIC_pin(apic1, pin1);
453 if (!no_pin1)
454 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
455@@ -2399,6 +2651,10 @@ void __init setup_ioapic_dest(void)
456 setup_IO_APIC_irq(ioapic, pin, irq,
457 irq_trigger(irq_entry),
458 irq_polarity(irq_entry));
459+#ifdef CONFIG_INTR_REMAP
460+ else if (intr_remapping_enabled)
461+ set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
462+#endif
463 else
464 set_ioapic_affinity_irq(irq, TARGET_CPUS);
465 }
466Index: linux-2.6.26/drivers/pci/intr_remapping.c
467===================================================================
468--- linux-2.6.26.orig/drivers/pci/intr_remapping.c
469+++ linux-2.6.26/drivers/pci/intr_remapping.c
470@@ -220,6 +220,16 @@ int flush_irte(int irq)
471 return 0;
472 }
473
474+struct intel_iommu *map_ioapic_to_ir(int apic)
475+{
476+ int i;
477+
478+ for (i = 0; i < MAX_IO_APICS; i++)
479+ if (ir_ioapic[i].id == apic)
480+ return ir_ioapic[i].iommu;
481+ return NULL;
482+}
483+
484 int free_irte(int irq)
485 {
486 int index, i;
487Index: linux-2.6.26/include/asm-x86/apic.h
488===================================================================
489--- linux-2.6.26.orig/include/asm-x86/apic.h
490+++ linux-2.6.26/include/asm-x86/apic.h
491@@ -123,6 +123,14 @@ extern struct apic_ops *apic_ops;
492
493 extern int get_physical_broadcast(void);
494
495+#ifdef CONFIG_X86_64
496+static inline void ack_x2APIC_irq(void)
497+{
498+ /* Docs say use 0 for future compatibility */
499+ native_apic_msr_write(APIC_EOI, 0);
500+}
501+#endif
502+
503 static inline void ack_APIC_irq(void)
504 {
505 /*
506Index: linux-2.6.26/include/asm-x86/io_apic.h
507===================================================================
508--- linux-2.6.26.orig/include/asm-x86/io_apic.h
509+++ linux-2.6.26/include/asm-x86/io_apic.h
510@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
511
512 } __attribute__ ((packed));
513
514+struct IR_IO_APIC_route_entry {
515+ __u64 vector : 8,
516+ zero : 3,
517+ index2 : 1,
518+ delivery_status : 1,
519+ polarity : 1,
520+ irr : 1,
521+ trigger : 1,
522+ mask : 1,
523+ reserved : 31,
524+ format : 1,
525+ index : 15;
526+} __attribute__ ((packed));
527+
528 #ifdef CONFIG_X86_IO_APIC
529
530 /*
531Index: linux-2.6.26/include/asm-x86/irq_remapping.h
532===================================================================
533--- /dev/null
534+++ linux-2.6.26/include/asm-x86/irq_remapping.h
535@@ -0,0 +1,8 @@
536+#ifndef _ASM_IRQ_REMAPPING_H
537+#define _ASM_IRQ_REMAPPING_H
538+
539+extern int x2apic;
540+
541+#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
542+
543+#endif
544Index: linux-2.6.26/include/linux/dmar.h
545===================================================================
546--- linux-2.6.26.orig/include/linux/dmar.h
547+++ linux-2.6.26/include/linux/dmar.h
548@@ -109,6 +109,7 @@ extern int flush_irte(int irq);
549 extern int free_irte(int irq);
550
551 extern int irq_remapped(int irq);
552+extern struct intel_iommu *map_ioapic_to_ir(int apic);
553 #else
554 #define irq_remapped(irq) (0)
555 #define enable_intr_remapping(mode) (-1)