]> git.ipfire.org Git - ipfire-2.x.git/blobdiff - src/patches/suse-2.6.27.39/patches.arch/x2APIC_PATCH_23_of_41_89027d35aa5b8f45ce0f7fa0911db85b46563da0
Imported linux-2.6.27.39 suse/xen patches.
[ipfire-2.x.git] / src / patches / suse-2.6.27.39 / patches.arch / x2APIC_PATCH_23_of_41_89027d35aa5b8f45ce0f7fa0911db85b46563da0
diff --git a/src/patches/suse-2.6.27.39/patches.arch/x2APIC_PATCH_23_of_41_89027d35aa5b8f45ce0f7fa0911db85b46563da0 b/src/patches/suse-2.6.27.39/patches.arch/x2APIC_PATCH_23_of_41_89027d35aa5b8f45ce0f7fa0911db85b46563da0
new file mode 100644 (file)
index 0000000..d26498e
--- /dev/null
@@ -0,0 +1,555 @@
+From: Suresh Siddha <suresh.b.siddha@intel.com>
+Subject: x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping
+References: fate #303948 and fate #303984
+Patch-Mainline: queued for .28
+Commit-ID: 89027d35aa5b8f45ce0f7fa0911db85b46563da0
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+IO-APIC support in the presence of interrupt-remapping infrastructure.
+
+IO-APIC RTE will be programmed with interrupt-remapping table entry(IRTE)
+index and the IRTE will contain information about the vector, cpu destination,
+trigger mode etc, which traditionally was present in the IO-APIC RTE.
+
+Introduce a new irq_chip for cleaner irq migration (in the process
+context as opposed to the current irq migration in the context of an interrupt.
+interrupt-remapping infrastructure will help us achieve this cleanly).
+
+For edge triggered, irq migration is a simple atomic update(of vector
+and cpu destination) of IRTE and flush the hardware cache.
+
+For level triggered, we also need to modify the io-apic RTE with the updated
+vector information, along with modifying IRTE with vector and cpu destination.
+So irq migration for level triggered is a little bit more complex compared to
+edge triggered migration. But the good news is, we use the same algorithm
+for level triggered migration as we have today, only difference being,
+we now initiate the irq migration from process context instead of the
+interrupt context.
+
+In future, when we do a directed EOI (combined with cpu EOI broadcast
+suppression) to the IO-APIC, level triggered irq migration will also be
+as simple as edge triggered migration and we can do the irq migration
+with a simple atomic update to IO-APIC RTE.
+
+TBD: some tests/changes needed in the presence of fixup_irqs() for
+level triggered irq migration.
+
+Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
+Cc: akpm@linux-foundation.org
+Cc: arjan@linux.intel.com
+Cc: andi@firstfloor.org
+Cc: ebiederm@xmission.com
+Cc: jbarnes@virtuousgeek.org
+Cc: steiner@sgi.com
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+---
+ arch/x86/kernel/apic_64.c       |    1 
+ arch/x86/kernel/io_apic_64.c    |  300 +++++++++++++++++++++++++++++++++++++---
+ drivers/pci/intr_remapping.c    |   10 +
+ include/asm-x86/apic.h          |    8 +
+ include/asm-x86/io_apic.h       |   14 +
+ include/asm-x86/irq_remapping.h |    8 +
+ include/linux/dmar.h            |    1 
+ 7 files changed, 320 insertions(+), 22 deletions(-)
+
+Index: linux-2.6.26/arch/x86/kernel/apic_64.c
+===================================================================
+--- linux-2.6.26.orig/arch/x86/kernel/apic_64.c
++++ linux-2.6.26/arch/x86/kernel/apic_64.c
+@@ -46,6 +46,7 @@
+ static int disable_apic_timer __cpuinitdata;
+ static int apic_calibrate_pmtmr __initdata;
+ int disable_apic;
++int x2apic;
+ /* Local APIC timer works in C2 */
+ int local_apic_timer_c2_ok;
+Index: linux-2.6.26/arch/x86/kernel/io_apic_64.c
+===================================================================
+--- linux-2.6.26.orig/arch/x86/kernel/io_apic_64.c
++++ linux-2.6.26/arch/x86/kernel/io_apic_64.c
+@@ -37,6 +37,7 @@
+ #include <acpi/acpi_bus.h>
+ #endif
+ #include <linux/bootmem.h>
++#include <linux/dmar.h>
+ #include <asm/idle.h>
+ #include <asm/io.h>
+@@ -49,6 +50,7 @@
+ #include <asm/nmi.h>
+ #include <asm/msidef.h>
+ #include <asm/hypertransport.h>
++#include <asm/irq_remapping.h>
+ #include <mach_ipi.h>
+ #include <mach_apic.h>
+@@ -306,7 +308,12 @@ static void __target_IO_APIC_irq(unsigne
+               pin = entry->pin;
+               if (pin == -1)
+                       break;
+-              io_apic_write(apic, 0x11 + pin*2, dest);
++              /*
++               * With interrupt-remapping, destination information comes
++               * from interrupt-remapping table entry.
++               */
++              if (!irq_remapped(irq))
++                      io_apic_write(apic, 0x11 + pin*2, dest);
+               reg = io_apic_read(apic, 0x10 + pin*2);
+               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+               reg |= vector;
+@@ -905,18 +912,98 @@ void __setup_vector_irq(int cpu)
+ }
+ static struct irq_chip ioapic_chip;
++#ifdef CONFIG_INTR_REMAP
++static struct irq_chip ir_ioapic_chip;
++#endif
+ static void ioapic_register_intr(int irq, unsigned long trigger)
+ {
+-      if (trigger) {
++      if (trigger)
+               irq_desc[irq].status |= IRQ_LEVEL;
+-              set_irq_chip_and_handler_name(irq, &ioapic_chip,
+-                                            handle_fasteoi_irq, "fasteoi");
+-      } else {
++      else
+               irq_desc[irq].status &= ~IRQ_LEVEL;
++
++#ifdef CONFIG_INTR_REMAP
++      if (irq_remapped(irq)) {
++              irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
++              if (trigger)
++                      set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
++                                                    handle_fasteoi_irq,
++                                                   "fasteoi");
++              else
++                      set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
++                                                    handle_edge_irq, "edge");
++              return;
++      }
++#endif
++      if (trigger)
++              set_irq_chip_and_handler_name(irq, &ioapic_chip,
++                                            handle_fasteoi_irq,
++                                            "fasteoi");
++      else
+               set_irq_chip_and_handler_name(irq, &ioapic_chip,
+                                             handle_edge_irq, "edge");
++}
++
++static int setup_ioapic_entry(int apic, int irq,
++                            struct IO_APIC_route_entry *entry,
++                            unsigned int destination, int trigger,
++                            int polarity, int vector)
++{
++      /*
++       * add it to the IO-APIC irq-routing table:
++       */
++      memset(entry,0,sizeof(*entry));
++
++#ifdef CONFIG_INTR_REMAP
++      if (intr_remapping_enabled) {
++              struct intel_iommu *iommu = map_ioapic_to_ir(apic);
++              struct irte irte;
++              struct IR_IO_APIC_route_entry *ir_entry =
++                      (struct IR_IO_APIC_route_entry *) entry;
++              int index;
++
++              if (!iommu)
++                      panic("No mapping iommu for ioapic %d\n", apic);
++
++              index = alloc_irte(iommu, irq, 1);
++              if (index < 0)
++                      panic("Failed to allocate IRTE for ioapic %d\n", apic);
++
++              memset(&irte, 0, sizeof(irte));
++
++              irte.present = 1;
++              irte.dst_mode = INT_DEST_MODE;
++              irte.trigger_mode = trigger;
++              irte.dlvry_mode = INT_DELIVERY_MODE;
++              irte.vector = vector;
++              irte.dest_id = IRTE_DEST(destination);
++
++              modify_irte(irq, &irte);
++
++              ir_entry->index2 = (index >> 15) & 0x1;
++              ir_entry->zero = 0;
++              ir_entry->format = 1;
++              ir_entry->index = (index & 0x7fff);
++      } else
++#endif
++      {
++              entry->delivery_mode = INT_DELIVERY_MODE;
++              entry->dest_mode = INT_DEST_MODE;
++              entry->dest = destination;
+       }
++
++      entry->mask = 0;                                /* enable IRQ */
++      entry->trigger = trigger;
++      entry->polarity = polarity;
++      entry->vector = vector;
++
++      /* Mask level triggered irqs.
++       * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
++       */
++      if (trigger)
++              entry->mask = 1;
++      return 0;
+ }
+ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+@@ -941,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, 
+                   apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+                   irq, trigger, polarity);
+-      /*
+-       * add it to the IO-APIC irq-routing table:
+-       */
+-      memset(&entry,0,sizeof(entry));
+-      entry.delivery_mode = INT_DELIVERY_MODE;
+-      entry.dest_mode = INT_DEST_MODE;
+-      entry.dest = cpu_mask_to_apicid(mask);
+-      entry.mask = 0;                         /* enable IRQ */
+-      entry.trigger = trigger;
+-      entry.polarity = polarity;
+-      entry.vector = cfg->vector;
+-
+-      /* Mask level triggered irqs.
+-       * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+-       */
+-      if (trigger)
+-              entry.mask = 1;
++      if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
++                             cpu_mask_to_apicid(mask), trigger, polarity,
++                             cfg->vector)) {
++              printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
++                     mp_ioapics[apic].mp_apicid, pin);
++              __clear_irq_vector(irq);
++              return;
++      }
+       ioapic_register_intr(irq, trigger);
+       if (irq < 16)
+@@ -1010,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(
+ {
+       struct IO_APIC_route_entry entry;
++      if (intr_remapping_enabled)
++              return;
++
+       memset(&entry, 0, sizeof(entry));
+       /*
+@@ -1463,6 +1544,147 @@ static int ioapic_retrigger_irq(unsigned
+  */
+ #ifdef CONFIG_SMP
++
++#ifdef CONFIG_INTR_REMAP
++static void ir_irq_migration(struct work_struct *work);
++
++static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
++
++/*
++ * Migrate the IO-APIC irq in the presence of intr-remapping.
++ *
++ * For edge triggered, irq migration is a simple atomic update(of vector
++ * and cpu destination) of IRTE and flush the hardware cache.
++ *
++ * For level triggered, we also need to modify the io-apic RTE with the updated
++ * vector information, along with modifying IRTE with vector and destination.
++ * So irq migration for level triggered is a little bit more complex compared to
++ * edge triggered migration. But the good news is, we use the same algorithm
++ * for level triggered migration as we have today, only difference being,
++ * we now initiate the irq migration from process context instead of the
++ * interrupt context.
++ *
++ * In future, when we do a directed EOI (combined with cpu EOI broadcast
++ * suppression) to the IO-APIC, level triggered irq migration will also be
++ * as simple as edge triggered migration and we can do the irq migration
++ * with a simple atomic update to IO-APIC RTE.
++ */
++static void migrate_ioapic_irq(int irq, cpumask_t mask)
++{
++      struct irq_cfg *cfg = irq_cfg + irq;
++      struct irq_desc *desc = irq_desc + irq;
++      cpumask_t tmp, cleanup_mask;
++      struct irte irte;
++      int modify_ioapic_rte = desc->status & IRQ_LEVEL;
++      unsigned int dest;
++      unsigned long flags;
++
++      cpus_and(tmp, mask, cpu_online_map);
++      if (cpus_empty(tmp))
++              return;
++
++      if (get_irte(irq, &irte))
++              return;
++
++      if (assign_irq_vector(irq, mask))
++              return;
++
++      cpus_and(tmp, cfg->domain, mask);
++      dest = cpu_mask_to_apicid(tmp);
++
++      if (modify_ioapic_rte) {
++              spin_lock_irqsave(&ioapic_lock, flags);
++              __target_IO_APIC_irq(irq, dest, cfg->vector);
++              spin_unlock_irqrestore(&ioapic_lock, flags);
++      }
++
++      irte.vector = cfg->vector;
++      irte.dest_id = IRTE_DEST(dest);
++
++      /*
++       * Modified the IRTE and flushes the Interrupt entry cache.
++       */
++      modify_irte(irq, &irte);
++
++      if (cfg->move_in_progress) {
++              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
++              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
++              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++              cfg->move_in_progress = 0;
++      }
++
++      irq_desc[irq].affinity = mask;
++}
++
++static int migrate_irq_remapped_level(int irq)
++{
++      int ret = -1;
++
++      mask_IO_APIC_irq(irq);
++
++      if (io_apic_level_ack_pending(irq)) {
++              /*
++               * Interrupt in progress. Migrating irq now will change the
++               * vector information in the IO-APIC RTE and that will confuse
++               * the EOI broadcast performed by cpu.
++               * So, delay the irq migration to the next instance.
++               */
++              schedule_delayed_work(&ir_migration_work, 1);
++              goto unmask;
++      }
++
++      /* everthing is clear. we have right of way */
++      migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
++
++      ret = 0;
++      irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
++      cpus_clear(irq_desc[irq].pending_mask);
++
++unmask:
++      unmask_IO_APIC_irq(irq);
++      return ret;
++}
++
++static void ir_irq_migration(struct work_struct *work)
++{
++      int irq;
++
++      for (irq = 0; irq < NR_IRQS; irq++) {
++              struct irq_desc *desc = irq_desc + irq;
++              if (desc->status & IRQ_MOVE_PENDING) {
++                      unsigned long flags;
++
++                      spin_lock_irqsave(&desc->lock, flags);
++                      if (!desc->chip->set_affinity ||
++                          !(desc->status & IRQ_MOVE_PENDING)) {
++                              desc->status &= ~IRQ_MOVE_PENDING;
++                              spin_unlock_irqrestore(&desc->lock, flags);
++                              continue;
++                      }
++
++                      desc->chip->set_affinity(irq,
++                                               irq_desc[irq].pending_mask);
++                      spin_unlock_irqrestore(&desc->lock, flags);
++              }
++      }
++}
++
++/*
++ * Migrates the IRQ destination in the process context.
++ */
++static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
++{
++      if (irq_desc[irq].status & IRQ_LEVEL) {
++              irq_desc[irq].status |= IRQ_MOVE_PENDING;
++              irq_desc[irq].pending_mask = mask;
++              migrate_irq_remapped_level(irq);
++              return;
++      }
++
++      migrate_ioapic_irq(irq, mask);
++}
++#endif
++
+ asmlinkage void smp_irq_move_cleanup_interrupt(void)
+ {
+       unsigned vector, me;
+@@ -1519,6 +1741,17 @@ static void irq_complete_move(unsigned i
+ #else
+ static inline void irq_complete_move(unsigned int irq) {}
+ #endif
++#ifdef CONFIG_INTR_REMAP
++static void ack_x2apic_level(unsigned int irq)
++{
++      ack_x2APIC_irq();
++}
++
++static void ack_x2apic_edge(unsigned int irq)
++{
++      ack_x2APIC_irq();
++}
++#endif
+ static void ack_apic_edge(unsigned int irq)
+ {
+@@ -1593,6 +1826,21 @@ static struct irq_chip ioapic_chip __rea
+       .retrigger      = ioapic_retrigger_irq,
+ };
++#ifdef CONFIG_INTR_REMAP
++static struct irq_chip ir_ioapic_chip __read_mostly = {
++      .name           = "IR-IO-APIC",
++      .startup        = startup_ioapic_irq,
++      .mask           = mask_IO_APIC_irq,
++      .unmask         = unmask_IO_APIC_irq,
++      .ack            = ack_x2apic_edge,
++      .eoi            = ack_x2apic_level,
++#ifdef CONFIG_SMP
++      .set_affinity   = set_ir_ioapic_affinity_irq,
++#endif
++      .retrigger      = ioapic_retrigger_irq,
++};
++#endif
++
+ static inline void init_IO_APIC_traps(void)
+ {
+       int irq;
+@@ -1778,6 +2026,8 @@ static inline void __init check_timer(vo
+        * 8259A.
+        */
+       if (pin1 == -1) {
++              if (intr_remapping_enabled)
++                      panic("BIOS bug: timer not connected to IO-APIC");
+               pin1 = pin2;
+               apic1 = apic2;
+               no_pin1 = 1;
+@@ -1804,6 +2054,8 @@ static inline void __init check_timer(vo
+                               clear_IO_APIC_pin(0, pin1);
+                       goto out;
+               }
++              if (intr_remapping_enabled)
++                      panic("timer doesn't work through Interrupt-remapped IO-APIC");
+               clear_IO_APIC_pin(apic1, pin1);
+               if (!no_pin1)
+                       apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+@@ -2399,6 +2651,10 @@ void __init setup_ioapic_dest(void)
+                               setup_IO_APIC_irq(ioapic, pin, irq,
+                                                 irq_trigger(irq_entry),
+                                                 irq_polarity(irq_entry));
++#ifdef CONFIG_INTR_REMAP
++                      else if (intr_remapping_enabled)
++                              set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
++#endif
+                       else
+                               set_ioapic_affinity_irq(irq, TARGET_CPUS);
+               }
+Index: linux-2.6.26/drivers/pci/intr_remapping.c
+===================================================================
+--- linux-2.6.26.orig/drivers/pci/intr_remapping.c
++++ linux-2.6.26/drivers/pci/intr_remapping.c
+@@ -220,6 +220,16 @@ int flush_irte(int irq)
+       return 0;
+ }
++struct intel_iommu *map_ioapic_to_ir(int apic)
++{
++      int i;
++
++      for (i = 0; i < MAX_IO_APICS; i++)
++              if (ir_ioapic[i].id == apic)
++                      return ir_ioapic[i].iommu;
++      return NULL;
++}
++
+ int free_irte(int irq)
+ {
+       int index, i;
+Index: linux-2.6.26/include/asm-x86/apic.h
+===================================================================
+--- linux-2.6.26.orig/include/asm-x86/apic.h
++++ linux-2.6.26/include/asm-x86/apic.h
+@@ -123,6 +123,14 @@ extern struct apic_ops *apic_ops;
+ extern int get_physical_broadcast(void);
++#ifdef CONFIG_X86_64
++static inline void ack_x2APIC_irq(void)
++{
++      /* Docs say use 0 for future compatibility */
++      native_apic_msr_write(APIC_EOI, 0);
++}
++#endif
++
+ static inline void ack_APIC_irq(void)
+ {
+       /*
+Index: linux-2.6.26/include/asm-x86/io_apic.h
+===================================================================
+--- linux-2.6.26.orig/include/asm-x86/io_apic.h
++++ linux-2.6.26/include/asm-x86/io_apic.h
+@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
+ } __attribute__ ((packed));
++struct IR_IO_APIC_route_entry {
++      __u64   vector          : 8,
++              zero            : 3,
++              index2          : 1,
++              delivery_status : 1,
++              polarity        : 1,
++              irr             : 1,
++              trigger         : 1,
++              mask            : 1,
++              reserved        : 31,
++              format          : 1,
++              index           : 15;
++} __attribute__ ((packed));
++
+ #ifdef CONFIG_X86_IO_APIC
+ /*
+Index: linux-2.6.26/include/asm-x86/irq_remapping.h
+===================================================================
+--- /dev/null
++++ linux-2.6.26/include/asm-x86/irq_remapping.h
+@@ -0,0 +1,8 @@
++#ifndef _ASM_IRQ_REMAPPING_H
++#define _ASM_IRQ_REMAPPING_H
++
++extern int x2apic;
++
++#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
++
++#endif
+Index: linux-2.6.26/include/linux/dmar.h
+===================================================================
+--- linux-2.6.26.orig/include/linux/dmar.h
++++ linux-2.6.26/include/linux/dmar.h
+@@ -109,6 +109,7 @@ extern int flush_irte(int irq);
+ extern int free_irte(int irq);
+ extern int irq_remapped(int irq);
++extern struct intel_iommu *map_ioapic_to_ir(int apic);
+ #else
+ #define irq_remapped(irq)             (0)
+ #define enable_intr_remapping(mode)   (-1)