genirq: Introduce common irq_force_complete_move() implementation

author Thomas Gleixner <tglx@linutronix.de>

Mon, 17 Feb 2025 08:56:50 +0000 (14:26 +0530)

committer Thomas Gleixner <tglx@linutronix.de>

Thu, 20 Feb 2025 14:19:26 +0000 (15:19 +0100)
author Thomas Gleixner <tglx@linutronix.de>
Mon, 17 Feb 2025 08:56:50 +0000 (14:26 +0530)
committer Thomas Gleixner <tglx@linutronix.de>
Thu, 20 Feb 2025 14:19:26 +0000 (15:19 +0100)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c

index 736f62812f5c2bbe930cf2cc36f35421c2d1d666..72fa4bb78f0a614838f3ab9bb63c70e2eca6e484 100644 (file)
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
         return err ? err : IRQ_SET_MASK_OK;
  }
  
+static void free_moved_vector(struct apic_chip_data *apicd)
+{
+       unsigned int vector = apicd->prev_vector;
+       unsigned int cpu = apicd->prev_cpu;
+       bool managed = apicd->is_managed;
+
+       /*
+        * Managed interrupts are usually not migrated away
+        * from an online CPU, but CPU isolation 'managed_irq'
+        * can make that happen.
+        * 1) Activation does not take the isolation into account
+        *    to keep the code simple
+        * 2) Migration away from an isolated CPU can happen when
+        *    a non-isolated CPU which is in the calculated
+        *    affinity mask comes online.
+        */
+       trace_vector_free_moved(apicd->irq, cpu, vector, managed);
+       irq_matrix_free(vector_matrix, cpu, vector, managed);
+       per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
+       hlist_del_init(&apicd->clist);
+       apicd->prev_vector = 0;
+       apicd->move_in_progress = 0;
+}
+
+/*
+ * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
+ */
+static void apic_force_complete_move(struct irq_data *irqd)
+{
+       unsigned int cpu = smp_processor_id();
+       struct apic_chip_data *apicd;
+       unsigned int vector;
+
+       guard(raw_spinlock)(&vector_lock);
+       apicd = apic_chip_data(irqd);
+       if (!apicd)
+               return;
+
+       /*
+        * If prev_vector is empty or the descriptor is neither currently
+        * nor previously on the outgoing CPU no action required.
+        */
+       vector = apicd->prev_vector;
+       if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+               return;
+
+       /*
+        * This is tricky. If the cleanup of the old vector has not been
+        * done yet, then the following setaffinity call will fail with
+        * -EBUSY. This can leave the interrupt in a stale state.
+        *
+        * All CPUs are stuck in stop machine with interrupts disabled so
+        * calling __irq_complete_move() would be completely pointless.
+        *
+        * 1) The interrupt is in move_in_progress state. That means that we
+        *    have not seen an interrupt since the io_apic was reprogrammed to
+        *    the new vector.
+        *
+        * 2) The interrupt has fired on the new vector, but the cleanup IPIs
+        *    have not been processed yet.
+        */
+       if (apicd->move_in_progress) {
+               /*
+                * In theory there is a race:
+                *
+                * set_ioapic(new_vector) <-- Interrupt is raised before update
+                *                            is effective, i.e. it's raised on
+                *                            the old vector.
+                *
+                * So if the target cpu cannot handle that interrupt before
+                * the old vector is cleaned up, we get a spurious interrupt
+                * and in the worst case the ioapic irq line becomes stale.
+                *
+                * But in case of cpu hotplug this should be a non issue
+                * because if the affinity update happens right before all
+                * cpus rendezvous in stop machine, there is no way that the
+                * interrupt can be blocked on the target cpu because all cpus
+                * loops first with interrupts enabled in stop machine, so the
+                * old vector is not yet cleaned up when the interrupt fires.
+                *
+                * So the only way to run into this issue is if the delivery
+                * of the interrupt on the apic/system bus would be delayed
+                * beyond the point where the target cpu disables interrupts
+                * in stop machine. I doubt that it can happen, but at least
+                * there is a theoretical chance. Virtualization might be
+                * able to expose this, but AFAICT the IOAPIC emulation is not
+                * as stupid as the real hardware.
+                *
+                * Anyway, there is nothing we can do about that at this point
+                * w/o refactoring the whole fixup_irq() business completely.
+                * We print at least the irq number and the old vector number,
+                * so we have the necessary information when a problem in that
+                * area arises.
+                */
+               pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
+                       irqd->irq, vector);
+       }
+       free_moved_vector(apicd);
+}
+
  #else
-# define apic_set_affinity     NULL
+# define apic_set_affinity             NULL
+# define apic_force_complete_move      NULL
  #endif
  
  static int apic_retrigger_irq(struct irq_data *irqd)
@@ -923,39 +1024,16 @@ static void x86_vector_msi_compose_msg(struct irq_data *data,
  }
  
  static struct irq_chip lapic_controller = {
-       .name                   = "APIC",
-       .irq_ack                = apic_ack_edge,
-       .irq_set_affinity       = apic_set_affinity,
-       .irq_compose_msi_msg    = x86_vector_msi_compose_msg,
-       .irq_retrigger          = apic_retrigger_irq,
+       .name                           = "APIC",
+       .irq_ack                        = apic_ack_edge,
+       .irq_set_affinity               = apic_set_affinity,
+       .irq_compose_msi_msg            = x86_vector_msi_compose_msg,
+       .irq_force_complete_move        = apic_force_complete_move,
+       .irq_retrigger                  = apic_retrigger_irq,
  };
  
  #ifdef CONFIG_SMP
  
-static void free_moved_vector(struct apic_chip_data *apicd)
-{
-       unsigned int vector = apicd->prev_vector;
-       unsigned int cpu = apicd->prev_cpu;
-       bool managed = apicd->is_managed;
-
-       /*
-        * Managed interrupts are usually not migrated away
-        * from an online CPU, but CPU isolation 'managed_irq'
-        * can make that happen.
-        * 1) Activation does not take the isolation into account
-        *    to keep the code simple
-        * 2) Migration away from an isolated CPU can happen when
-        *    a non-isolated CPU which is in the calculated
-        *    affinity mask comes online.
-        */
-       trace_vector_free_moved(apicd->irq, cpu, vector, managed);
-       irq_matrix_free(vector_matrix, cpu, vector, managed);
-       per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-       hlist_del_init(&apicd->clist);
-       apicd->prev_vector = 0;
-       apicd->move_in_progress = 0;
-}
-
  static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
  {
         struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
                 __vector_schedule_cleanup(apicd);
  }
  
-/*
- * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
- */
-void irq_force_complete_move(struct irq_desc *desc)
-{
-       unsigned int cpu = smp_processor_id();
-       struct apic_chip_data *apicd;
-       struct irq_data *irqd;
-       unsigned int vector;
-
-       /*
-        * The function is called for all descriptors regardless of which
-        * irqdomain they belong to. For example if an IRQ is provided by
-        * an irq_chip as part of a GPIO driver, the chip data for that
-        * descriptor is specific to the irq_chip in question.
-        *
-        * Check first that the chip_data is what we expect
-        * (apic_chip_data) before touching it any further.
-        */
-       irqd = irq_domain_get_irq_data(x86_vector_domain,
-                                      irq_desc_get_irq(desc));
-       if (!irqd)
-               return;
-
-       raw_spin_lock(&vector_lock);
-       apicd = apic_chip_data(irqd);
-       if (!apicd)
-               goto unlock;
-
-       /*
-        * If prev_vector is empty or the descriptor is neither currently
-        * nor previously on the outgoing CPU no action required.
-        */
-       vector = apicd->prev_vector;
-       if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
-               goto unlock;
-
-       /*
-        * This is tricky. If the cleanup of the old vector has not been
-        * done yet, then the following setaffinity call will fail with
-        * -EBUSY. This can leave the interrupt in a stale state.
-        *
-        * All CPUs are stuck in stop machine with interrupts disabled so
-        * calling __irq_complete_move() would be completely pointless.
-        *
-        * 1) The interrupt is in move_in_progress state. That means that we
-        *    have not seen an interrupt since the io_apic was reprogrammed to
-        *    the new vector.
-        *
-        * 2) The interrupt has fired on the new vector, but the cleanup IPIs
-        *    have not been processed yet.
-        */
-       if (apicd->move_in_progress) {
-               /*
-                * In theory there is a race:
-                *
-                * set_ioapic(new_vector) <-- Interrupt is raised before update
-                *                            is effective, i.e. it's raised on
-                *                            the old vector.
-                *
-                * So if the target cpu cannot handle that interrupt before
-                * the old vector is cleaned up, we get a spurious interrupt
-                * and in the worst case the ioapic irq line becomes stale.
-                *
-                * But in case of cpu hotplug this should be a non issue
-                * because if the affinity update happens right before all
-                * cpus rendezvous in stop machine, there is no way that the
-                * interrupt can be blocked on the target cpu because all cpus
-                * loops first with interrupts enabled in stop machine, so the
-                * old vector is not yet cleaned up when the interrupt fires.
-                *
-                * So the only way to run into this issue is if the delivery
-                * of the interrupt on the apic/system bus would be delayed
-                * beyond the point where the target cpu disables interrupts
-                * in stop machine. I doubt that it can happen, but at least
-                * there is a theoretical chance. Virtualization might be
-                * able to expose this, but AFAICT the IOAPIC emulation is not
-                * as stupid as the real hardware.
-                *
-                * Anyway, there is nothing we can do about that at this point
-                * w/o refactoring the whole fixup_irq() business completely.
-                * We print at least the irq number and the old vector number,
-                * so we have the necessary information when a problem in that
-                * area arises.
-                */
-               pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
-                       irqd->irq, vector);
-       }
-       free_moved_vector(apicd);
-unlock:
-       raw_spin_unlock(&vector_lock);
-}
-
  #ifdef CONFIG_HOTPLUG_CPU
  /*
   * Note, this is not accurate accounting, but at least good enough to
diff --git a/include/linux/irq.h b/include/linux/irq.h

index 8daa17f0107ac06ff0c05e4ba94e62f50ea27d5c..56f6583093d2887b85cf96ed9572ffacb6320e40 100644 (file)
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
   * @ipi_send_mask:     send an IPI to destination cpus in cpumask
   * @irq_nmi_setup:     function called from core code before enabling an NMI
   * @irq_nmi_teardown:  function called from core code after disabling an NMI
+ * @irq_force_complete_move:   optional function to force complete pending irq move
   * @flags:             chip specific flags
   */
  struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
         int             (*irq_nmi_setup)(struct irq_data *data);
         void            (*irq_nmi_teardown)(struct irq_data *data);
  
+       void            (*irq_force_complete_move)(struct irq_data *data);
+
         unsigned long   flags;
  };
  
@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
                 __irq_move_irq(data);
  }
  void irq_move_masked_irq(struct irq_data *data);
-void irq_force_complete_move(struct irq_desc *desc);
  #else
  static inline void irq_move_irq(struct irq_data *data) { }
  static inline void irq_move_masked_irq(struct irq_data *data) { }
-static inline void irq_force_complete_move(struct irq_desc *desc) { }
  #endif
  
  extern int no_irq_affinity;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h

index a979523640d0a05c1481027a304fa2b85a352a0f..d4e190e690bdda962674a57b03fa382a43f7b978 100644 (file)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
         return desc->pending_mask;
  }
  bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+void irq_force_complete_move(struct irq_desc *desc);
  #else /* CONFIG_GENERIC_PENDING_IRQ */
  static inline bool irq_can_move_pcntxt(struct irq_data *data)
  {
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
  {
         return false;
  }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
  #endif /* !CONFIG_GENERIC_PENDING_IRQ */
  
  #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c

index eb150afd671f6de72624d1e686f6226e2b8aee08..e110300ad650fe2cef9d2deb9bf5bd0a41eb15c2 100644 (file)
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -35,6 +35,29 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
         return true;
  }
  
+/*
+ * Force completion of a pending interrupt move for @desc.
+ *
+ * Walks the irq_data chain starting at the irq_data returned by
+ * irq_desc_get_irq_data() and invokes the first irq_force_complete_move()
+ * chip callback found. Nothing happens when no chip in the chain
+ * implements the callback.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
+{
+       struct irq_data *d = irq_desc_get_irq_data(desc);
+
+       while (d) {
+               if (d->chip && d->chip->irq_force_complete_move) {
+                       d->chip->irq_force_complete_move(d);
+                       return;
+               }
+               /*
+                * irq_data::parent_data is only declared for hierarchical
+                * irqdomains. Without them there is a single level to check.
+                */
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+               d = d->parent_data;
+#else
+               d = NULL;
+#endif
+       }
+}
+
  void irq_move_masked_irq(struct irq_data *idata)
  {
         struct irq_desc *desc = irq_data_to_desc(idata);
author	Thomas Gleixner <tglx@linutronix.de>
	Mon, 17 Feb 2025 08:56:50 +0000 (14:26 +0530)
committer	Thomas Gleixner <tglx@linutronix.de>
	Thu, 20 Feb 2025 14:19:26 +0000 (15:19 +0100)
arch/x86/kernel/apic/vector.c		patch | blob | blame | history
include/linux/irq.h		patch | blob | blame | history
kernel/irq/internals.h		patch | blob | blame | history
kernel/irq/migration.c		patch | blob | blame | history