--- /dev/null
+From a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 Mon Sep 17 00:00:00 2001
+From: Dongli Zhang <dongli.zhang@oracle.com>
+Date: Wed, 22 May 2024 15:02:18 -0700
+Subject: genirq/cpuhotplug, x86/vector: Prevent vector leak during CPU offline
+
+From: Dongli Zhang <dongli.zhang@oracle.com>
+
+commit a6c11c0a5235fb144a65e0cb2ffd360ddc1f6c32 upstream.
+
+Without IRQD_MOVE_PCNTXT, an interrupt affinity change requested via procfs
+does not take effect immediately. Instead, the change is deferred until the
+next time the interrupt triggers on the original CPU.
+
+When the interrupt next triggers on the original CPU, the new affinity is
+enforced within __irq_move_irq(). A vector is allocated from the new CPU,
+but the old vector on the original CPU remains and is not immediately
+reclaimed. Instead, apicd->move_in_progress is flagged, and the reclaiming
+process is delayed until the next trigger of the interrupt on the new CPU.
+
+Upon the subsequent triggering of the interrupt on the new CPU,
+irq_complete_move() adds a task to the old CPU's vector_cleanup list if it
+remains online. Subsequently, the timer on the old CPU iterates over its
+vector_cleanup list, reclaiming old vectors.
+
+However, a rare scenario arises if the old CPU is outgoing before the
+interrupt triggers again on the new CPU.
+
+In that case irq_force_complete_move() is not invoked on the outgoing CPU
+to reclaim the old apicd->prev_vector because the interrupt isn't currently
+affine to the outgoing CPU, and irq_needs_fixup() returns false. Even
+though __vector_schedule_cleanup() is later called on the new CPU, it
+doesn't reclaim apicd->prev_vector; instead, it simply resets both
+apicd->move_in_progress and apicd->prev_vector to 0.
+
+As a result, the vector remains unreclaimed in vector_matrix, leading to a
+CPU vector leak.
+
+To address this issue, move the invocation of irq_force_complete_move()
+before the irq_needs_fixup() call, so that apicd->prev_vector is reclaimed
+if the interrupt is currently, or was previously, affine to the outgoing
+CPU.
+
+Additionally, emit a warning and reclaim the vector in
+__vector_schedule_cleanup() as well, although in theory it should never see
+apicd->move_in_progress with apicd->prev_cpu pointing to an offline CPU.
+
+Fixes: f0383c24b485 ("genirq/cpuhotplug: Add support for cleaning up move in progress")
+Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240522220218.162423-1-dongli.zhang@oracle.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/apic/vector.c | 9 ++++++---
+ kernel/irq/cpuhotplug.c | 16 ++++++++--------
+ 2 files changed, 14 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -920,7 +920,8 @@ static void __send_cleanup_vector(struct
+ hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
+ apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+- apicd->prev_vector = 0;
++ pr_warn("IRQ %u schedule cleanup for offline CPU %u\n", apicd->irq, cpu);
++ free_moved_vector(apicd);
+ }
+ raw_spin_unlock(&vector_lock);
+ }
+@@ -957,6 +958,7 @@ void irq_complete_move(struct irq_cfg *c
+ */
+ void irq_force_complete_move(struct irq_desc *desc)
+ {
++ unsigned int cpu = smp_processor_id();
+ struct apic_chip_data *apicd;
+ struct irq_data *irqd;
+ unsigned int vector;
+@@ -981,10 +983,11 @@ void irq_force_complete_move(struct irq_
+ goto unlock;
+
+ /*
+- * If prev_vector is empty, no action required.
++ * If prev_vector is empty or the descriptor is neither currently
++ * nor previously on the outgoing CPU no action required.
+ */
+ vector = apicd->prev_vector;
+- if (!vector)
++ if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
+ goto unlock;
+
+ /*
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_d
+ }
+
+ /*
++ * Complete an eventually pending irq move cleanup. If this
++ * interrupt was moved in hard irq context, then the vectors need
++ * to be cleaned up. It can't wait until this interrupt actually
++ * happens and this CPU was involved.
++ */
++ irq_force_complete_move(desc);
++
++ /*
+ * No move required, if:
+ * - Interrupt is per cpu
+ * - Interrupt is not started
+@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_d
+ }
+
+ /*
+- * Complete an eventually pending irq move cleanup. If this
+- * interrupt was moved in hard irq context, then the vectors need
+- * to be cleaned up. It can't wait until this interrupt actually
+- * happens and this CPU was involved.
+- */
+- irq_force_complete_move(desc);
+-
+- /*
+ * If there is a setaffinity pending, then try to reuse the pending
+ * mask, so the last change of the affinity does not get lost. If
+ * there is no move pending or the pending mask does not contain