--- /dev/null
+From 62e0468650c30f0298822c580f382b16328119f6 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Jun 2019 13:11:51 +0200
+Subject: genirq: Add optional hardware synchronization for shutdown
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 62e0468650c30f0298822c580f382b16328119f6 upstream.
+
+free_irq() ensures that no hardware interrupt handler is executing on a
+different CPU before actually releasing resources and deactivating the
+interrupt completely in a domain hierarchy.
+
+But that does not catch the case where the interrupt is on flight at the
+hardware level but not yet serviced by the target CPU. That creates an
+interesing race condition:
+
+ CPU 0 CPU 1 IRQ CHIP
+
+ interrupt is raised
+ sent to CPU1
+ Unable to handle
+ immediately
+ (interrupts off,
+ deep idle delay)
+ mask()
+ ...
+ free()
+ shutdown()
+ synchronize_irq()
+ release_resources()
+ do_IRQ()
+ -> resources are not available
+
+That might be harmless and just trigger a spurious interrupt warning, but
+some interrupt chips might get into a wedged state.
+
+Utilize the existing irq_get_irqchip_state() callback for the
+synchronization in free_irq().
+
+synchronize_hardirq() is not using this mechanism as it might actually
+deadlock unter certain conditions, e.g. when called with interrupts
+disabled and the target CPU is the one on which the synchronization is
+invoked. synchronize_irq() uses it because that function cannot be called
+from non preemtible contexts as it might sleep.
+
+No functional change intended and according to Marc the existing GIC
+implementations where the driver supports the callback should be able
+to cope with that core change. Famous last words.
+
+Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode")
+Reported-by: Robert Hodaszi <Robert.Hodaszi@digi.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
+Tested-by: Marc Zyngier <marc.zyngier@arm.com>
+Link: https://lkml.kernel.org/r/20190628111440.279463375@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/irq/internals.h | 4 ++
+ kernel/irq/manage.c | 75 ++++++++++++++++++++++++++++++++++++-------------
+ 2 files changed, 60 insertions(+), 19 deletions(-)
+
+--- a/kernel/irq/internals.h
++++ b/kernel/irq/internals.h
+@@ -97,6 +97,10 @@ static inline void irq_mark_irq(unsigned
+ extern void irq_mark_irq(unsigned int irq);
+ #endif
+
++extern int __irq_get_irqchip_state(struct irq_data *data,
++ enum irqchip_irq_state which,
++ bool *state);
++
+ extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
+
+ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -35,8 +35,9 @@ static int __init setup_forced_irqthread
+ early_param("threadirqs", setup_forced_irqthreads);
+ #endif
+
+-static void __synchronize_hardirq(struct irq_desc *desc)
++static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
+ {
++ struct irq_data *irqd = irq_desc_get_irq_data(desc);
+ bool inprogress;
+
+ do {
+@@ -52,6 +53,20 @@ static void __synchronize_hardirq(struct
+ /* Ok, that indicated we're done: double-check carefully. */
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ inprogress = irqd_irq_inprogress(&desc->irq_data);
++
++ /*
++ * If requested and supported, check at the chip whether it
++ * is in flight at the hardware level, i.e. already pending
++ * in a CPU and waiting for service and acknowledge.
++ */
++ if (!inprogress && sync_chip) {
++ /*
++ * Ignore the return code. inprogress is only updated
++ * when the chip supports it.
++ */
++ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
++ &inprogress);
++ }
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ /* Oops, that failed? */
+@@ -74,13 +89,18 @@ static void __synchronize_hardirq(struct
+ * Returns: false if a threaded handler is active.
+ *
+ * This function may be called - with care - from IRQ context.
++ *
++ * It does not check whether there is an interrupt in flight at the
++ * hardware level, but not serviced yet, as this might deadlock when
++ * called with interrupts disabled and the target CPU of the interrupt
++ * is the current CPU.
+ */
+ bool synchronize_hardirq(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (desc) {
+- __synchronize_hardirq(desc);
++ __synchronize_hardirq(desc, false);
+ return !atomic_read(&desc->threads_active);
+ }
+
+@@ -98,13 +118,17 @@ EXPORT_SYMBOL(synchronize_hardirq);
+ *
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
++ *
++ * It optionally makes sure (when the irq chip supports that method)
++ * that the interrupt is not pending in any CPU and waiting for
++ * service.
+ */
+ void synchronize_irq(unsigned int irq)
+ {
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (desc) {
+- __synchronize_hardirq(desc);
++ __synchronize_hardirq(desc, true);
+ /*
+ * We made sure that no hardirq handler is
+ * running. Now verify that no threaded handlers are
+@@ -1730,8 +1754,12 @@ static struct irqaction *__free_irq(stru
+
+ unregister_handler_proc(irq, action);
+
+- /* Make sure it's not being used on another CPU: */
+- synchronize_hardirq(irq);
++ /*
++ * Make sure it's not being used on another CPU and if the chip
++ * supports it also make sure that there is no (not yet serviced)
++ * interrupt in flight at the hardware level.
++ */
++ __synchronize_hardirq(desc, true);
+
+ #ifdef CONFIG_DEBUG_SHIRQ
+ /*
+@@ -2589,6 +2617,28 @@ out:
+ irq_put_desc_unlock(desc, flags);
+ }
+
++int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
++ bool *state)
++{
++ struct irq_chip *chip;
++ int err = -EINVAL;
++
++ do {
++ chip = irq_data_get_irq_chip(data);
++ if (chip->irq_get_irqchip_state)
++ break;
++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
++ data = data->parent_data;
++#else
++ data = NULL;
++#endif
++ } while (data);
++
++ if (data)
++ err = chip->irq_get_irqchip_state(data, which, state);
++ return err;
++}
++
+ /**
+ * irq_get_irqchip_state - returns the irqchip state of a interrupt.
+ * @irq: Interrupt line that is forwarded to a VM
+@@ -2607,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int i
+ {
+ struct irq_desc *desc;
+ struct irq_data *data;
+- struct irq_chip *chip;
+ unsigned long flags;
+ int err = -EINVAL;
+
+@@ -2617,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int i
+
+ data = irq_desc_get_irq_data(desc);
+
+- do {
+- chip = irq_data_get_irq_chip(data);
+- if (chip->irq_get_irqchip_state)
+- break;
+-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+- data = data->parent_data;
+-#else
+- data = NULL;
+-#endif
+- } while (data);
+-
+- if (data)
+- err = chip->irq_get_irqchip_state(data, which, state);
++ err = __irq_get_irqchip_state(data, which, state);
+
+ irq_put_desc_busunlock(desc, flags);
+ return err;
--- /dev/null
+From 4001d8e8762f57d418b66e4e668601791900a1dd Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Jun 2019 13:11:49 +0200
+Subject: genirq: Delay deactivation in free_irq()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 4001d8e8762f57d418b66e4e668601791900a1dd upstream.
+
+When interrupts are shutdown, they are immediately deactivated in the
+irqdomain hierarchy. While this looks obviously correct there is a subtle
+issue:
+
+There might be an interrupt in flight when free_irq() is invoking the
+shutdown. This is properly handled at the irq descriptor / primary handler
+level, but the deactivation might completely disable resources which are
+required to acknowledge the interrupt.
+
+Split the shutdown code and deactivate the interrupt after synchronization
+in free_irq(). Fixup all other usage sites where this is not an issue to
+invoke the combined shutdown_and_deactivate() function instead.
+
+This still might be an issue if the interrupt in flight servicing is
+delayed on a remote CPU beyond the invocation of synchronize_irq(), but
+that cannot be handled at that level and needs to be handled in the
+synchronize_irq() context.
+
+Fixes: f8264e34965a ("irqdomain: Introduce new interfaces to support hierarchy irqdomains")
+Reported-by: Robert Hodaszi <Robert.Hodaszi@digi.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
+Link: https://lkml.kernel.org/r/20190628111440.098196390@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/irq/autoprobe.c | 6 +++---
+ kernel/irq/chip.c | 6 ++++++
+ kernel/irq/cpuhotplug.c | 2 +-
+ kernel/irq/internals.h | 1 +
+ kernel/irq/manage.c | 12 +++++++++++-
+ 5 files changed, 22 insertions(+), 5 deletions(-)
+
+--- a/kernel/irq/autoprobe.c
++++ b/kernel/irq/autoprobe.c
+@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void)
+ /* It triggered already - consider it spurious. */
+ if (!(desc->istate & IRQS_WAITING)) {
+ desc->istate &= ~IRQS_AUTODETECT;
+- irq_shutdown(desc);
++ irq_shutdown_and_deactivate(desc);
+ } else
+ if (i < 32)
+ mask |= 1 << i;
+@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned lon
+ mask |= 1 << i;
+
+ desc->istate &= ~IRQS_AUTODETECT;
+- irq_shutdown(desc);
++ irq_shutdown_and_deactivate(desc);
+ }
+ raw_spin_unlock_irq(&desc->lock);
+ }
+@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val)
+ nr_of_irqs++;
+ }
+ desc->istate &= ~IRQS_AUTODETECT;
+- irq_shutdown(desc);
++ irq_shutdown_and_deactivate(desc);
+ }
+ raw_spin_unlock_irq(&desc->lock);
+ }
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc)
+ }
+ irq_state_clr_started(desc);
+ }
++}
++
++
++void irq_shutdown_and_deactivate(struct irq_desc *desc)
++{
++ irq_shutdown(desc);
+ /*
+ * This must be called even if the interrupt was never started up,
+ * because the activation can happen before the interrupt is
+--- a/kernel/irq/cpuhotplug.c
++++ b/kernel/irq/cpuhotplug.c
+@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_d
+ */
+ if (irqd_affinity_is_managed(d)) {
+ irqd_set_managed_shutdown(d);
+- irq_shutdown(desc);
++ irq_shutdown_and_deactivate(desc);
+ return false;
+ }
+ affinity = cpu_online_mask;
+--- a/kernel/irq/internals.h
++++ b/kernel/irq/internals.h
+@@ -82,6 +82,7 @@ extern int irq_activate_and_startup(stru
+ extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
+
+ extern void irq_shutdown(struct irq_desc *desc);
++extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
+ extern void irq_enable(struct irq_desc *desc);
+ extern void irq_disable(struct irq_desc *desc);
+ extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -13,6 +13,7 @@
+ #include <linux/module.h>
+ #include <linux/random.h>
+ #include <linux/interrupt.h>
++#include <linux/irqdomain.h>
+ #include <linux/slab.h>
+ #include <linux/sched.h>
+ #include <linux/sched/rt.h>
+@@ -1699,6 +1700,7 @@ static struct irqaction *__free_irq(stru
+ /* If this was the last handler, shut down the IRQ line: */
+ if (!desc->action) {
+ irq_settings_clr_disable_unlazy(desc);
++ /* Only shutdown. Deactivate after synchronize_hardirq() */
+ irq_shutdown(desc);
+ }
+
+@@ -1768,6 +1770,14 @@ static struct irqaction *__free_irq(stru
+ * require it to deallocate resources over the slow bus.
+ */
+ chip_bus_lock(desc);
++ /*
++ * There is no interrupt on the fly anymore. Deactivate it
++ * completely.
++ */
++ raw_spin_lock_irqsave(&desc->lock, flags);
++ irq_domain_deactivate_irq(&desc->irq_data);
++ raw_spin_unlock_irqrestore(&desc->lock, flags);
++
+ irq_release_resources(desc);
+ chip_bus_sync_unlock(desc);
+ irq_remove_timings(desc);
+@@ -1855,7 +1865,7 @@ static const void *__cleanup_nmi(unsigne
+ }
+
+ irq_settings_clr_disable_unlazy(desc);
+- irq_shutdown(desc);
++ irq_shutdown_and_deactivate(desc);
+
+ irq_release_resources(desc);
+
--- /dev/null
+From 1d21f2af8571c6a6a44e7c1911780614847b0253 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Jun 2019 13:11:50 +0200
+Subject: genirq: Fix misleading synchronize_irq() documentation
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 1d21f2af8571c6a6a44e7c1911780614847b0253 upstream.
+
+The function might sleep, so it cannot be called from interrupt
+context. Not even with care.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Link: https://lkml.kernel.org/r/20190628111440.189241552@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/irq/manage.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -96,7 +96,8 @@ EXPORT_SYMBOL(synchronize_hardirq);
+ * to complete before returning. If you use this function while
+ * holding a resource the IRQ handler may need you will deadlock.
+ *
+- * This function may be called - with care - from IRQ context.
++ * Can only be called from preemptible code as it might sleep when
++ * an interrupt thread is associated to @irq.
+ */
+ void synchronize_irq(unsigned int irq)
+ {
mm-oom_kill.c-fix-uninitialized-oc-constraint.patch
fork-memcg-alloc_thread_stack_node-needs-to-set-tsk-.patch
linux-kernel.h-fix-overflow-for-div_round_up_ull.patch
+genirq-delay-deactivation-in-free_irq.patch
+genirq-fix-misleading-synchronize_irq-documentation.patch
+genirq-add-optional-hardware-synchronization-for-shutdown.patch
+x86-ioapic-implement-irq_get_irqchip_state-callback.patch
+x86-irq-handle-spurious-interrupt-after-shutdown-gracefully.patch
+x86-irq-seperate-unused-system-vectors-from-spurious-entry-again.patch
--- /dev/null
+From dfe0cf8b51b07e56ded571e3de0a4a9382517231 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Jun 2019 13:11:52 +0200
+Subject: x86/ioapic: Implement irq_get_irqchip_state() callback
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit dfe0cf8b51b07e56ded571e3de0a4a9382517231 upstream.
+
+When an interrupt is shut down in free_irq() there might be an inflight
+interrupt pending in the IO-APIC remote IRR which is not yet serviced. That
+means the interrupt has been sent to the target CPUs local APIC, but the
+target CPU is in a state which delays the servicing.
+
+So free_irq() would proceed to free resources and to clear the vector
+because synchronize_hardirq() does not see an interrupt handler in
+progress.
+
+That can trigger a spurious interrupt warning, which is harmless and just
+confuses users, but it also can leave the remote IRR in a stale state
+because once the handler is invoked the interrupt resources might be freed
+already and therefore acknowledgement is not possible anymore.
+
+Implement the irq_get_irqchip_state() callback for the IO-APIC irq chip. The
+callback is invoked from free_irq() via __synchronize_hardirq(). Check the
+remote IRR bit of the interrupt and return 'in flight' if it is set and the
+interrupt is configured in level mode. For edge mode the remote IRR has no
+meaning.
+
+As this is only meaningful for level triggered interrupts this won't cure
+the potential spurious interrupt warning for edge triggered interrupts, but
+the edge trigger case does not result in stale hardware state. This has to
+be addressed at the vector/interrupt entry level seperately.
+
+Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode")
+Reported-by: Robert Hodaszi <Robert.Hodaszi@digi.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Link: https://lkml.kernel.org/r/20190628111440.370295517@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/apic/io_apic.c | 46 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 46 insertions(+)
+
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -1893,6 +1893,50 @@ static int ioapic_set_affinity(struct ir
+ return ret;
+ }
+
++/*
++ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
++ * be in flight, but not yet serviced by the target CPU. That means
++ * __synchronize_hardirq() would return and claim that everything is calmed
++ * down. So free_irq() would proceed and deactivate the interrupt and free
++ * resources.
++ *
++ * Once the target CPU comes around to service it it will find a cleared
++ * vector and complain. While the spurious interrupt is harmless, the full
++ * release of resources might prevent the interrupt from being acknowledged
++ * which keeps the hardware in a weird state.
++ *
++ * Verify that the corresponding Remote-IRR bits are clear.
++ */
++static int ioapic_irq_get_chip_state(struct irq_data *irqd,
++ enum irqchip_irq_state which,
++ bool *state)
++{
++ struct mp_chip_data *mcd = irqd->chip_data;
++ struct IO_APIC_route_entry rentry;
++ struct irq_pin_list *p;
++
++ if (which != IRQCHIP_STATE_ACTIVE)
++ return -EINVAL;
++
++ *state = false;
++ raw_spin_lock(&ioapic_lock);
++ for_each_irq_pin(p, mcd->irq_2_pin) {
++ rentry = __ioapic_read_entry(p->apic, p->pin);
++ /*
++ * The remote IRR is only valid in level trigger mode. It's
++ * meaning is undefined for edge triggered interrupts and
++ * irrelevant because the IO-APIC treats them as fire and
++ * forget.
++ */
++ if (rentry.irr && rentry.trigger) {
++ *state = true;
++ break;
++ }
++ }
++ raw_spin_unlock(&ioapic_lock);
++ return 0;
++}
++
+ static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+@@ -1902,6 +1946,7 @@ static struct irq_chip ioapic_chip __rea
+ .irq_eoi = ioapic_ack_level,
+ .irq_set_affinity = ioapic_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
++ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+ };
+
+@@ -1914,6 +1959,7 @@ static struct irq_chip ioapic_ir_chip __
+ .irq_eoi = ioapic_ir_ack_level,
+ .irq_set_affinity = ioapic_set_affinity,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
++ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+ };
+
--- /dev/null
+From b7107a67f0d125459fe41f86e8079afd1a5e0b15 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Jun 2019 13:11:53 +0200
+Subject: x86/irq: Handle spurious interrupt after shutdown gracefully
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit b7107a67f0d125459fe41f86e8079afd1a5e0b15 upstream.
+
+Since the rework of the vector management, warnings about spurious
+interrupts have been reported. Robert provided some more information and
+did an initial analysis. The following situation leads to these warnings:
+
+ CPU 0 CPU 1 IO_APIC
+
+ interrupt is raised
+ sent to CPU1
+ Unable to handle
+ immediately
+ (interrupts off,
+ deep idle delay)
+ mask()
+ ...
+ free()
+ shutdown()
+ synchronize_irq()
+ clear_vector()
+ do_IRQ()
+ -> vector is clear
+
+Before the rework the vector entries of legacy interrupts were statically
+assigned and occupied precious vector space while most of them were
+unused. Due to that the above situation was handled silently because the
+vector was handled and the core handler of the assigned interrupt
+descriptor noticed that it is shut down and returned.
+
+While this has been usually observed with legacy interrupts, this situation
+is not limited to them. Any other interrupt source, e.g. MSI, can cause the
+same issue.
+
+After adding proper synchronization for level triggered interrupts, this
+can only happen for edge triggered interrupts where the IO-APIC obviously
+cannot provide information about interrupts in flight.
+
+While the spurious warning is actually harmless in this case it worries
+users and driver developers.
+
+Handle it gracefully by marking the vector entry as VECTOR_SHUTDOWN instead
+of VECTOR_UNUSED when the vector is freed up.
+
+If that above late handling happens the spurious detector will not complain
+and switch the entry to VECTOR_UNUSED. Any subsequent spurious interrupt on
+that line will trigger the spurious warning as before.
+
+Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode")
+Reported-by: Robert Hodaszi <Robert.Hodaszi@digi.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>-
+Tested-by: Robert Hodaszi <Robert.Hodaszi@digi.com>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Link: https://lkml.kernel.org/r/20190628111440.459647741@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/hw_irq.h | 3 ++-
+ arch/x86/kernel/apic/vector.c | 4 ++--
+ arch/x86/kernel/irq.c | 2 +-
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/include/asm/hw_irq.h
++++ b/arch/x86/include/asm/hw_irq.h
+@@ -151,7 +151,8 @@ extern char irq_entries_start[];
+ #endif
+
+ #define VECTOR_UNUSED NULL
+-#define VECTOR_RETRIGGERED ((void *)~0UL)
++#define VECTOR_SHUTDOWN ((void *)~0UL)
++#define VECTOR_RETRIGGERED ((void *)~1UL)
+
+ typedef struct irq_desc* vector_irq_t[NR_VECTORS];
+ DECLARE_PER_CPU(vector_irq_t, vector_irq);
+--- a/arch/x86/kernel/apic/vector.c
++++ b/arch/x86/kernel/apic/vector.c
+@@ -343,7 +343,7 @@ static void clear_irq_vector(struct irq_
+ trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
+ apicd->prev_cpu);
+
+- per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
++ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
+ irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
+ apicd->vector = 0;
+
+@@ -352,7 +352,7 @@ static void clear_irq_vector(struct irq_
+ if (!vector)
+ return;
+
+- per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
++ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
+ irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
+ apicd->prev_vector = 0;
+ apicd->move_in_progress = 0;
+--- a/arch/x86/kernel/irq.c
++++ b/arch/x86/kernel/irq.c
+@@ -246,7 +246,7 @@ __visible unsigned int __irq_entry do_IR
+ if (!handle_irq(desc, regs)) {
+ ack_APIC_irq();
+
+- if (desc != VECTOR_RETRIGGERED) {
++ if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
+ pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
+ __func__, smp_processor_id(),
+ vector);
--- /dev/null
+From f8a8fe61fec8006575699559ead88b0b833d5cad Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 28 Jun 2019 13:11:54 +0200
+Subject: x86/irq: Seperate unused system vectors from spurious entry again
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f8a8fe61fec8006575699559ead88b0b833d5cad upstream.
+
+Quite some time ago the interrupt entry stubs for unused vectors in the
+system vector range got removed and directly mapped to the spurious
+interrupt vector entry point.
+
+Sounds reasonable, but it's subtly broken. The spurious interrupt vector
+entry point pushes vector number 0xFF on the stack which makes the whole
+logic in __smp_spurious_interrupt() pointless.
+
+As a consequence any spurious interrupt which comes from a vector != 0xFF
+is treated as a real spurious interrupt (vector 0xFF) and not
+acknowledged. That subsequently stalls all interrupt vectors of equal and
+lower priority, which brings the system to a grinding halt.
+
+This can happen because even on 64-bit the system vector space is not
+guaranteed to be fully populated. A full compile time handling of the
+unused vectors is not possible because quite some of them are conditonally
+populated at runtime.
+
+Bring the entry stubs back, which wastes 160 bytes if all stubs are unused,
+but gains the proper handling back. There is no point to selectively spare
+some of the stubs which are known at compile time as the required code in
+the IDT management would be way larger and convoluted.
+
+Do not route the spurious entries through common_interrupt and do_IRQ() as
+the original code did. Route it to smp_spurious_interrupt() which evaluates
+the vector number and acts accordingly now that the real vector numbers are
+handed in.
+
+Fixup the pr_warn so the actual spurious vector (0xff) is clearly
+distiguished from the other vectors and also note for the vectored case
+whether it was pending in the ISR or not.
+
+ "Spurious APIC interrupt (vector 0xFF) on CPU#0, should never happen."
+ "Spurious interrupt vector 0xed on CPU#1. Acked."
+ "Spurious interrupt vector 0xee on CPU#1. Not pending!."
+
+Fixes: 2414e021ac8d ("x86: Avoid building unused IRQ entry stubs")
+Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Marc Zyngier <marc.zyngier@arm.com>
+Cc: Jan Beulich <jbeulich@suse.com>
+Link: https://lkml.kernel.org/r/20190628111440.550568228@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/entry/entry_32.S | 24 ++++++++++++++++++++++++
+ arch/x86/entry/entry_64.S | 30 ++++++++++++++++++++++++++----
+ arch/x86/include/asm/hw_irq.h | 2 ++
+ arch/x86/kernel/apic/apic.c | 33 ++++++++++++++++++++++-----------
+ arch/x86/kernel/idt.c | 3 ++-
+ 5 files changed, 76 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/entry/entry_32.S
++++ b/arch/x86/entry/entry_32.S
+@@ -1105,6 +1105,30 @@ ENTRY(irq_entries_start)
+ .endr
+ END(irq_entries_start)
+
++#ifdef CONFIG_X86_LOCAL_APIC
++ .align 8
++ENTRY(spurious_entries_start)
++ vector=FIRST_SYSTEM_VECTOR
++ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
++ pushl $(~vector+0x80) /* Note: always in signed byte range */
++ vector=vector+1
++ jmp common_spurious
++ .align 8
++ .endr
++END(spurious_entries_start)
++
++common_spurious:
++ ASM_CLAC
++ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
++ SAVE_ALL switch_stacks=1
++ ENCODE_FRAME_POINTER
++ TRACE_IRQS_OFF
++ movl %esp, %eax
++ call smp_spurious_interrupt
++ jmp ret_from_intr
++ENDPROC(common_interrupt)
++#endif
++
+ /*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+--- a/arch/x86/entry/entry_64.S
++++ b/arch/x86/entry/entry_64.S
+@@ -377,6 +377,18 @@ ENTRY(irq_entries_start)
+ .endr
+ END(irq_entries_start)
+
++ .align 8
++ENTRY(spurious_entries_start)
++ vector=FIRST_SYSTEM_VECTOR
++ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
++ UNWIND_HINT_IRET_REGS
++ pushq $(~vector+0x80) /* Note: always in signed byte range */
++ jmp common_spurious
++ .align 8
++ vector=vector+1
++ .endr
++END(spurious_entries_start)
++
+ .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
+ #ifdef CONFIG_DEBUG_ENTRY
+ pushq %rax
+@@ -573,10 +585,20 @@ _ASM_NOKPROBE(interrupt_entry)
+
+ /* Interrupt entry/exit. */
+
+- /*
+- * The interrupt stubs push (~vector+0x80) onto the stack and
+- * then jump to common_interrupt.
+- */
++/*
++ * The interrupt stubs push (~vector+0x80) onto the stack and
++ * then jump to common_spurious/interrupt.
++ */
++common_spurious:
++ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
++ call interrupt_entry
++ UNWIND_HINT_REGS indirect=1
++ call smp_spurious_interrupt /* rdi points to pt_regs */
++ jmp ret_from_intr
++END(common_spurious)
++_ASM_NOKPROBE(common_spurious)
++
++/* common_interrupt is a hotpath. Align it */
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
+ common_interrupt:
+ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
+--- a/arch/x86/include/asm/hw_irq.h
++++ b/arch/x86/include/asm/hw_irq.h
+@@ -150,6 +150,8 @@ extern char irq_entries_start[];
+ #define trace_irq_entries_start irq_entries_start
+ #endif
+
++extern char spurious_entries_start[];
++
+ #define VECTOR_UNUSED NULL
+ #define VECTOR_SHUTDOWN ((void *)~0UL)
+ #define VECTOR_RETRIGGERED ((void *)~1UL)
+--- a/arch/x86/kernel/apic/apic.c
++++ b/arch/x86/kernel/apic/apic.c
+@@ -2035,21 +2035,32 @@ __visible void __irq_entry smp_spurious_
+ entering_irq();
+ trace_spurious_apic_entry(vector);
+
++ inc_irq_stat(irq_spurious_count);
++
++ /*
++ * If this is a spurious interrupt then do not acknowledge
++ */
++ if (vector == SPURIOUS_APIC_VECTOR) {
++ /* See SDM vol 3 */
++ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
++ smp_processor_id());
++ goto out;
++ }
++
+ /*
+- * Check if this really is a spurious interrupt and ACK it
+- * if it is a vectored one. Just in case...
+- * Spurious interrupts should not be ACKed.
++ * If it is a vectored one, verify it's set in the ISR. If set,
++ * acknowledge it.
+ */
+ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+- if (v & (1 << (vector & 0x1f)))
++ if (v & (1 << (vector & 0x1f))) {
++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
++ vector, smp_processor_id());
+ ack_APIC_irq();
+-
+- inc_irq_stat(irq_spurious_count);
+-
+- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+- pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
+- "should never happen.\n", vector, smp_processor_id());
+-
++ } else {
++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
++ vector, smp_processor_id());
++ }
++out:
+ trace_spurious_apic_exit(vector);
+ exiting_irq();
+ }
+--- a/arch/x86/kernel/idt.c
++++ b/arch/x86/kernel/idt.c
+@@ -321,7 +321,8 @@ void __init idt_setup_apic_and_irq_gates
+ #ifdef CONFIG_X86_LOCAL_APIC
+ for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
+ set_bit(i, system_vectors);
+- set_intr_gate(i, spurious_interrupt);
++ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
++ set_intr_gate(i, entry);
+ }
+ #endif
+ }