From: Greg Kroah-Hartman Date: Tue, 16 Jul 2019 22:58:18 +0000 (+0900) Subject: 5.1-stable patches X-Git-Tag: v5.2.2~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fa50343f9a3a6d78bd95d7c78e534260d9e7c496;p=thirdparty%2Fkernel%2Fstable-queue.git 5.1-stable patches added patches: genirq-add-optional-hardware-synchronization-for-shutdown.patch genirq-delay-deactivation-in-free_irq.patch genirq-fix-misleading-synchronize_irq-documentation.patch x86-ioapic-implement-irq_get_irqchip_state-callback.patch x86-irq-handle-spurious-interrupt-after-shutdown-gracefully.patch x86-irq-seperate-unused-system-vectors-from-spurious-entry-again.patch --- diff --git a/queue-5.1/genirq-add-optional-hardware-synchronization-for-shutdown.patch b/queue-5.1/genirq-add-optional-hardware-synchronization-for-shutdown.patch new file mode 100644 index 00000000000..2dcf6282757 --- /dev/null +++ b/queue-5.1/genirq-add-optional-hardware-synchronization-for-shutdown.patch @@ -0,0 +1,222 @@ +From 62e0468650c30f0298822c580f382b16328119f6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 28 Jun 2019 13:11:51 +0200 +Subject: genirq: Add optional hardware synchronization for shutdown + +From: Thomas Gleixner + +commit 62e0468650c30f0298822c580f382b16328119f6 upstream. + +free_irq() ensures that no hardware interrupt handler is executing on a +different CPU before actually releasing resources and deactivating the +interrupt completely in a domain hierarchy. + +But that does not catch the case where the interrupt is on flight at the +hardware level but not yet serviced by the target CPU. That creates an +interesing race condition: + + CPU 0 CPU 1 IRQ CHIP + + interrupt is raised + sent to CPU1 + Unable to handle + immediately + (interrupts off, + deep idle delay) + mask() + ... + free() + shutdown() + synchronize_irq() + release_resources() + do_IRQ() + -> resources are not available + +That might be harmless and just trigger a spurious interrupt warning, but +some interrupt chips might get into a wedged state. + +Utilize the existing irq_get_irqchip_state() callback for the +synchronization in free_irq(). + +synchronize_hardirq() is not using this mechanism as it might actually +deadlock unter certain conditions, e.g. when called with interrupts +disabled and the target CPU is the one on which the synchronization is +invoked. synchronize_irq() uses it because that function cannot be called +from non preemtible contexts as it might sleep. + +No functional change intended and according to Marc the existing GIC +implementations where the driver supports the callback should be able +to cope with that core change. Famous last words. + +Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode") +Reported-by: Robert Hodaszi +Signed-off-by: Thomas Gleixner +Reviewed-by: Marc Zyngier +Tested-by: Marc Zyngier +Link: https://lkml.kernel.org/r/20190628111440.279463375@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/irq/internals.h | 4 ++ + kernel/irq/manage.c | 75 ++++++++++++++++++++++++++++++++++++------------- + 2 files changed, 60 insertions(+), 19 deletions(-) + +--- a/kernel/irq/internals.h ++++ b/kernel/irq/internals.h +@@ -97,6 +97,10 @@ static inline void irq_mark_irq(unsigned + extern void irq_mark_irq(unsigned int irq); + #endif + ++extern int __irq_get_irqchip_state(struct irq_data *data, ++ enum irqchip_irq_state which, ++ bool *state); ++ + extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); + + irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -35,8 +35,9 @@ static int __init setup_forced_irqthread + early_param("threadirqs", setup_forced_irqthreads); + #endif + +-static void __synchronize_hardirq(struct irq_desc *desc) ++static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) + { ++ struct irq_data *irqd = irq_desc_get_irq_data(desc); + bool inprogress; + + do { +@@ -52,6 +53,20 @@ static void __synchronize_hardirq(struct + /* Ok, that indicated we're done: double-check carefully. */ + raw_spin_lock_irqsave(&desc->lock, flags); + inprogress = irqd_irq_inprogress(&desc->irq_data); ++ ++ /* ++ * If requested and supported, check at the chip whether it ++ * is in flight at the hardware level, i.e. already pending ++ * in a CPU and waiting for service and acknowledge. ++ */ ++ if (!inprogress && sync_chip) { ++ /* ++ * Ignore the return code. inprogress is only updated ++ * when the chip supports it. ++ */ ++ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, ++ &inprogress); ++ } + raw_spin_unlock_irqrestore(&desc->lock, flags); + + /* Oops, that failed? */ +@@ -74,13 +89,18 @@ static void __synchronize_hardirq(struct + * Returns: false if a threaded handler is active. + * + * This function may be called - with care - from IRQ context. ++ * ++ * It does not check whether there is an interrupt in flight at the ++ * hardware level, but not serviced yet, as this might deadlock when ++ * called with interrupts disabled and the target CPU of the interrupt ++ * is the current CPU. + */ + bool synchronize_hardirq(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { +- __synchronize_hardirq(desc); ++ __synchronize_hardirq(desc, false); + return !atomic_read(&desc->threads_active); + } + +@@ -98,13 +118,17 @@ EXPORT_SYMBOL(synchronize_hardirq); + * + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. ++ * ++ * It optionally makes sure (when the irq chip supports that method) ++ * that the interrupt is not pending in any CPU and waiting for ++ * service. + */ + void synchronize_irq(unsigned int irq) + { + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { +- __synchronize_hardirq(desc); ++ __synchronize_hardirq(desc, true); + /* + * We made sure that no hardirq handler is + * running. Now verify that no threaded handlers are +@@ -1730,8 +1754,12 @@ static struct irqaction *__free_irq(stru + + unregister_handler_proc(irq, action); + +- /* Make sure it's not being used on another CPU: */ +- synchronize_hardirq(irq); ++ /* ++ * Make sure it's not being used on another CPU and if the chip ++ * supports it also make sure that there is no (not yet serviced) ++ * interrupt in flight at the hardware level. ++ */ ++ __synchronize_hardirq(desc, true); + + #ifdef CONFIG_DEBUG_SHIRQ + /* +@@ -2589,6 +2617,28 @@ out: + irq_put_desc_unlock(desc, flags); + } + ++int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct irq_chip *chip; ++ int err = -EINVAL; ++ ++ do { ++ chip = irq_data_get_irq_chip(data); ++ if (chip->irq_get_irqchip_state) ++ break; ++#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY ++ data = data->parent_data; ++#else ++ data = NULL; ++#endif ++ } while (data); ++ ++ if (data) ++ err = chip->irq_get_irqchip_state(data, which, state); ++ return err; ++} ++ + /** + * irq_get_irqchip_state - returns the irqchip state of a interrupt. + * @irq: Interrupt line that is forwarded to a VM +@@ -2607,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int i + { + struct irq_desc *desc; + struct irq_data *data; +- struct irq_chip *chip; + unsigned long flags; + int err = -EINVAL; + +@@ -2617,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int i + + data = irq_desc_get_irq_data(desc); + +- do { +- chip = irq_data_get_irq_chip(data); +- if (chip->irq_get_irqchip_state) +- break; +-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY +- data = data->parent_data; +-#else +- data = NULL; +-#endif +- } while (data); +- +- if (data) +- err = chip->irq_get_irqchip_state(data, which, state); ++ err = __irq_get_irqchip_state(data, which, state); + + irq_put_desc_busunlock(desc, flags); + return err; diff --git a/queue-5.1/genirq-delay-deactivation-in-free_irq.patch b/queue-5.1/genirq-delay-deactivation-in-free_irq.patch new file mode 100644 index 00000000000..aede736c1cf --- /dev/null +++ b/queue-5.1/genirq-delay-deactivation-in-free_irq.patch @@ -0,0 +1,149 @@ +From 4001d8e8762f57d418b66e4e668601791900a1dd Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 28 Jun 2019 13:11:49 +0200 +Subject: genirq: Delay deactivation in free_irq() + +From: Thomas Gleixner + +commit 4001d8e8762f57d418b66e4e668601791900a1dd upstream. + +When interrupts are shutdown, they are immediately deactivated in the +irqdomain hierarchy. While this looks obviously correct there is a subtle +issue: + +There might be an interrupt in flight when free_irq() is invoking the +shutdown. This is properly handled at the irq descriptor / primary handler +level, but the deactivation might completely disable resources which are +required to acknowledge the interrupt. + +Split the shutdown code and deactivate the interrupt after synchronization +in free_irq(). Fixup all other usage sites where this is not an issue to +invoke the combined shutdown_and_deactivate() function instead. + +This still might be an issue if the interrupt in flight servicing is +delayed on a remote CPU beyond the invocation of synchronize_irq(), but +that cannot be handled at that level and needs to be handled in the +synchronize_irq() context. + +Fixes: f8264e34965a ("irqdomain: Introduce new interfaces to support hierarchy irqdomains") +Reported-by: Robert Hodaszi +Signed-off-by: Thomas Gleixner +Reviewed-by: Marc Zyngier +Link: https://lkml.kernel.org/r/20190628111440.098196390@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/irq/autoprobe.c | 6 +++--- + kernel/irq/chip.c | 6 ++++++ + kernel/irq/cpuhotplug.c | 2 +- + kernel/irq/internals.h | 1 + + kernel/irq/manage.c | 12 +++++++++++- + 5 files changed, 22 insertions(+), 5 deletions(-) + +--- a/kernel/irq/autoprobe.c ++++ b/kernel/irq/autoprobe.c +@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void) + /* It triggered already - consider it spurious. */ + if (!(desc->istate & IRQS_WAITING)) { + desc->istate &= ~IRQS_AUTODETECT; +- irq_shutdown(desc); ++ irq_shutdown_and_deactivate(desc); + } else + if (i < 32) + mask |= 1 << i; +@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned lon + mask |= 1 << i; + + desc->istate &= ~IRQS_AUTODETECT; +- irq_shutdown(desc); ++ irq_shutdown_and_deactivate(desc); + } + raw_spin_unlock_irq(&desc->lock); + } +@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val) + nr_of_irqs++; + } + desc->istate &= ~IRQS_AUTODETECT; +- irq_shutdown(desc); ++ irq_shutdown_and_deactivate(desc); + } + raw_spin_unlock_irq(&desc->lock); + } +--- a/kernel/irq/chip.c ++++ b/kernel/irq/chip.c +@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc) + } + irq_state_clr_started(desc); + } ++} ++ ++ ++void irq_shutdown_and_deactivate(struct irq_desc *desc) ++{ ++ irq_shutdown(desc); + /* + * This must be called even if the interrupt was never started up, + * because the activation can happen before the interrupt is +--- a/kernel/irq/cpuhotplug.c ++++ b/kernel/irq/cpuhotplug.c +@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_d + */ + if (irqd_affinity_is_managed(d)) { + irqd_set_managed_shutdown(d); +- irq_shutdown(desc); ++ irq_shutdown_and_deactivate(desc); + return false; + } + affinity = cpu_online_mask; +--- a/kernel/irq/internals.h ++++ b/kernel/irq/internals.h +@@ -82,6 +82,7 @@ extern int irq_activate_and_startup(stru + extern int irq_startup(struct irq_desc *desc, bool resend, bool force); + + extern void irq_shutdown(struct irq_desc *desc); ++extern void irq_shutdown_and_deactivate(struct irq_desc *desc); + extern void irq_enable(struct irq_desc *desc); + extern void irq_disable(struct irq_desc *desc); + extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1699,6 +1700,7 @@ static struct irqaction *__free_irq(stru + /* If this was the last handler, shut down the IRQ line: */ + if (!desc->action) { + irq_settings_clr_disable_unlazy(desc); ++ /* Only shutdown. Deactivate after synchronize_hardirq() */ + irq_shutdown(desc); + } + +@@ -1768,6 +1770,14 @@ static struct irqaction *__free_irq(stru + * require it to deallocate resources over the slow bus. + */ + chip_bus_lock(desc); ++ /* ++ * There is no interrupt on the fly anymore. Deactivate it ++ * completely. ++ */ ++ raw_spin_lock_irqsave(&desc->lock, flags); ++ irq_domain_deactivate_irq(&desc->irq_data); ++ raw_spin_unlock_irqrestore(&desc->lock, flags); ++ + irq_release_resources(desc); + chip_bus_sync_unlock(desc); + irq_remove_timings(desc); +@@ -1855,7 +1865,7 @@ static const void *__cleanup_nmi(unsigne + } + + irq_settings_clr_disable_unlazy(desc); +- irq_shutdown(desc); ++ irq_shutdown_and_deactivate(desc); + + irq_release_resources(desc); + diff --git a/queue-5.1/genirq-fix-misleading-synchronize_irq-documentation.patch b/queue-5.1/genirq-fix-misleading-synchronize_irq-documentation.patch new file mode 100644 index 00000000000..cf65a57fce6 --- /dev/null +++ b/queue-5.1/genirq-fix-misleading-synchronize_irq-documentation.patch @@ -0,0 +1,33 @@ +From 1d21f2af8571c6a6a44e7c1911780614847b0253 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 28 Jun 2019 13:11:50 +0200 +Subject: genirq: Fix misleading synchronize_irq() documentation + +From: Thomas Gleixner + +commit 1d21f2af8571c6a6a44e7c1911780614847b0253 upstream. + +The function might sleep, so it cannot be called from interrupt +context. Not even with care. + +Signed-off-by: Thomas Gleixner +Cc: Marc Zyngier +Link: https://lkml.kernel.org/r/20190628111440.189241552@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/irq/manage.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -96,7 +96,8 @@ EXPORT_SYMBOL(synchronize_hardirq); + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * +- * This function may be called - with care - from IRQ context. ++ * Can only be called from preemptible code as it might sleep when ++ * an interrupt thread is associated to @irq. + */ + void synchronize_irq(unsigned int irq) + { diff --git a/queue-5.1/series b/queue-5.1/series index 1e7d315d3d3..0d2ab38351f 100644 --- a/queue-5.1/series +++ b/queue-5.1/series @@ -38,3 +38,9 @@ pinctrl-mediatek-update-cur_mask-in-mask-mask-ops.patch mm-oom_kill.c-fix-uninitialized-oc-constraint.patch fork-memcg-alloc_thread_stack_node-needs-to-set-tsk-.patch linux-kernel.h-fix-overflow-for-div_round_up_ull.patch +genirq-delay-deactivation-in-free_irq.patch +genirq-fix-misleading-synchronize_irq-documentation.patch +genirq-add-optional-hardware-synchronization-for-shutdown.patch +x86-ioapic-implement-irq_get_irqchip_state-callback.patch +x86-irq-handle-spurious-interrupt-after-shutdown-gracefully.patch +x86-irq-seperate-unused-system-vectors-from-spurious-entry-again.patch diff --git a/queue-5.1/x86-ioapic-implement-irq_get_irqchip_state-callback.patch b/queue-5.1/x86-ioapic-implement-irq_get_irqchip_state-callback.patch new file mode 100644 index 00000000000..bc8fde61b8c --- /dev/null +++ b/queue-5.1/x86-ioapic-implement-irq_get_irqchip_state-callback.patch @@ -0,0 +1,114 @@ +From dfe0cf8b51b07e56ded571e3de0a4a9382517231 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 28 Jun 2019 13:11:52 +0200 +Subject: x86/ioapic: Implement irq_get_irqchip_state() callback + +From: Thomas Gleixner + +commit dfe0cf8b51b07e56ded571e3de0a4a9382517231 upstream. + +When an interrupt is shut down in free_irq() there might be an inflight +interrupt pending in the IO-APIC remote IRR which is not yet serviced. That +means the interrupt has been sent to the target CPUs local APIC, but the +target CPU is in a state which delays the servicing. + +So free_irq() would proceed to free resources and to clear the vector +because synchronize_hardirq() does not see an interrupt handler in +progress. + +That can trigger a spurious interrupt warning, which is harmless and just +confuses users, but it also can leave the remote IRR in a stale state +because once the handler is invoked the interrupt resources might be freed +already and therefore acknowledgement is not possible anymore. + +Implement the irq_get_irqchip_state() callback for the IO-APIC irq chip. The +callback is invoked from free_irq() via __synchronize_hardirq(). Check the +remote IRR bit of the interrupt and return 'in flight' if it is set and the +interrupt is configured in level mode. For edge mode the remote IRR has no +meaning. + +As this is only meaningful for level triggered interrupts this won't cure +the potential spurious interrupt warning for edge triggered interrupts, but +the edge trigger case does not result in stale hardware state. This has to +be addressed at the vector/interrupt entry level seperately. + +Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode") +Reported-by: Robert Hodaszi +Signed-off-by: Thomas Gleixner +Cc: Marc Zyngier +Link: https://lkml.kernel.org/r/20190628111440.370295517@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/apic/io_apic.c | 46 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +--- a/arch/x86/kernel/apic/io_apic.c ++++ b/arch/x86/kernel/apic/io_apic.c +@@ -1893,6 +1893,50 @@ static int ioapic_set_affinity(struct ir + return ret; + } + ++/* ++ * Interrupt shutdown masks the ioapic pin, but the interrupt might already ++ * be in flight, but not yet serviced by the target CPU. That means ++ * __synchronize_hardirq() would return and claim that everything is calmed ++ * down. So free_irq() would proceed and deactivate the interrupt and free ++ * resources. ++ * ++ * Once the target CPU comes around to service it it will find a cleared ++ * vector and complain. While the spurious interrupt is harmless, the full ++ * release of resources might prevent the interrupt from being acknowledged ++ * which keeps the hardware in a weird state. ++ * ++ * Verify that the corresponding Remote-IRR bits are clear. ++ */ ++static int ioapic_irq_get_chip_state(struct irq_data *irqd, ++ enum irqchip_irq_state which, ++ bool *state) ++{ ++ struct mp_chip_data *mcd = irqd->chip_data; ++ struct IO_APIC_route_entry rentry; ++ struct irq_pin_list *p; ++ ++ if (which != IRQCHIP_STATE_ACTIVE) ++ return -EINVAL; ++ ++ *state = false; ++ raw_spin_lock(&ioapic_lock); ++ for_each_irq_pin(p, mcd->irq_2_pin) { ++ rentry = __ioapic_read_entry(p->apic, p->pin); ++ /* ++ * The remote IRR is only valid in level trigger mode. It's ++ * meaning is undefined for edge triggered interrupts and ++ * irrelevant because the IO-APIC treats them as fire and ++ * forget. ++ */ ++ if (rentry.irr && rentry.trigger) { ++ *state = true; ++ break; ++ } ++ } ++ raw_spin_unlock(&ioapic_lock); ++ return 0; ++} ++ + static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .irq_startup = startup_ioapic_irq, +@@ -1902,6 +1946,7 @@ static struct irq_chip ioapic_chip __rea + .irq_eoi = ioapic_ack_level, + .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_get_irqchip_state = ioapic_irq_get_chip_state, + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + +@@ -1914,6 +1959,7 @@ static struct irq_chip ioapic_ir_chip __ + .irq_eoi = ioapic_ir_ack_level, + .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, ++ .irq_get_irqchip_state = ioapic_irq_get_chip_state, + .flags = IRQCHIP_SKIP_SET_WAKE, + }; + diff --git a/queue-5.1/x86-irq-handle-spurious-interrupt-after-shutdown-gracefully.patch b/queue-5.1/x86-irq-handle-spurious-interrupt-after-shutdown-gracefully.patch new file mode 100644 index 00000000000..0aad1cca0bf --- /dev/null +++ b/queue-5.1/x86-irq-handle-spurious-interrupt-after-shutdown-gracefully.patch @@ -0,0 +1,111 @@ +From b7107a67f0d125459fe41f86e8079afd1a5e0b15 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 28 Jun 2019 13:11:53 +0200 +Subject: x86/irq: Handle spurious interrupt after shutdown gracefully + +From: Thomas Gleixner + +commit b7107a67f0d125459fe41f86e8079afd1a5e0b15 upstream. + +Since the rework of the vector management, warnings about spurious +interrupts have been reported. Robert provided some more information and +did an initial analysis. The following situation leads to these warnings: + + CPU 0 CPU 1 IO_APIC + + interrupt is raised + sent to CPU1 + Unable to handle + immediately + (interrupts off, + deep idle delay) + mask() + ... + free() + shutdown() + synchronize_irq() + clear_vector() + do_IRQ() + -> vector is clear + +Before the rework the vector entries of legacy interrupts were statically +assigned and occupied precious vector space while most of them were +unused. Due to that the above situation was handled silently because the +vector was handled and the core handler of the assigned interrupt +descriptor noticed that it is shut down and returned. + +While this has been usually observed with legacy interrupts, this situation +is not limited to them. Any other interrupt source, e.g. MSI, can cause the +same issue. + +After adding proper synchronization for level triggered interrupts, this +can only happen for edge triggered interrupts where the IO-APIC obviously +cannot provide information about interrupts in flight. + +While the spurious warning is actually harmless in this case it worries +users and driver developers. + +Handle it gracefully by marking the vector entry as VECTOR_SHUTDOWN instead +of VECTOR_UNUSED when the vector is freed up. + +If that above late handling happens the spurious detector will not complain +and switch the entry to VECTOR_UNUSED. Any subsequent spurious interrupt on +that line will trigger the spurious warning as before. + +Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode") +Reported-by: Robert Hodaszi +Signed-off-by: Thomas Gleixner - +Tested-by: Robert Hodaszi +Cc: Marc Zyngier +Link: https://lkml.kernel.org/r/20190628111440.459647741@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/hw_irq.h | 3 ++- + arch/x86/kernel/apic/vector.c | 4 ++-- + arch/x86/kernel/irq.c | 2 +- + 3 files changed, 5 insertions(+), 4 deletions(-) + +--- a/arch/x86/include/asm/hw_irq.h ++++ b/arch/x86/include/asm/hw_irq.h +@@ -151,7 +151,8 @@ extern char irq_entries_start[]; + #endif + + #define VECTOR_UNUSED NULL +-#define VECTOR_RETRIGGERED ((void *)~0UL) ++#define VECTOR_SHUTDOWN ((void *)~0UL) ++#define VECTOR_RETRIGGERED ((void *)~1UL) + + typedef struct irq_desc* vector_irq_t[NR_VECTORS]; + DECLARE_PER_CPU(vector_irq_t, vector_irq); +--- a/arch/x86/kernel/apic/vector.c ++++ b/arch/x86/kernel/apic/vector.c +@@ -343,7 +343,7 @@ static void clear_irq_vector(struct irq_ + trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, + apicd->prev_cpu); + +- per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; ++ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; + irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + apicd->vector = 0; + +@@ -352,7 +352,7 @@ static void clear_irq_vector(struct irq_ + if (!vector) + return; + +- per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; ++ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; + irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; +--- a/arch/x86/kernel/irq.c ++++ b/arch/x86/kernel/irq.c +@@ -246,7 +246,7 @@ __visible unsigned int __irq_entry do_IR + if (!handle_irq(desc, regs)) { + ack_APIC_irq(); + +- if (desc != VECTOR_RETRIGGERED) { ++ if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) { + pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", + __func__, smp_processor_id(), + vector); diff --git a/queue-5.1/x86-irq-seperate-unused-system-vectors-from-spurious-entry-again.patch b/queue-5.1/x86-irq-seperate-unused-system-vectors-from-spurious-entry-again.patch new file mode 100644 index 00000000000..b309d60a064 --- /dev/null +++ b/queue-5.1/x86-irq-seperate-unused-system-vectors-from-spurious-entry-again.patch @@ -0,0 +1,209 @@ +From f8a8fe61fec8006575699559ead88b0b833d5cad Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 28 Jun 2019 13:11:54 +0200 +Subject: x86/irq: Seperate unused system vectors from spurious entry again + +From: Thomas Gleixner + +commit f8a8fe61fec8006575699559ead88b0b833d5cad upstream. + +Quite some time ago the interrupt entry stubs for unused vectors in the +system vector range got removed and directly mapped to the spurious +interrupt vector entry point. + +Sounds reasonable, but it's subtly broken. The spurious interrupt vector +entry point pushes vector number 0xFF on the stack which makes the whole +logic in __smp_spurious_interrupt() pointless. + +As a consequence any spurious interrupt which comes from a vector != 0xFF +is treated as a real spurious interrupt (vector 0xFF) and not +acknowledged. That subsequently stalls all interrupt vectors of equal and +lower priority, which brings the system to a grinding halt. + +This can happen because even on 64-bit the system vector space is not +guaranteed to be fully populated. A full compile time handling of the +unused vectors is not possible because quite some of them are conditonally +populated at runtime. + +Bring the entry stubs back, which wastes 160 bytes if all stubs are unused, +but gains the proper handling back. There is no point to selectively spare +some of the stubs which are known at compile time as the required code in +the IDT management would be way larger and convoluted. + +Do not route the spurious entries through common_interrupt and do_IRQ() as +the original code did. Route it to smp_spurious_interrupt() which evaluates +the vector number and acts accordingly now that the real vector numbers are +handed in. + +Fixup the pr_warn so the actual spurious vector (0xff) is clearly +distiguished from the other vectors and also note for the vectored case +whether it was pending in the ISR or not. + + "Spurious APIC interrupt (vector 0xFF) on CPU#0, should never happen." + "Spurious interrupt vector 0xed on CPU#1. Acked." + "Spurious interrupt vector 0xee on CPU#1. Not pending!." + +Fixes: 2414e021ac8d ("x86: Avoid building unused IRQ entry stubs") +Reported-by: Jan Kiszka +Signed-off-by: Thomas Gleixner +Cc: Marc Zyngier +Cc: Jan Beulich +Link: https://lkml.kernel.org/r/20190628111440.550568228@linutronix.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_32.S | 24 ++++++++++++++++++++++++ + arch/x86/entry/entry_64.S | 30 ++++++++++++++++++++++++++---- + arch/x86/include/asm/hw_irq.h | 2 ++ + arch/x86/kernel/apic/apic.c | 33 ++++++++++++++++++++++----------- + arch/x86/kernel/idt.c | 3 ++- + 5 files changed, 76 insertions(+), 16 deletions(-) + +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -1105,6 +1105,30 @@ ENTRY(irq_entries_start) + .endr + END(irq_entries_start) + ++#ifdef CONFIG_X86_LOCAL_APIC ++ .align 8 ++ENTRY(spurious_entries_start) ++ vector=FIRST_SYSTEM_VECTOR ++ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) ++ pushl $(~vector+0x80) /* Note: always in signed byte range */ ++ vector=vector+1 ++ jmp common_spurious ++ .align 8 ++ .endr ++END(spurious_entries_start) ++ ++common_spurious: ++ ASM_CLAC ++ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ ++ SAVE_ALL switch_stacks=1 ++ ENCODE_FRAME_POINTER ++ TRACE_IRQS_OFF ++ movl %esp, %eax ++ call smp_spurious_interrupt ++ jmp ret_from_intr ++ENDPROC(common_interrupt) ++#endif ++ + /* + * the CPU automatically disables interrupts when executing an IRQ vector, + * so IRQ-flags tracing has to follow that: +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -377,6 +377,18 @@ ENTRY(irq_entries_start) + .endr + END(irq_entries_start) + ++ .align 8 ++ENTRY(spurious_entries_start) ++ vector=FIRST_SYSTEM_VECTOR ++ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) ++ UNWIND_HINT_IRET_REGS ++ pushq $(~vector+0x80) /* Note: always in signed byte range */ ++ jmp common_spurious ++ .align 8 ++ vector=vector+1 ++ .endr ++END(spurious_entries_start) ++ + .macro DEBUG_ENTRY_ASSERT_IRQS_OFF + #ifdef CONFIG_DEBUG_ENTRY + pushq %rax +@@ -573,10 +585,20 @@ _ASM_NOKPROBE(interrupt_entry) + + /* Interrupt entry/exit. */ + +- /* +- * The interrupt stubs push (~vector+0x80) onto the stack and +- * then jump to common_interrupt. +- */ ++/* ++ * The interrupt stubs push (~vector+0x80) onto the stack and ++ * then jump to common_spurious/interrupt. ++ */ ++common_spurious: ++ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ ++ call interrupt_entry ++ UNWIND_HINT_REGS indirect=1 ++ call smp_spurious_interrupt /* rdi points to pt_regs */ ++ jmp ret_from_intr ++END(common_spurious) ++_ASM_NOKPROBE(common_spurious) ++ ++/* common_interrupt is a hotpath. Align it */ + .p2align CONFIG_X86_L1_CACHE_SHIFT + common_interrupt: + addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ +--- a/arch/x86/include/asm/hw_irq.h ++++ b/arch/x86/include/asm/hw_irq.h +@@ -150,6 +150,8 @@ extern char irq_entries_start[]; + #define trace_irq_entries_start irq_entries_start + #endif + ++extern char spurious_entries_start[]; ++ + #define VECTOR_UNUSED NULL + #define VECTOR_SHUTDOWN ((void *)~0UL) + #define VECTOR_RETRIGGERED ((void *)~1UL) +--- a/arch/x86/kernel/apic/apic.c ++++ b/arch/x86/kernel/apic/apic.c +@@ -2035,21 +2035,32 @@ __visible void __irq_entry smp_spurious_ + entering_irq(); + trace_spurious_apic_entry(vector); + ++ inc_irq_stat(irq_spurious_count); ++ ++ /* ++ * If this is a spurious interrupt then do not acknowledge ++ */ ++ if (vector == SPURIOUS_APIC_VECTOR) { ++ /* See SDM vol 3 */ ++ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", ++ smp_processor_id()); ++ goto out; ++ } ++ + /* +- * Check if this really is a spurious interrupt and ACK it +- * if it is a vectored one. Just in case... +- * Spurious interrupts should not be ACKed. ++ * If it is a vectored one, verify it's set in the ISR. If set, ++ * acknowledge it. + */ + v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); +- if (v & (1 << (vector & 0x1f))) ++ if (v & (1 << (vector & 0x1f))) { ++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n", ++ vector, smp_processor_id()); + ack_APIC_irq(); +- +- inc_irq_stat(irq_spurious_count); +- +- /* see sw-dev-man vol 3, chapter 7.4.13.5 */ +- pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " +- "should never happen.\n", vector, smp_processor_id()); +- ++ } else { ++ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", ++ vector, smp_processor_id()); ++ } ++out: + trace_spurious_apic_exit(vector); + exiting_irq(); + } +--- a/arch/x86/kernel/idt.c ++++ b/arch/x86/kernel/idt.c +@@ -321,7 +321,8 @@ void __init idt_setup_apic_and_irq_gates + #ifdef CONFIG_X86_LOCAL_APIC + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { + set_bit(i, system_vectors); +- set_intr_gate(i, spurious_interrupt); ++ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); ++ set_intr_gate(i, entry); + } + #endif + }