--- /dev/null
+From dfd62407f84d26a360e774bdb232460c0b0c22f0 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Mon, 28 Aug 2023 08:09:47 +0200
+Subject: xen/events: replace evtchn_rwlock with RCU
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 87797fad6cce28ec9be3c13f031776ff4f104cfc upstream.
+
+In unprivileged Xen guests event handling can cause a deadlock with
+Xen console handling. The evtchn_rwlock and the hvc_lock are taken in
+opposite sequence in __hvc_poll() and in Xen console IRQ handling.
+Normally this is no problem, as the evtchn_rwlock is taken as a reader
+in both paths, but as soon as an event channel is being closed, the
+lock will be taken as a writer, which will cause read_lock() to block:
+
+CPU0                        CPU1                        CPU2
+(IRQ handling)              (__hvc_poll())              (closing event channel)
+
+read_lock(evtchn_rwlock)
+                            spin_lock(hvc_lock)
+                                                        write_lock(evtchn_rwlock)
+                                                        [blocks]
+spin_lock(hvc_lock)
+        [blocks]
+                            read_lock(evtchn_rwlock)
+                                [blocks due to writer waiting,
+                                 and not in_interrupt()]
+
+This issue can be avoided by replacing evtchn_rwlock with RCU in
+xen_free_irq(). Note that RCU is used only to delay freeing of the
+irq_info memory. There is no RCU based dereferencing or replacement of
+pointers involved.
+
+In order to avoid potential races between removing the irq_info
+reference and handling of interrupts, set the irq_info pointer to NULL
+only when freeing its memory. The IRQ itself must be freed at that
+time, too, as otherwise the same IRQ number could be allocated again
+before handling of the old instance has finished.
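+
+Reduced to a minimal sketch (illustration only, not part of this patch;
+my_obj, my_obj_use() and my_obj_teardown() are made-up names standing in
+for irq_info, the event handling loop and xen_free_irq()), the resulting
+pattern is:
+
+        #include <linux/kernel.h>
+        #include <linux/rcupdate.h>
+        #include <linux/slab.h>
+        #include <linux/workqueue.h>
+
+        struct my_obj {
+                struct rcu_work rwork;
+                int data;
+        };
+
+        static struct my_obj *my_obj;   /* stands in for the per-IRQ info pointer */
+
+        /* Runs from the workqueue only after an RCU grace period, i.e. after
+         * all rcu_read_lock() sections that might still use the object have
+         * finished.
+         */
+        static void my_obj_free(struct work_struct *work)
+        {
+                struct my_obj *obj = container_of(to_rcu_work(work),
+                                                  struct my_obj, rwork);
+
+                my_obj = NULL;          /* drop the reference only now ... */
+                kfree(obj);             /* ... and only then free the memory */
+        }
+
+        /* Reader side, analogous to the event handling loop. As in the patch,
+         * RCU only delays the freeing; the pointer is read directly, without
+         * rcu_dereference().
+         */
+        static void my_obj_use(void)
+        {
+                rcu_read_lock();
+                if (my_obj)
+                        pr_info("data %d\n", my_obj->data);
+                rcu_read_unlock();
+        }
+
+        /* Teardown side, analogous to xen_free_irq(). */
+        static void my_obj_teardown(void)
+        {
+                INIT_RCU_WORK(&my_obj->rwork, my_obj_free);
+                queue_rcu_work(system_wq, &my_obj->rwork);
+        }
+
+Readers never block a teardown and a teardown never blocks readers, which
+is what removes the rwlock from the deadlock cycle shown above.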
+
+This is XSA-441 / CVE-2023-34324.
+
+Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework")
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c | 87 ++++++++++++++++++++-------------------
+ 1 file changed, 46 insertions(+), 41 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -33,6 +33,7 @@
+ #include <linux/slab.h>
+ #include <linux/irqnr.h>
+ #include <linux/pci.h>
++#include <linux/rcupdate.h>
+ #include <linux/spinlock.h>
+ #include <linux/cpuhotplug.h>
+ #include <linux/atomic.h>
+@@ -94,6 +95,7 @@ enum xen_irq_type {
+ struct irq_info {
+ struct list_head list;
+ struct list_head eoi_list;
++ struct rcu_work rwork;
+ short refcnt;
+ short spurious_cnt;
+ short type; /* type */
+@@ -142,22 +144,12 @@ const struct evtchn_ops *evtchn_ops;
+ static DEFINE_MUTEX(irq_mapping_update_lock);
+
+ /*
+- * Lock protecting event handling loop against removing event channels.
+- * Adding of event channels is no issue as the associated IRQ becomes active
+- * only after everything is setup (before request_[threaded_]irq() the handler
+- * can't be entered for an event, as the event channel will be unmasked only
+- * then).
+- */
+-static DEFINE_RWLOCK(evtchn_rwlock);
+-
+-/*
+ * Lock hierarchy:
+ *
+ * irq_mapping_update_lock
+- * evtchn_rwlock
+- * IRQ-desc lock
+- * percpu eoi_list_lock
+- * irq_info->lock
++ * IRQ-desc lock
++ * percpu eoi_list_lock
++ * irq_info->lock
+ */
+
+ static LIST_HEAD(xen_irq_list_head);
+@@ -272,6 +264,22 @@ static void set_info_for_irq(unsigned in
+ irq_set_chip_data(irq, info);
+ }
+
++static void delayed_free_irq(struct work_struct *work)
++{
++ struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
++ rwork);
++ unsigned int irq = info->irq;
++
++ /* Remove the info pointer only now, with no potential users left. */
++ set_info_for_irq(irq, NULL);
++
++ kfree(info);
++
++ /* Legacy IRQ descriptors are managed by the arch. */
++ if (irq >= nr_legacy_irqs())
++ irq_free_desc(irq);
++}
++
+ /* Constructors for packed IRQ information. */
+ static int xen_irq_info_common_setup(struct irq_info *info,
+ unsigned irq,
+@@ -606,33 +614,36 @@ static void xen_irq_lateeoi_worker(struc
+
+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+- read_lock_irqsave(&evtchn_rwlock, flags);
++ rcu_read_lock();
+
+ while (true) {
+- spin_lock(&eoi->eoi_list_lock);
++ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+
+- if (info == NULL || now < info->eoi_time) {
+- spin_unlock(&eoi->eoi_list_lock);
++ if (info == NULL)
++ break;
++
++ if (now < info->eoi_time) {
++ mod_delayed_work_on(info->eoi_cpu, system_wq,
++ &eoi->delayed,
++ info->eoi_time - now);
+ break;
+ }
+
+ list_del_init(&info->eoi_list);
+
+- spin_unlock(&eoi->eoi_list_lock);
++ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+
+ info->eoi_time = 0;
+
+ xen_irq_lateeoi_locked(info, false);
+ }
+
+- if (info)
+- mod_delayed_work_on(info->eoi_cpu, system_wq,
+- &eoi->delayed, info->eoi_time - now);
++ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+
+- read_unlock_irqrestore(&evtchn_rwlock, flags);
++ rcu_read_unlock();
+ }
+
+ static void xen_cpu_init_eoi(unsigned int cpu)
+@@ -647,16 +658,15 @@ static void xen_cpu_init_eoi(unsigned in
+ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+ {
+ struct irq_info *info;
+- unsigned long flags;
+
+- read_lock_irqsave(&evtchn_rwlock, flags);
++ rcu_read_lock();
+
+ info = info_for_irq(irq);
+
+ if (info)
+ xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+
+- read_unlock_irqrestore(&evtchn_rwlock, flags);
++ rcu_read_unlock();
+ }
+ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+
+@@ -675,6 +685,7 @@ static void xen_irq_init(unsigned irq)
+
+ info->type = IRQT_UNBOUND;
+ info->refcnt = -1;
++ INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+
+ set_info_for_irq(irq, info);
+
+@@ -727,31 +738,18 @@ static int __must_check xen_allocate_irq
+ static void xen_free_irq(unsigned irq)
+ {
+ struct irq_info *info = info_for_irq(irq);
+- unsigned long flags;
+
+ if (WARN_ON(!info))
+ return;
+
+- write_lock_irqsave(&evtchn_rwlock, flags);
+-
+ if (!list_empty(&info->eoi_list))
+ lateeoi_list_del(info);
+
+ list_del(&info->list);
+
+- set_info_for_irq(irq, NULL);
+-
+ WARN_ON(info->refcnt > 0);
+
+- write_unlock_irqrestore(&evtchn_rwlock, flags);
+-
+- kfree(info);
+-
+- /* Legacy IRQ descriptors are managed by the arch. */
+- if (irq < nr_legacy_irqs())
+- return;
+-
+- irq_free_desc(irq);
++ queue_rcu_work(system_wq, &info->rwork);
+ }
+
+ static void xen_evtchn_close(evtchn_port_t port)
+@@ -1639,7 +1637,14 @@ static void __xen_evtchn_do_upcall(void)
+ int cpu = smp_processor_id();
+ struct evtchn_loop_ctrl ctrl = { 0 };
+
+- read_lock(&evtchn_rwlock);
++ /*
++ * When closing an event channel the associated IRQ must not be freed
++ * until all cpus have left the event handling loop. This is ensured
++ * by taking the rcu_read_lock() while handling events, as freeing of
++ * the IRQ is handled via queue_rcu_work() _after_ closing the event
++ * channel.
++ */
++ rcu_read_lock();
+
+ do {
+ vcpu_info->evtchn_upcall_pending = 0;
+@@ -1652,7 +1657,7 @@ static void __xen_evtchn_do_upcall(void)
+
+ } while (vcpu_info->evtchn_upcall_pending);
+
+- read_unlock(&evtchn_rwlock);
++ rcu_read_unlock();
+
+ /*
+ * Increment irq_epoch only now to defer EOIs only for