From: Greg Kroah-Hartman
Date: Tue, 10 Oct 2023 19:37:49 +0000 (+0200)
Subject: 5.4-stable patches
X-Git-Tag: v4.14.327~5
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=576afc6d4e723f4f00434de01698b96fe271555e;p=thirdparty%2Fkernel%2Fstable-queue.git

5.4-stable patches

added patches:
	xen-events-replace-evtchn_rwlock-with-rcu.patch
---

diff --git a/queue-5.4/series b/queue-5.4/series
index 047beb29496..38333302ffb 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -125,3 +125,4 @@ rdma-mlx5-fix-null-string-error.patch
 parisc-restore-__ldcw_align-for-pa-risc-2.0-processors.patch
 nfs-fix-a-race-in-__nfs_list_for_each_server.patch
 ima-rework-config_ima-dependency-block.patch
+xen-events-replace-evtchn_rwlock-with-rcu.patch
diff --git a/queue-5.4/xen-events-replace-evtchn_rwlock-with-rcu.patch b/queue-5.4/xen-events-replace-evtchn_rwlock-with-rcu.patch
new file mode 100644
index 00000000000..4849d0c02a9
--- /dev/null
+++ b/queue-5.4/xen-events-replace-evtchn_rwlock-with-rcu.patch
@@ -0,0 +1,252 @@
+From 61d85fe2c67879d34f221cbed486ba4489fae0b3 Mon Sep 17 00:00:00 2001
+From: Juergen Gross
+Date: Mon, 28 Aug 2023 08:09:47 +0200
+Subject: xen/events: replace evtchn_rwlock with RCU
+
+From: Juergen Gross
+
+commit 87797fad6cce28ec9be3c13f031776ff4f104cfc upstream.
+
+In unprivileged Xen guests event handling can cause a deadlock with
+Xen console handling. The evtchn_rwlock and the hvc_lock are taken in
+opposite sequence in __hvc_poll() and in Xen console IRQ handling.
+Normally this is no problem, as the evtchn_rwlock is taken as a reader
+in both paths, but as soon as an event channel is being closed, the
+lock will be taken as a writer, which will cause read_lock() to block:
+
+CPU0                     CPU1                 CPU2
+(IRQ handling)           (__hvc_poll())       (closing event channel)
+
+read_lock(evtchn_rwlock)
+                         spin_lock(hvc_lock)
+                                              write_lock(evtchn_rwlock)
+                                              [blocks]
+spin_lock(hvc_lock)
+    [blocks]
+                         read_lock(evtchn_rwlock)
+                             [blocks due to writer waiting,
+                              and not in_interrupt()]
+
+This issue can be avoided by replacing evtchn_rwlock with RCU in
+xen_free_irq(). Note that RCU is used only to delay freeing of the
+irq_info memory. There is no RCU based dereferencing or replacement of
+pointers involved.
+
+In order to avoid potential races between removing the irq_info
+reference and handling of interrupts, set the irq_info pointer to NULL
+only when freeing its memory. The IRQ itself must be freed at that
+time, too, as otherwise the same IRQ number could be allocated again
+before handling of the old instance would have been finished.
+
+This is XSA-441 / CVE-2023-34324.
+
+Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework")
+Reported-by: Marek Marczykowski-Górecki
+Signed-off-by: Juergen Gross
+Reviewed-by: Julien Grall
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/xen/events/events_base.c     |   85 ++++++++++++++++++-----------
+ drivers/xen/events/events_internal.h |    2 
+ 2 files changed, 46 insertions(+), 41 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -84,22 +84,12 @@ const struct evtchn_ops *evtchn_ops;
+ static DEFINE_MUTEX(irq_mapping_update_lock);
+ 
+ /*
+- * Lock protecting event handling loop against removing event channels.
+- * Adding of event channels is no issue as the associated IRQ becomes active
+- * only after everything is setup (before request_[threaded_]irq() the handler
+- * can't be entered for an event, as the event channel will be unmasked only
+- * then).
+- */
+-static DEFINE_RWLOCK(evtchn_rwlock);
+-
+-/*
+  * Lock hierarchy:
+  *
+  * irq_mapping_update_lock
+- *   evtchn_rwlock
+- *     IRQ-desc lock
+- *       percpu eoi_list_lock
+- *         irq_info->lock
++ *   IRQ-desc lock
++ *     percpu eoi_list_lock
++ *       irq_info->lock
+  */
+ 
+ static LIST_HEAD(xen_irq_list_head);
+@@ -214,6 +204,22 @@ static void set_info_for_irq(unsigned in
+ 	irq_set_chip_data(irq, info);
+ }
+ 
++static void delayed_free_irq(struct work_struct *work)
++{
++	struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
++					     rwork);
++	unsigned int irq = info->irq;
++
++	/* Remove the info pointer only now, with no potential users left. */
++	set_info_for_irq(irq, NULL);
++
++	kfree(info);
++
++	/* Legacy IRQ descriptors are managed by the arch. */
++	if (irq >= nr_legacy_irqs())
++		irq_free_desc(irq);
++}
++
+ /* Constructors for packed IRQ information. */
+ static int xen_irq_info_common_setup(struct irq_info *info,
+ 				     unsigned irq,
+@@ -548,33 +554,36 @@ static void xen_irq_lateeoi_worker(struc
+ 
+ 	eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+ 
+-	read_lock_irqsave(&evtchn_rwlock, flags);
++	rcu_read_lock();
+ 
+ 	while (true) {
+-		spin_lock(&eoi->eoi_list_lock);
++		spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+ 
+ 		info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ 						eoi_list);
+ 
+-		if (info == NULL || now < info->eoi_time) {
+-			spin_unlock(&eoi->eoi_list_lock);
++		if (info == NULL)
++			break;
++
++		if (now < info->eoi_time) {
++			mod_delayed_work_on(info->eoi_cpu, system_wq,
++					    &eoi->delayed,
++					    info->eoi_time - now);
+ 			break;
+ 		}
+ 
+ 		list_del_init(&info->eoi_list);
+ 
+-		spin_unlock(&eoi->eoi_list_lock);
++		spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+ 
+ 		info->eoi_time = 0;
+ 
+ 		xen_irq_lateeoi_locked(info, false);
+ 	}
+ 
+-	if (info)
+-		mod_delayed_work_on(info->eoi_cpu, system_wq,
+-				    &eoi->delayed, info->eoi_time - now);
++	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+ 
+-	read_unlock_irqrestore(&evtchn_rwlock, flags);
++	rcu_read_unlock();
+ }
+ 
+ static void xen_cpu_init_eoi(unsigned int cpu)
+@@ -589,16 +598,15 @@ static void xen_cpu_init_eoi(unsigned in
+ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+ {
+ 	struct irq_info *info;
+-	unsigned long flags;
+ 
+-	read_lock_irqsave(&evtchn_rwlock, flags);
++	rcu_read_lock();
+ 
+ 	info = info_for_irq(irq);
+ 
+ 	if (info)
+ 		xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+ 
+-	read_unlock_irqrestore(&evtchn_rwlock, flags);
++	rcu_read_unlock();
+ }
+ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+ 
+@@ -617,6 +625,7 @@ static void xen_irq_init(unsigned irq)
+ 
+ 	info->type = IRQT_UNBOUND;
+ 	info->refcnt = -1;
++	INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+ 
+ 	set_info_for_irq(irq, info);
+ 
+@@ -669,31 +678,18 @@ static int __must_check xen_allocate_irq
+ static void xen_free_irq(unsigned irq)
+ {
+ 	struct irq_info *info = info_for_irq(irq);
+-	unsigned long flags;
+ 
+ 	if (WARN_ON(!info))
+ 		return;
+ 
+-	write_lock_irqsave(&evtchn_rwlock, flags);
+-
+ 	if (!list_empty(&info->eoi_list))
+ 		lateeoi_list_del(info);
+ 
+ 	list_del(&info->list);
+ 
+-	set_info_for_irq(irq, NULL);
+-
+ 	WARN_ON(info->refcnt > 0);
+ 
+-	write_unlock_irqrestore(&evtchn_rwlock, flags);
+-
+-	kfree(info);
+-
+-	/* Legacy IRQ descriptors are managed by the arch. */
+-	if (irq < nr_legacy_irqs())
+-		return;
+-
+-	irq_free_desc(irq);
++	queue_rcu_work(system_wq, &info->rwork);
+ }
+ 
+ static void xen_evtchn_close(unsigned int port)
+@@ -1604,7 +1600,14 @@ static void __xen_evtchn_do_upcall(void)
+ 	unsigned count;
+ 	struct evtchn_loop_ctrl ctrl = { 0 };
+ 
+-	read_lock(&evtchn_rwlock);
++	/*
++	 * When closing an event channel the associated IRQ must not be freed
++	 * until all cpus have left the event handling loop. This is ensured
++	 * by taking the rcu_read_lock() while handling events, as freeing of
++	 * the IRQ is handled via queue_rcu_work() _after_ closing the event
++	 * channel.
++	 */
++	rcu_read_lock();
+ 
+ 	do {
+ 		vcpu_info->evtchn_upcall_pending = 0;
+@@ -1621,7 +1624,7 @@ static void __xen_evtchn_do_upcall(void)
+ 	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
+ 
+ out:
+-	read_unlock(&evtchn_rwlock);
++	rcu_read_unlock();
+ 
+ 	/*
+ 	 * Increment irq_epoch only now to defer EOIs only for
+--- a/drivers/xen/events/events_internal.h
++++ b/drivers/xen/events/events_internal.h
+@@ -6,6 +6,7 @@
+  */
+ #ifndef __EVENTS_INTERNAL_H__
+ #define __EVENTS_INTERNAL_H__
++#include <linux/rcupdate.h>
+ 
+ /* Interrupt types. */
+ enum xen_irq_type {
+@@ -31,6 +32,7 @@ enum xen_irq_type {
+ struct irq_info {
+ 	struct list_head list;
+ 	struct list_head eoi_list;
++	struct rcu_work rwork;
+ 	short refcnt;
+ 	short spurious_cnt;
+ 	short type;	/* type */
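
Note: as a standalone illustration of the pattern the patch adopts, here is a
minimal kernel-module sketch of RCU-deferred freeing via rcu_work. All demo_*
names are invented for this sketch; only the kernel primitives it uses
(rcu_read_lock(), INIT_RCU_WORK(), queue_rcu_work(), to_rcu_work(),
rcu_barrier()) are real. It mirrors the patch's shape: readers look up an
entry under rcu_read_lock() instead of a rwlock, and removal queues the
kfree() through queue_rcu_work() so it runs only after all readers are gone.
This is a sketch of the technique, not the patch itself.

// SPDX-License-Identifier: GPL-2.0
/*
 * Minimal sketch of the rcu_work pattern used by the patch above.
 * demo_* names are hypothetical; the primitives are real kernel APIs.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_info {
	struct rcu_work rwork;	/* lets queue_rcu_work() defer the kfree() */
	int id;
};

static struct demo_info *demo_table[16];

/*
 * Runs only after a full RCU grace period, i.e. after every CPU that was
 * inside rcu_read_lock() when the free was queued has left its read-side
 * section. As in delayed_free_irq(), the lookup pointer is cleared only
 * here, so racing readers never see freed memory.
 */
static void demo_delayed_free(struct work_struct *work)
{
	struct demo_info *info = container_of(to_rcu_work(work),
					      struct demo_info, rwork);

	WRITE_ONCE(demo_table[info->id], NULL);
	kfree(info);
}

/* Reader side: an RCU read-side section replaces read_lock(). */
static void demo_handle(int id)
{
	struct demo_info *info;

	rcu_read_lock();
	info = READ_ONCE(demo_table[id]);
	if (info)
		pr_info("handling entry %d\n", info->id);
	rcu_read_unlock();
}

/* Writer side (assumed serialized by the caller, like xen_free_irq()). */
static void demo_remove(int id)
{
	struct demo_info *info = demo_table[id];

	if (info)
		queue_rcu_work(system_wq, &info->rwork);
}

static int __init demo_init(void)
{
	struct demo_info *info = kzalloc(sizeof(*info), GFP_KERNEL);

	if (!info)
		return -ENOMEM;
	info->id = 0;
	INIT_RCU_WORK(&info->rwork, demo_delayed_free);
	demo_table[0] = info;

	demo_handle(0);
	return 0;
}

static void __exit demo_exit(void)
{
	demo_remove(0);
	/* Wait for the grace period, then for the queued work itself. */
	rcu_barrier();
	flush_workqueue(system_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The key property, as in the patch, is that the lookup pointer is cleared only
inside the deferred worker, so a reader that races with removal either sees
the old, still-valid entry or NULL, never freed memory, and no reader/writer
lock ordering against other subsystem locks can deadlock.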