From: Greg Kroah-Hartman Date: Mon, 2 Nov 2020 15:41:31 +0000 (+0100) Subject: 5.9-stable patches X-Git-Tag: v4.14.204~50 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7fcc1cc7321aa5ac0a0b01a35309d40c1b197c56;p=thirdparty%2Fkernel%2Fstable-queue.git 5.9-stable patches added patches: xen-blkback-use-lateeoi-irq-binding.patch xen-events-add-a-new-late-eoi-evtchn-framework.patch xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch xen-events-avoid-removing-an-event-channel-while-handling-it.patch xen-events-block-rogue-events-for-some-time.patch xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch xen-events-fix-race-in-evtchn_fifo_unmask.patch xen-events-switch-user-event-channels-to-lateeoi-model.patch xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch xen-netback-use-lateeoi-irq-binding.patch xen-pciback-use-lateeoi-irq-binding.patch xen-pvcallsback-use-lateeoi-irq-binding.patch xen-scsiback-use-lateeoi-irq-binding.patch --- diff --git a/queue-5.9/series b/queue-5.9/series new file mode 100644 index 00000000000..06e28d21573 --- /dev/null +++ b/queue-5.9/series @@ -0,0 +1,13 @@ +xen-events-avoid-removing-an-event-channel-while-handling-it.patch +xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch +xen-events-fix-race-in-evtchn_fifo_unmask.patch +xen-events-add-a-new-late-eoi-evtchn-framework.patch +xen-blkback-use-lateeoi-irq-binding.patch +xen-netback-use-lateeoi-irq-binding.patch +xen-scsiback-use-lateeoi-irq-binding.patch +xen-pvcallsback-use-lateeoi-irq-binding.patch +xen-pciback-use-lateeoi-irq-binding.patch +xen-events-switch-user-event-channels-to-lateeoi-model.patch +xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch +xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch +xen-events-block-rogue-events-for-some-time.patch diff --git a/queue-5.9/xen-blkback-use-lateeoi-irq-binding.patch b/queue-5.9/xen-blkback-use-lateeoi-irq-binding.patch new file mode 100644 index 00000000000..acbee019047 --- /dev/null +++ b/queue-5.9/xen-blkback-use-lateeoi-irq-binding.patch @@ -0,0 +1,126 @@ +From 01263a1fabe30b4d542f34c7e2364a22587ddaf2 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:27 +0200 +Subject: xen/blkback: use lateeoi irq binding + +From: Juergen Gross + +commit 01263a1fabe30b4d542f34c7e2364a22587ddaf2 upstream. + +In order to reduce the chance for the system becoming unresponsive due +to event storms triggered by a misbehaving blkfront use the lateeoi +irq binding for blkback and unmask the event channel only after +processing all pending requests. + +As the thread processing requests is used to do purging work in regular +intervals an EOI may be sent only after having received an event. If +there was no pending I/O request flag the EOI as spurious. + +This is part of XSA-332. 
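
For illustration only, the shape of the resulting scheduler loop can be condensed
roughly as follows. This is a simplified sketch with made-up names (backend_ring,
process_requests), not the literal driver code; the real logic is in
xen_blkif_schedule()/__do_block_io_op() in the diff below:

    #include <linux/kthread.h>
    #include <linux/wait.h>
    #include <xen/events.h>

    struct backend_ring {                    /* illustrative stand-in */
            wait_queue_head_t wq;
            bool waiting_reqs;               /* set by the event handler */
            unsigned int irq;                /* lateeoi-bound IRQ */
    };

    /* Driver-specific: consume all pending requests, return how many. */
    int process_requests(struct backend_ring *ring);

    static int backend_thread(void *arg)
    {
            struct backend_ring *ring = arg;
            unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
            bool do_eoi;

            while (!kthread_should_stop()) {
                    wait_event_interruptible(ring->wq,
                                             ring->waiting_reqs ||
                                             kthread_should_stop());

                    do_eoi = ring->waiting_reqs;   /* woken by an event? */
                    ring->waiting_reqs = false;
                    smp_mb();   /* clear flag *before* checking for work */

                    if (process_requests(ring) > 0) {
                            ring->waiting_reqs = true;
                            /* a real request arrived, EOI is not spurious */
                            eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
                    }

                    /* Unmask only after all pending requests were handled. */
                    if (do_eoi && !ring->waiting_reqs) {
                            xen_irq_lateeoi(ring->irq, eoi_flags);
                            eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
                    }
            }
            return 0;
    }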
+ +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/xen-blkback/blkback.c | 22 +++++++++++++++++----- + drivers/block/xen-blkback/xenbus.c | 5 ++--- + 2 files changed, 19 insertions(+), 8 deletions(-) + +--- a/drivers/block/xen-blkback/blkback.c ++++ b/drivers/block/xen-blkback/blkback.c +@@ -201,7 +201,7 @@ static inline void shrink_free_pagepool( + + #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) + +-static int do_block_io_op(struct xen_blkif_ring *ring); ++static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags); + static int dispatch_rw_block_io(struct xen_blkif_ring *ring, + struct blkif_request *req, + struct pending_req *pending_req); +@@ -612,6 +612,8 @@ int xen_blkif_schedule(void *arg) + struct xen_vbd *vbd = &blkif->vbd; + unsigned long timeout; + int ret; ++ bool do_eoi; ++ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; + + set_freezable(); + while (!kthread_should_stop()) { +@@ -636,16 +638,23 @@ int xen_blkif_schedule(void *arg) + if (timeout == 0) + goto purge_gnt_list; + ++ do_eoi = ring->waiting_reqs; ++ + ring->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + +- ret = do_block_io_op(ring); ++ ret = do_block_io_op(ring, &eoi_flags); + if (ret > 0) + ring->waiting_reqs = 1; + if (ret == -EACCES) + wait_event_interruptible(ring->shutdown_wq, + kthread_should_stop()); + ++ if (do_eoi && !ring->waiting_reqs) { ++ xen_irq_lateeoi(ring->irq, eoi_flags); ++ eoi_flags |= XEN_EOI_FLAG_SPURIOUS; ++ } ++ + purge_gnt_list: + if (blkif->vbd.feature_gnt_persistent && + time_after(jiffies, ring->next_lru)) { +@@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio * + * and transmute it to the block API to hand it over to the proper block disk. + */ + static int +-__do_block_io_op(struct xen_blkif_ring *ring) ++__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) + { + union blkif_back_rings *blk_rings = &ring->blk_rings; + struct blkif_request req; +@@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring * + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + ++ /* We've seen a request, so clear spurious eoi flag. 
*/ ++ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; ++ + if (kthread_should_stop()) { + more_to_do = 1; + break; +@@ -1202,13 +1214,13 @@ done: + } + + static int +-do_block_io_op(struct xen_blkif_ring *ring) ++do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) + { + union blkif_back_rings *blk_rings = &ring->blk_rings; + int more_to_do; + + do { +- more_to_do = __do_block_io_op(ring); ++ more_to_do = __do_block_io_op(ring, eoi_flags); + if (more_to_do) + break; + +--- a/drivers/block/xen-blkback/xenbus.c ++++ b/drivers/block/xen-blkback/xenbus.c +@@ -246,9 +246,8 @@ static int xen_blkif_map(struct xen_blki + if (req_prod - rsp_prod > size) + goto fail; + +- err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, +- xen_blkif_be_int, 0, +- "blkif-backend", ring); ++ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid, ++ evtchn, xen_blkif_be_int, 0, "blkif-backend", ring); + if (err < 0) + goto fail; + ring->irq = err; diff --git a/queue-5.9/xen-events-add-a-new-late-eoi-evtchn-framework.patch b/queue-5.9/xen-events-add-a-new-late-eoi-evtchn-framework.patch new file mode 100644 index 00000000000..bb11c6f8694 --- /dev/null +++ b/queue-5.9/xen-events-add-a-new-late-eoi-evtchn-framework.patch @@ -0,0 +1,337 @@ +From 54c9de89895e0a36047fcc4ae754ea5b8655fb9d Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:27 +0200 +Subject: xen/events: add a new "late EOI" evtchn framework + +From: Juergen Gross + +commit 54c9de89895e0a36047fcc4ae754ea5b8655fb9d upstream. + +In order to avoid tight event channel related IRQ loops add a new +framework of "late EOI" handling: the IRQ the event channel is bound +to will be masked until the event has been handled and the related +driver is capable to handle another event. The driver is responsible +for unmasking the event channel via the new function xen_irq_lateeoi(). + +This is similar to binding an event channel to a threaded IRQ, but +without having to structure the driver accordingly. + +In order to support a future special handling in case a rogue guest +is sending lots of unsolicited events, add a flag to xen_irq_lateeoi() +which can be set by the caller to indicate the event was a spurious +one. + +This is part of XSA-332. 
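
A minimal usage sketch of the new model follows. The driver names (my_backend*)
are hypothetical, while bind_interdomain_evtchn_to_irqhandler_lateeoi(),
xen_irq_lateeoi() and XEN_EOI_FLAG_SPURIOUS are the interfaces added by this
patch:

    #include <linux/interrupt.h>
    #include <xen/events.h>

    struct my_backend {                  /* illustrative only */
            unsigned int irq;
    };

    /* Driver-specific: handle all pending work, return true if any was found. */
    bool my_backend_handle_requests(struct my_backend *be);

    static irqreturn_t my_backend_interrupt(int irq, void *dev_id)
    {
            struct my_backend *be = dev_id;
            unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;

            if (my_backend_handle_requests(be))
                    eoi_flag = 0;        /* the event was not spurious */

            /* Re-enable the (still masked) event channel. */
            xen_irq_lateeoi(irq, eoi_flag);

            return IRQ_HANDLED;
    }

    static int my_backend_connect(struct my_backend *be, unsigned int domid,
                                  evtchn_port_t evtchn)
    {
            int err;

            err = bind_interdomain_evtchn_to_irqhandler_lateeoi(domid, evtchn,
                            my_backend_interrupt, 0, "my-backend", be);
            if (err < 0)
                    return err;
            be->irq = err;
            return 0;
    }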
+ +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events/events_base.c | 151 ++++++++++++++++++++++++++++++++++----- + include/xen/events.h | 21 +++++ + 2 files changed, 155 insertions(+), 17 deletions(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -113,6 +113,7 @@ static bool (*pirq_needs_eoi)(unsigned i + static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; + + static struct irq_chip xen_dynamic_chip; ++static struct irq_chip xen_lateeoi_chip; + static struct irq_chip xen_percpu_chip; + static struct irq_chip xen_pirq_chip; + static void enable_dynirq(struct irq_data *data); +@@ -397,6 +398,33 @@ void notify_remote_via_irq(int irq) + } + EXPORT_SYMBOL_GPL(notify_remote_via_irq); + ++static void xen_irq_lateeoi_locked(struct irq_info *info) ++{ ++ evtchn_port_t evtchn; ++ ++ evtchn = info->evtchn; ++ if (!VALID_EVTCHN(evtchn)) ++ return; ++ ++ unmask_evtchn(evtchn); ++} ++ ++void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) ++{ ++ struct irq_info *info; ++ unsigned long flags; ++ ++ read_lock_irqsave(&evtchn_rwlock, flags); ++ ++ info = info_for_irq(irq); ++ ++ if (info) ++ xen_irq_lateeoi_locked(info); ++ ++ read_unlock_irqrestore(&evtchn_rwlock, flags); ++} ++EXPORT_SYMBOL_GPL(xen_irq_lateeoi); ++ + static void xen_irq_init(unsigned irq) + { + struct irq_info *info; +@@ -868,7 +896,7 @@ int xen_pirq_from_irq(unsigned irq) + } + EXPORT_SYMBOL_GPL(xen_pirq_from_irq); + +-int bind_evtchn_to_irq(evtchn_port_t evtchn) ++static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) + { + int irq; + int ret; +@@ -885,7 +913,7 @@ int bind_evtchn_to_irq(evtchn_port_t evt + if (irq < 0) + goto out; + +- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, ++ irq_set_chip_and_handler_name(irq, chip, + handle_edge_irq, "event"); + + ret = xen_irq_info_evtchn_setup(irq, evtchn); +@@ -906,8 +934,19 @@ out: + + return irq; + } ++ ++int bind_evtchn_to_irq(evtchn_port_t evtchn) ++{ ++ return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); ++} + EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); + ++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) ++{ ++ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); ++} ++EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); ++ + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) + { + struct evtchn_bind_ipi bind_ipi; +@@ -949,8 +988,9 @@ static int bind_ipi_to_irq(unsigned int + return irq; + } + +-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, +- evtchn_port_t remote_port) ++static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, ++ evtchn_port_t remote_port, ++ struct irq_chip *chip) + { + struct evtchn_bind_interdomain bind_interdomain; + int err; +@@ -961,10 +1001,26 @@ int bind_interdomain_evtchn_to_irq(unsig + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + +- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); ++ return err ? 
: bind_evtchn_to_irq_chip(bind_interdomain.local_port, ++ chip); ++} ++ ++int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, ++ evtchn_port_t remote_port) ++{ ++ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, ++ &xen_dynamic_chip); + } + EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); + ++int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, ++ evtchn_port_t remote_port) ++{ ++ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, ++ &xen_lateeoi_chip); ++} ++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); ++ + static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) + { + struct evtchn_status status; +@@ -1061,14 +1117,15 @@ static void unbind_from_irq(unsigned int + mutex_unlock(&irq_mapping_update_lock); + } + +-int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, +- irq_handler_t handler, +- unsigned long irqflags, +- const char *devname, void *dev_id) ++static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, void *dev_id, ++ struct irq_chip *chip) + { + int irq, retval; + +- irq = bind_evtchn_to_irq(evtchn); ++ irq = bind_evtchn_to_irq_chip(evtchn, chip); + if (irq < 0) + return irq; + retval = request_irq(irq, handler, irqflags, devname, dev_id); +@@ -1079,18 +1136,38 @@ int bind_evtchn_to_irqhandler(evtchn_por + + return irq; + } ++ ++int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, void *dev_id) ++{ ++ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, ++ devname, dev_id, ++ &xen_dynamic_chip); ++} + EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); + +-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, +- evtchn_port_t remote_port, +- irq_handler_t handler, +- unsigned long irqflags, +- const char *devname, +- void *dev_id) ++int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, void *dev_id) ++{ ++ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, ++ devname, dev_id, ++ &xen_lateeoi_chip); ++} ++EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); ++ ++static int bind_interdomain_evtchn_to_irqhandler_chip( ++ unsigned int remote_domain, evtchn_port_t remote_port, ++ irq_handler_t handler, unsigned long irqflags, ++ const char *devname, void *dev_id, struct irq_chip *chip) + { + int irq, retval; + +- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); ++ irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, ++ chip); + if (irq < 0) + return irq; + +@@ -1102,8 +1179,33 @@ int bind_interdomain_evtchn_to_irqhandle + + return irq; + } ++ ++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, ++ evtchn_port_t remote_port, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, ++ void *dev_id) ++{ ++ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, ++ remote_port, handler, irqflags, devname, ++ dev_id, &xen_dynamic_chip); ++} + EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); + ++int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, ++ evtchn_port_t remote_port, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, ++ void *dev_id) ++{ ++ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, ++ remote_port, handler, irqflags, devname, ++ dev_id, 
&xen_lateeoi_chip); ++} ++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); ++ + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +@@ -1634,6 +1736,21 @@ static struct irq_chip xen_dynamic_chip + .irq_mask_ack = mask_ack_dynirq, + + .irq_set_affinity = set_affinity_irq, ++ .irq_retrigger = retrigger_dynirq, ++}; ++ ++static struct irq_chip xen_lateeoi_chip __read_mostly = { ++ /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ ++ .name = "xen-dyn-lateeoi", ++ ++ .irq_disable = disable_dynirq, ++ .irq_mask = disable_dynirq, ++ .irq_unmask = enable_dynirq, ++ ++ .irq_ack = mask_ack_dynirq, ++ .irq_mask_ack = mask_ack_dynirq, ++ ++ .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, + }; + +--- a/include/xen/events.h ++++ b/include/xen/events.h +@@ -15,10 +15,15 @@ + unsigned xen_evtchn_nr_channels(void); + + int bind_evtchn_to_irq(evtchn_port_t evtchn); ++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); + int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id); ++int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, ++ irq_handler_t handler, ++ unsigned long irqflags, const char *devname, ++ void *dev_id); + int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irq_handler_t handler, +@@ -32,12 +37,20 @@ int bind_ipi_to_irqhandler(enum ipi_vect + void *dev_id); + int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + evtchn_port_t remote_port); ++int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, ++ evtchn_port_t remote_port); + int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); ++int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, ++ evtchn_port_t remote_port, ++ irq_handler_t handler, ++ unsigned long irqflags, ++ const char *devname, ++ void *dev_id); + + /* + * Common unbind function for all event sources. Takes IRQ to unbind from. +@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandle + */ + void unbind_from_irqhandler(unsigned int irq, void *dev_id); + ++/* ++ * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi ++ * functions above. ++ */ ++void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags); ++/* Signal an event was spurious, i.e. there was no action resulting from it. */ ++#define XEN_EOI_FLAG_SPURIOUS 0x00000001 ++ + #define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX + #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT + #define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN diff --git a/queue-5.9/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch b/queue-5.9/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch new file mode 100644 index 00000000000..255edfe5b69 --- /dev/null +++ b/queue-5.9/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch @@ -0,0 +1,45 @@ +From 4d3fe31bd993ef504350989786858aefdb877daa Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:27 +0200 +Subject: xen/events: add a proper barrier to 2-level uevent unmasking + +From: Juergen Gross + +commit 4d3fe31bd993ef504350989786858aefdb877daa upstream. 
+ +A follow-up patch will require certain write to happen before an event +channel is unmasked. + +While the memory barrier is not strictly necessary for all the callers, +the main one will need it. In order to avoid an extra memory barrier +when using fifo event channels, mandate evtchn_unmask() to provide +write ordering. + +The 2-level event handling unmask operation is missing an appropriate +barrier, so add it. Fifo event channels are fine in this regard due to +using sync_cmpxchg(). + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Suggested-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Julien Grall +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events/events_2l.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/xen/events/events_2l.c ++++ b/drivers/xen/events/events_2l.c +@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(evtchn_port + + BUG_ON(!irqs_disabled()); + ++ smp_wmb(); /* All writes before unmask must be visible. */ ++ + if (unlikely((cpu != cpu_from_evtchn(port)))) + do_hypercall = 1; + else { diff --git a/queue-5.9/xen-events-avoid-removing-an-event-channel-while-handling-it.patch b/queue-5.9/xen-events-avoid-removing-an-event-channel-while-handling-it.patch new file mode 100644 index 00000000000..f97ed5743af --- /dev/null +++ b/queue-5.9/xen-events-avoid-removing-an-event-channel-while-handling-it.patch @@ -0,0 +1,162 @@ +From 073d0552ead5bfc7a3a9c01de590e924f11b5dd2 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:27 +0200 +Subject: xen/events: avoid removing an event channel while handling it +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Juergen Gross + +commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2 upstream. + +Today it can happen that an event channel is being removed from the +system while the event handling loop is active. This can lead to a +race resulting in crashes or WARN() splats when trying to access the +irq_info structure related to the event channel. + +Fix this problem by using a rwlock taken as reader in the event +handling loop and as writer when deallocating the irq_info structure. + +As the observed problem was a NULL dereference in evtchn_from_irq() +make this function more robust against races by testing the irq_info +pointer to be not NULL before dereferencing it. + +And finally make all accesses to evtchn_to_irq[row][col] atomic ones +in order to avoid seeing partial updates of an array element in irq +handling. Note that irq handling can be entered only for event channels +which have been valid before, so any not populated row isn't a problem +in this regard, as rows are only ever added and never removed. + +This is XSA-331. + +Cc: stable@vger.kernel.org +Reported-by: Marek Marczykowski-Górecki +Reported-by: Jinoh Kang +Signed-off-by: Juergen Gross +Reviewed-by: Stefano Stabellini +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events/events_base.c | 41 ++++++++++++++++++++++++++++++++++----- + 1 file changed, 36 insertions(+), 5 deletions(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_X86 + #include +@@ -71,6 +72,23 @@ const struct evtchn_ops *evtchn_ops; + */ + static DEFINE_MUTEX(irq_mapping_update_lock); + ++/* ++ * Lock protecting event handling loop against removing event channels. 
++ * Adding of event channels is no issue as the associated IRQ becomes active ++ * only after everything is setup (before request_[threaded_]irq() the handler ++ * can't be entered for an event, as the event channel will be unmasked only ++ * then). ++ */ ++static DEFINE_RWLOCK(evtchn_rwlock); ++ ++/* ++ * Lock hierarchy: ++ * ++ * irq_mapping_update_lock ++ * evtchn_rwlock ++ * IRQ-desc lock ++ */ ++ + static LIST_HEAD(xen_irq_list_head); + + /* IRQ <-> VIRQ mapping. */ +@@ -105,7 +123,7 @@ static void clear_evtchn_to_irq_row(unsi + unsigned col; + + for (col = 0; col < EVTCHN_PER_ROW; col++) +- evtchn_to_irq[row][col] = -1; ++ WRITE_ONCE(evtchn_to_irq[row][col], -1); + } + + static void clear_evtchn_to_irq_all(void) +@@ -142,7 +160,7 @@ static int set_evtchn_to_irq(evtchn_port + clear_evtchn_to_irq_row(row); + } + +- evtchn_to_irq[row][col] = irq; ++ WRITE_ONCE(evtchn_to_irq[row][col], irq); + return 0; + } + +@@ -152,7 +170,7 @@ int get_evtchn_to_irq(evtchn_port_t evtc + return -1; + if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) + return -1; +- return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; ++ return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); + } + + /* Get info for IRQ */ +@@ -261,10 +279,14 @@ static void xen_irq_info_cleanup(struct + */ + evtchn_port_t evtchn_from_irq(unsigned irq) + { +- if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) ++ const struct irq_info *info = NULL; ++ ++ if (likely(irq < nr_irqs)) ++ info = info_for_irq(irq); ++ if (!info) + return 0; + +- return info_for_irq(irq)->evtchn; ++ return info->evtchn; + } + + unsigned int irq_from_evtchn(evtchn_port_t evtchn) +@@ -440,16 +462,21 @@ static int __must_check xen_allocate_irq + static void xen_free_irq(unsigned irq) + { + struct irq_info *info = info_for_irq(irq); ++ unsigned long flags; + + if (WARN_ON(!info)) + return; + ++ write_lock_irqsave(&evtchn_rwlock, flags); ++ + list_del(&info->list); + + set_info_for_irq(irq, NULL); + + WARN_ON(info->refcnt > 0); + ++ write_unlock_irqrestore(&evtchn_rwlock, flags); ++ + kfree(info); + + /* Legacy IRQ descriptors are managed by the arch. */ +@@ -1233,6 +1260,8 @@ static void __xen_evtchn_do_upcall(void) + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + int cpu = smp_processor_id(); + ++ read_lock(&evtchn_rwlock); ++ + do { + vcpu_info->evtchn_upcall_pending = 0; + +@@ -1243,6 +1272,8 @@ static void __xen_evtchn_do_upcall(void) + virt_rmb(); /* Hypervisor can set upcall pending. */ + + } while (vcpu_info->evtchn_upcall_pending); ++ ++ read_unlock(&evtchn_rwlock); + } + + void xen_evtchn_do_upcall(struct pt_regs *regs) diff --git a/queue-5.9/xen-events-block-rogue-events-for-some-time.patch b/queue-5.9/xen-events-block-rogue-events-for-some-time.patch new file mode 100644 index 00000000000..34138141802 --- /dev/null +++ b/queue-5.9/xen-events-block-rogue-events-for-some-time.patch @@ -0,0 +1,115 @@ +From 5f7f77400ab5b357b5fdb7122c3442239672186c Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 14 Sep 2020 14:01:02 +0200 +Subject: xen/events: block rogue events for some time + +From: Juergen Gross + +commit 5f7f77400ab5b357b5fdb7122c3442239672186c upstream. + +In order to avoid high dom0 load due to rogue guests sending events at +high frequency, block those events in case there was no action needed +in dom0 to handle the events. 
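
The delay computation described in the following paragraphs can be summarised
by this simplified sketch (illustrative only, not the literal kernel code):

    #include <linux/jiffies.h>

    /* spurious_cnt is incremented for every EOI flagged as spurious. */
    static unsigned long rogue_event_delay(unsigned int spurious_cnt)
    {
            unsigned long delay;

            if (spurious_cnt < 2)
                    return 0;                      /* no delay yet */

            delay = 1UL << (spurious_cnt - 2);     /* 1, 2, 4, ... jiffies */
            if (delay > HZ)
                    delay = HZ;                    /* cap at one second */

            return delay;
    }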
+ +This is done by adding a per-event counter, which set to zero in case +an EOI without the XEN_EOI_FLAG_SPURIOUS is received from a backend +driver, and incremented when this flag has been set. In case the +counter is 2 or higher delay the EOI by 1 << (cnt - 2) jiffies, but +not more than 1 second. + +In order not to waste memory shorten the per-event refcnt to two bytes +(it should normally never exceed a value of 2). Add an overflow check +to evtchn_get() to make sure the 2 bytes really won't overflow. + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Stefano Stabellini +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events/events_base.c | 27 ++++++++++++++++++++++----- + drivers/xen/events/events_internal.h | 3 ++- + 2 files changed, 24 insertions(+), 6 deletions(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -461,17 +461,34 @@ static void lateeoi_list_add(struct irq_ + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); + } + +-static void xen_irq_lateeoi_locked(struct irq_info *info) ++static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) + { + evtchn_port_t evtchn; + unsigned int cpu; ++ unsigned int delay = 0; + + evtchn = info->evtchn; + if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) + return; + ++ if (spurious) { ++ if ((1 << info->spurious_cnt) < (HZ << 2)) ++ info->spurious_cnt++; ++ if (info->spurious_cnt > 1) { ++ delay = 1 << (info->spurious_cnt - 2); ++ if (delay > HZ) ++ delay = HZ; ++ if (!info->eoi_time) ++ info->eoi_cpu = smp_processor_id(); ++ info->eoi_time = get_jiffies_64() + delay; ++ } ++ } else { ++ info->spurious_cnt = 0; ++ } ++ + cpu = info->eoi_cpu; +- if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) { ++ if (info->eoi_time && ++ (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { + lateeoi_list_add(info); + return; + } +@@ -508,7 +525,7 @@ static void xen_irq_lateeoi_worker(struc + + info->eoi_time = 0; + +- xen_irq_lateeoi_locked(info); ++ xen_irq_lateeoi_locked(info, false); + } + + if (info) +@@ -537,7 +554,7 @@ void xen_irq_lateeoi(unsigned int irq, u + info = info_for_irq(irq); + + if (info) +- xen_irq_lateeoi_locked(info); ++ xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); + + read_unlock_irqrestore(&evtchn_rwlock, flags); + } +@@ -1441,7 +1458,7 @@ int evtchn_get(evtchn_port_t evtchn) + goto done; + + err = -EINVAL; +- if (info->refcnt <= 0) ++ if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) + goto done; + + info->refcnt++; +--- a/drivers/xen/events/events_internal.h ++++ b/drivers/xen/events/events_internal.h +@@ -31,7 +31,8 @@ enum xen_irq_type { + struct irq_info { + struct list_head list; + struct list_head eoi_list; +- int refcnt; ++ short refcnt; ++ short spurious_cnt; + enum xen_irq_type type; /* type */ + unsigned irq; + evtchn_port_t evtchn; /* event channel */ diff --git a/queue-5.9/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch b/queue-5.9/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch new file mode 100644 index 00000000000..6b284c40dec --- /dev/null +++ b/queue-5.9/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch @@ -0,0 +1,517 @@ +From e99502f76271d6bc4e374fe368c50c67a1fd3070 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:30 +0200 +Subject: xen/events: defer eoi in case of excessive number of events + +From: Juergen Gross + +commit 
e99502f76271d6bc4e374fe368c50c67a1fd3070 upstream. + +In case rogue guests are sending events at high frequency it might +happen that xen_evtchn_do_upcall() won't stop processing events in +dom0. As this is done in irq handling a crash might be the result. + +In order to avoid that, delay further inter-domain events after some +time in xen_evtchn_do_upcall() by forcing eoi processing into a +worker on the same cpu, thus inhibiting new events coming in. + +The time after which eoi processing is to be delayed is configurable +via a new module parameter "event_loop_timeout" which specifies the +maximum event loop time in jiffies (default: 2, the value was chosen +after some tests showing that a value of 2 was the lowest with an +only slight drop of dom0 network throughput while multiple guests +performed an event storm). + +How long eoi processing will be delayed can be specified via another +parameter "event_eoi_delay" (again in jiffies, default 10, again the +value was chosen after testing with different delay values). + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Stefano Stabellini +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/admin-guide/kernel-parameters.txt | 8 + + drivers/xen/events/events_2l.c | 7 + drivers/xen/events/events_base.c | 189 +++++++++++++++++++++++- + drivers/xen/events/events_fifo.c | 30 +-- + drivers/xen/events/events_internal.h | 14 + + 5 files changed, 216 insertions(+), 32 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5828,6 +5828,14 @@ + improve timer resolution at the expense of processing + more timer interrupts. + ++ xen.event_eoi_delay= [XEN] ++ How long to delay EOI handling in case of event ++ storms (jiffies). Default is 10. ++ ++ xen.event_loop_timeout= [XEN] ++ After which time (jiffies) the event handling loop ++ should start to delay EOI handling. Default is 2. ++ + nopv= [X86,XEN,KVM,HYPER_V,VMWARE] + Disables the PV optimizations forcing the guest to run + as generic guest with no PV drivers. Currently support +--- a/drivers/xen/events/events_2l.c ++++ b/drivers/xen/events/events_2l.c +@@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns + * a bitset of words which contain pending event bits. The second + * level is a bitset of pending events themselves. + */ +-static void evtchn_2l_handle_events(unsigned cpu) ++static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) + { + int irq; + xen_ulong_t pending_words; +@@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsi + + /* Process port. */ + port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; +- irq = get_evtchn_to_irq(port); +- +- if (irq != -1) +- generic_handle_irq(irq); ++ handle_irq_for_port(port, ctrl); + + bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -35,6 +35,8 @@ + #include + #include + #include ++#include ++#include + + #ifdef CONFIG_X86 + #include +@@ -65,6 +67,15 @@ + + #include "events_internal.h" + ++#undef MODULE_PARAM_PREFIX ++#define MODULE_PARAM_PREFIX "xen." 
++ ++static uint __read_mostly event_loop_timeout = 2; ++module_param(event_loop_timeout, uint, 0644); ++ ++static uint __read_mostly event_eoi_delay = 10; ++module_param(event_eoi_delay, uint, 0644); ++ + const struct evtchn_ops *evtchn_ops; + + /* +@@ -88,6 +99,7 @@ static DEFINE_RWLOCK(evtchn_rwlock); + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock ++ * percpu eoi_list_lock + */ + + static LIST_HEAD(xen_irq_list_head); +@@ -120,6 +132,8 @@ static struct irq_chip xen_pirq_chip; + static void enable_dynirq(struct irq_data *data); + static void disable_dynirq(struct irq_data *data); + ++static DEFINE_PER_CPU(unsigned int, irq_epoch); ++ + static void clear_evtchn_to_irq_row(unsigned row) + { + unsigned col; +@@ -399,17 +413,120 @@ void notify_remote_via_irq(int irq) + } + EXPORT_SYMBOL_GPL(notify_remote_via_irq); + ++struct lateeoi_work { ++ struct delayed_work delayed; ++ spinlock_t eoi_list_lock; ++ struct list_head eoi_list; ++}; ++ ++static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); ++ ++static void lateeoi_list_del(struct irq_info *info) ++{ ++ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&eoi->eoi_list_lock, flags); ++ list_del_init(&info->eoi_list); ++ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); ++} ++ ++static void lateeoi_list_add(struct irq_info *info) ++{ ++ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); ++ struct irq_info *elem; ++ u64 now = get_jiffies_64(); ++ unsigned long delay; ++ unsigned long flags; ++ ++ if (now < info->eoi_time) ++ delay = info->eoi_time - now; ++ else ++ delay = 1; ++ ++ spin_lock_irqsave(&eoi->eoi_list_lock, flags); ++ ++ if (list_empty(&eoi->eoi_list)) { ++ list_add(&info->eoi_list, &eoi->eoi_list); ++ mod_delayed_work_on(info->eoi_cpu, system_wq, ++ &eoi->delayed, delay); ++ } else { ++ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { ++ if (elem->eoi_time <= info->eoi_time) ++ break; ++ } ++ list_add(&info->eoi_list, &elem->eoi_list); ++ } ++ ++ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); ++} ++ + static void xen_irq_lateeoi_locked(struct irq_info *info) + { + evtchn_port_t evtchn; ++ unsigned int cpu; + + evtchn = info->evtchn; +- if (!VALID_EVTCHN(evtchn)) ++ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) + return; + ++ cpu = info->eoi_cpu; ++ if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) { ++ lateeoi_list_add(info); ++ return; ++ } ++ ++ info->eoi_time = 0; + unmask_evtchn(evtchn); + } + ++static void xen_irq_lateeoi_worker(struct work_struct *work) ++{ ++ struct lateeoi_work *eoi; ++ struct irq_info *info; ++ u64 now = get_jiffies_64(); ++ unsigned long flags; ++ ++ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); ++ ++ read_lock_irqsave(&evtchn_rwlock, flags); ++ ++ while (true) { ++ spin_lock(&eoi->eoi_list_lock); ++ ++ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, ++ eoi_list); ++ ++ if (info == NULL || now < info->eoi_time) { ++ spin_unlock(&eoi->eoi_list_lock); ++ break; ++ } ++ ++ list_del_init(&info->eoi_list); ++ ++ spin_unlock(&eoi->eoi_list_lock); ++ ++ info->eoi_time = 0; ++ ++ xen_irq_lateeoi_locked(info); ++ } ++ ++ if (info) ++ mod_delayed_work_on(info->eoi_cpu, system_wq, ++ &eoi->delayed, info->eoi_time - now); ++ ++ read_unlock_irqrestore(&evtchn_rwlock, flags); ++} ++ ++static void xen_cpu_init_eoi(unsigned int cpu) ++{ ++ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); ++ ++ INIT_DELAYED_WORK(&eoi->delayed, 
xen_irq_lateeoi_worker); ++ spin_lock_init(&eoi->eoi_list_lock); ++ INIT_LIST_HEAD(&eoi->eoi_list); ++} ++ + void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) + { + struct irq_info *info; +@@ -429,6 +546,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi); + static void xen_irq_init(unsigned irq) + { + struct irq_info *info; ++ + #ifdef CONFIG_SMP + /* By default all event channels notify CPU#0. */ + cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); +@@ -443,6 +561,7 @@ static void xen_irq_init(unsigned irq) + + set_info_for_irq(irq, info); + ++ INIT_LIST_HEAD(&info->eoi_list); + list_add_tail(&info->list, &xen_irq_list_head); + } + +@@ -498,6 +617,9 @@ static void xen_free_irq(unsigned irq) + + write_lock_irqsave(&evtchn_rwlock, flags); + ++ if (!list_empty(&info->eoi_list)) ++ lateeoi_list_del(info); ++ + list_del(&info->list); + + set_info_for_irq(irq, NULL); +@@ -1358,17 +1480,66 @@ void xen_send_IPI_one(unsigned int cpu, + notify_remote_via_irq(irq); + } + ++struct evtchn_loop_ctrl { ++ ktime_t timeout; ++ unsigned count; ++ bool defer_eoi; ++}; ++ ++void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) ++{ ++ int irq; ++ struct irq_info *info; ++ ++ irq = get_evtchn_to_irq(port); ++ if (irq == -1) ++ return; ++ ++ /* ++ * Check for timeout every 256 events. ++ * We are setting the timeout value only after the first 256 ++ * events in order to not hurt the common case of few loop ++ * iterations. The 256 is basically an arbitrary value. ++ * ++ * In case we are hitting the timeout we need to defer all further ++ * EOIs in order to ensure to leave the event handling loop rather ++ * sooner than later. ++ */ ++ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { ++ ktime_t kt = ktime_get(); ++ ++ if (!ctrl->timeout) { ++ kt = ktime_add_ms(kt, ++ jiffies_to_msecs(event_loop_timeout)); ++ ctrl->timeout = kt; ++ } else if (kt > ctrl->timeout) { ++ ctrl->defer_eoi = true; ++ } ++ } ++ ++ info = info_for_irq(irq); ++ ++ if (ctrl->defer_eoi) { ++ info->eoi_cpu = smp_processor_id(); ++ info->irq_epoch = __this_cpu_read(irq_epoch); ++ info->eoi_time = get_jiffies_64() + event_eoi_delay; ++ } ++ ++ generic_handle_irq(irq); ++} ++ + static void __xen_evtchn_do_upcall(void) + { + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + int cpu = smp_processor_id(); ++ struct evtchn_loop_ctrl ctrl = { 0 }; + + read_lock(&evtchn_rwlock); + + do { + vcpu_info->evtchn_upcall_pending = 0; + +- xen_evtchn_handle_events(cpu); ++ xen_evtchn_handle_events(cpu, &ctrl); + + BUG_ON(!irqs_disabled()); + +@@ -1377,6 +1548,13 @@ static void __xen_evtchn_do_upcall(void) + } while (vcpu_info->evtchn_upcall_pending); + + read_unlock(&evtchn_rwlock); ++ ++ /* ++ * Increment irq_epoch only now to defer EOIs only for ++ * xen_irq_lateeoi() invocations occurring from inside the loop ++ * above. ++ */ ++ __this_cpu_inc(irq_epoch); + } + + void xen_evtchn_do_upcall(struct pt_regs *regs) +@@ -1825,9 +2003,6 @@ void xen_setup_callback_vector(void) {} + static inline void xen_alloc_callback_vector(void) {} + #endif + +-#undef MODULE_PARAM_PREFIX +-#define MODULE_PARAM_PREFIX "xen." 
+- + static bool fifo_events = true; + module_param(fifo_events, bool, 0); + +@@ -1835,6 +2010,8 @@ static int xen_evtchn_cpu_prepare(unsign + { + int ret = 0; + ++ xen_cpu_init_eoi(cpu); ++ + if (evtchn_ops->percpu_init) + ret = evtchn_ops->percpu_init(cpu); + +@@ -1861,6 +2038,8 @@ void __init xen_init_IRQ(void) + if (ret < 0) + xen_evtchn_2l_init(); + ++ xen_cpu_init_eoi(smp_processor_id()); ++ + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, + "xen/evtchn:prepare", + xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); +--- a/drivers/xen/events/events_fifo.c ++++ b/drivers/xen/events/events_fifo.c +@@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile ev + return w & EVTCHN_FIFO_LINK_MASK; + } + +-static void handle_irq_for_port(evtchn_port_t port) +-{ +- int irq; +- +- irq = get_evtchn_to_irq(port); +- if (irq != -1) +- generic_handle_irq(irq); +-} +- +-static void consume_one_event(unsigned cpu, ++static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl, + struct evtchn_fifo_control_block *control_block, +- unsigned priority, unsigned long *ready, +- bool drop) ++ unsigned priority, unsigned long *ready) + { + struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); + uint32_t head; +@@ -320,16 +310,17 @@ static void consume_one_event(unsigned c + clear_bit(priority, ready); + + if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { +- if (unlikely(drop)) ++ if (unlikely(!ctrl)) + pr_warn("Dropping pending event for port %u\n", port); + else +- handle_irq_for_port(port); ++ handle_irq_for_port(port, ctrl); + } + + q->head[priority] = head; + } + +-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) ++static void __evtchn_fifo_handle_events(unsigned cpu, ++ struct evtchn_loop_ctrl *ctrl) + { + struct evtchn_fifo_control_block *control_block; + unsigned long ready; +@@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events( + + while (ready) { + q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); +- consume_one_event(cpu, control_block, q, &ready, drop); ++ consume_one_event(cpu, ctrl, control_block, q, &ready); + ready |= xchg(&control_block->ready, 0); + } + } + +-static void evtchn_fifo_handle_events(unsigned cpu) ++static void evtchn_fifo_handle_events(unsigned cpu, ++ struct evtchn_loop_ctrl *ctrl) + { +- __evtchn_fifo_handle_events(cpu, false); ++ __evtchn_fifo_handle_events(cpu, ctrl); + } + + static void evtchn_fifo_resume(void) +@@ -416,7 +408,7 @@ static int evtchn_fifo_percpu_init(unsig + + static int evtchn_fifo_percpu_deinit(unsigned int cpu) + { +- __evtchn_fifo_handle_events(cpu, true); ++ __evtchn_fifo_handle_events(cpu, NULL); + return 0; + } + +--- a/drivers/xen/events/events_internal.h ++++ b/drivers/xen/events/events_internal.h +@@ -30,11 +30,15 @@ enum xen_irq_type { + */ + struct irq_info { + struct list_head list; ++ struct list_head eoi_list; + int refcnt; + enum xen_irq_type type; /* type */ + unsigned irq; + evtchn_port_t evtchn; /* event channel */ + unsigned short cpu; /* cpu bound */ ++ unsigned short eoi_cpu; /* EOI must happen on this cpu */ ++ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ ++ u64 eoi_time; /* Time in jiffies when to EOI. 
*/ + + union { + unsigned short virq; +@@ -53,6 +57,8 @@ struct irq_info { + #define PIRQ_SHAREABLE (1 << 1) + #define PIRQ_MSI_GROUP (1 << 2) + ++struct evtchn_loop_ctrl; ++ + struct evtchn_ops { + unsigned (*max_channels)(void); + unsigned (*nr_channels)(void); +@@ -67,7 +73,7 @@ struct evtchn_ops { + void (*mask)(evtchn_port_t port); + void (*unmask)(evtchn_port_t port); + +- void (*handle_events)(unsigned cpu); ++ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl); + void (*resume)(void); + + int (*percpu_init)(unsigned int cpu); +@@ -78,6 +84,7 @@ extern const struct evtchn_ops *evtchn_o + + extern int **evtchn_to_irq; + int get_evtchn_to_irq(evtchn_port_t evtchn); ++void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); + + struct irq_info *info_for_irq(unsigned irq); + unsigned cpu_from_irq(unsigned irq); +@@ -135,9 +142,10 @@ static inline void unmask_evtchn(evtchn_ + return evtchn_ops->unmask(port); + } + +-static inline void xen_evtchn_handle_events(unsigned cpu) ++static inline void xen_evtchn_handle_events(unsigned cpu, ++ struct evtchn_loop_ctrl *ctrl) + { +- return evtchn_ops->handle_events(cpu); ++ return evtchn_ops->handle_events(cpu, ctrl); + } + + static inline void xen_evtchn_resume(void) diff --git a/queue-5.9/xen-events-fix-race-in-evtchn_fifo_unmask.patch b/queue-5.9/xen-events-fix-race-in-evtchn_fifo_unmask.patch new file mode 100644 index 00000000000..794ecfc2e3b --- /dev/null +++ b/queue-5.9/xen-events-fix-race-in-evtchn_fifo_unmask.patch @@ -0,0 +1,66 @@ +From f01337197419b7e8a492e83089552b77d3b5fb90 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Tue, 20 Oct 2020 06:52:55 +0200 +Subject: xen/events: fix race in evtchn_fifo_unmask() + +From: Juergen Gross + +commit f01337197419b7e8a492e83089552b77d3b5fb90 upstream. + +Unmasking a fifo event channel can result in unmasking it twice, once +directly in the kernel and once via a hypercall in case the event was +pending. + +Fix that by doing the local unmask only if the event is not pending. + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events/events_fifo.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/drivers/xen/events/events_fifo.c ++++ b/drivers/xen/events/events_fifo.c +@@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(evtchn + return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); + } + /* +- * Clear MASKED, spinning if BUSY is set. ++ * Clear MASKED if not PENDING, spinning if BUSY is set. ++ * Return true if mask was cleared. 
+ */ +-static void clear_masked(volatile event_word_t *word) ++static bool clear_masked_cond(volatile event_word_t *word) + { + event_word_t new, old, w; + + w = *word; + + do { ++ if (w & (1 << EVTCHN_FIFO_PENDING)) ++ return false; ++ + old = w & ~(1 << EVTCHN_FIFO_BUSY); + new = old & ~(1 << EVTCHN_FIFO_MASKED); + w = sync_cmpxchg(word, old, new); + } while (w != old); ++ ++ return true; + } + + static void evtchn_fifo_unmask(evtchn_port_t port) +@@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(evtchn_po + + BUG_ON(!irqs_disabled()); + +- clear_masked(word); +- if (evtchn_fifo_is_pending(port)) { ++ if (!clear_masked_cond(word)) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } diff --git a/queue-5.9/xen-events-switch-user-event-channels-to-lateeoi-model.patch b/queue-5.9/xen-events-switch-user-event-channels-to-lateeoi-model.patch new file mode 100644 index 00000000000..deb554354c5 --- /dev/null +++ b/queue-5.9/xen-events-switch-user-event-channels-to-lateeoi-model.patch @@ -0,0 +1,57 @@ +From c44b849cee8c3ac587da3b0980e01f77500d158c Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:29 +0200 +Subject: xen/events: switch user event channels to lateeoi model + +From: Juergen Gross + +commit c44b849cee8c3ac587da3b0980e01f77500d158c upstream. + +Instead of disabling the irq when an event is received and enabling +it again when handled by the user process use the lateeoi model. + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Tested-by: Stefano Stabellini +Reviewed-by: Stefano Stabellini +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/evtchn.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/drivers/xen/evtchn.c ++++ b/drivers/xen/evtchn.c +@@ -167,7 +167,6 @@ static irqreturn_t evtchn_interrupt(int + "Interrupt for port %u, but apparently not enabled; per-user %p\n", + evtchn->port, u); + +- disable_irq_nosync(irq); + evtchn->enabled = false; + + spin_lock(&u->ring_prod_lock); +@@ -293,7 +292,7 @@ static ssize_t evtchn_write(struct file + evtchn = find_evtchn(u, port); + if (evtchn && !evtchn->enabled) { + evtchn->enabled = true; +- enable_irq(irq_from_evtchn(port)); ++ xen_irq_lateeoi(irq_from_evtchn(port), 0); + } + } + +@@ -393,8 +392,8 @@ static int evtchn_bind_to_user(struct pe + if (rc < 0) + goto err; + +- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, +- u->name, evtchn); ++ rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, ++ u->name, evtchn); + if (rc < 0) + goto err; + diff --git a/queue-5.9/xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch b/queue-5.9/xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch new file mode 100644 index 00000000000..46218002dd9 --- /dev/null +++ b/queue-5.9/xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch @@ -0,0 +1,161 @@ +From 7beb290caa2adb0a399e735a1e175db9aae0523a Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Sun, 13 Sep 2020 14:23:02 +0200 +Subject: xen/events: use a common cpu hotplug hook for event channels + +From: Juergen Gross + +commit 7beb290caa2adb0a399e735a1e175db9aae0523a upstream. + +Today only fifo event channels have a cpu hotplug callback. In order +to prepare for more percpu (de)init work move that callback into +events_base.c and add percpu_init() and percpu_deinit() hooks to +struct evtchn_ops. 
+ +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/events/events_base.c | 25 +++++++++++++++++++++ + drivers/xen/events/events_fifo.c | 40 ++++++++++++++++------------------- + drivers/xen/events/events_internal.h | 3 ++ + 3 files changed, 47 insertions(+), 21 deletions(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_X86 + #include +@@ -1830,6 +1831,26 @@ static inline void xen_alloc_callback_ve + static bool fifo_events = true; + module_param(fifo_events, bool, 0); + ++static int xen_evtchn_cpu_prepare(unsigned int cpu) ++{ ++ int ret = 0; ++ ++ if (evtchn_ops->percpu_init) ++ ret = evtchn_ops->percpu_init(cpu); ++ ++ return ret; ++} ++ ++static int xen_evtchn_cpu_dead(unsigned int cpu) ++{ ++ int ret = 0; ++ ++ if (evtchn_ops->percpu_deinit) ++ ret = evtchn_ops->percpu_deinit(cpu); ++ ++ return ret; ++} ++ + void __init xen_init_IRQ(void) + { + int ret = -EINVAL; +@@ -1840,6 +1861,10 @@ void __init xen_init_IRQ(void) + if (ret < 0) + xen_evtchn_2l_init(); + ++ cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, ++ "xen/evtchn:prepare", ++ xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); ++ + evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), + sizeof(*evtchn_to_irq), GFP_KERNEL); + BUG_ON(!evtchn_to_irq); +--- a/drivers/xen/events/events_fifo.c ++++ b/drivers/xen/events/events_fifo.c +@@ -385,21 +385,6 @@ static void evtchn_fifo_resume(void) + event_array_pages = 0; + } + +-static const struct evtchn_ops evtchn_ops_fifo = { +- .max_channels = evtchn_fifo_max_channels, +- .nr_channels = evtchn_fifo_nr_channels, +- .setup = evtchn_fifo_setup, +- .bind_to_cpu = evtchn_fifo_bind_to_cpu, +- .clear_pending = evtchn_fifo_clear_pending, +- .set_pending = evtchn_fifo_set_pending, +- .is_pending = evtchn_fifo_is_pending, +- .test_and_set_mask = evtchn_fifo_test_and_set_mask, +- .mask = evtchn_fifo_mask, +- .unmask = evtchn_fifo_unmask, +- .handle_events = evtchn_fifo_handle_events, +- .resume = evtchn_fifo_resume, +-}; +- + static int evtchn_fifo_alloc_control_block(unsigned cpu) + { + void *control_block = NULL; +@@ -422,19 +407,36 @@ static int evtchn_fifo_alloc_control_blo + return ret; + } + +-static int xen_evtchn_cpu_prepare(unsigned int cpu) ++static int evtchn_fifo_percpu_init(unsigned int cpu) + { + if (!per_cpu(cpu_control_block, cpu)) + return evtchn_fifo_alloc_control_block(cpu); + return 0; + } + +-static int xen_evtchn_cpu_dead(unsigned int cpu) ++static int evtchn_fifo_percpu_deinit(unsigned int cpu) + { + __evtchn_fifo_handle_events(cpu, true); + return 0; + } + ++static const struct evtchn_ops evtchn_ops_fifo = { ++ .max_channels = evtchn_fifo_max_channels, ++ .nr_channels = evtchn_fifo_nr_channels, ++ .setup = evtchn_fifo_setup, ++ .bind_to_cpu = evtchn_fifo_bind_to_cpu, ++ .clear_pending = evtchn_fifo_clear_pending, ++ .set_pending = evtchn_fifo_set_pending, ++ .is_pending = evtchn_fifo_is_pending, ++ .test_and_set_mask = evtchn_fifo_test_and_set_mask, ++ .mask = evtchn_fifo_mask, ++ .unmask = evtchn_fifo_unmask, ++ .handle_events = evtchn_fifo_handle_events, ++ .resume = evtchn_fifo_resume, ++ .percpu_init = evtchn_fifo_percpu_init, ++ .percpu_deinit = evtchn_fifo_percpu_deinit, ++}; ++ + int __init xen_evtchn_fifo_init(void) + { + int cpu = smp_processor_id(); +@@ -448,9 +450,5 @@ int __init 
xen_evtchn_fifo_init(void) + + evtchn_ops = &evtchn_ops_fifo; + +- cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, +- "xen/evtchn:prepare", +- xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); +- + return ret; + } +--- a/drivers/xen/events/events_internal.h ++++ b/drivers/xen/events/events_internal.h +@@ -69,6 +69,9 @@ struct evtchn_ops { + + void (*handle_events)(unsigned cpu); + void (*resume)(void); ++ ++ int (*percpu_init)(unsigned int cpu); ++ int (*percpu_deinit)(unsigned int cpu); + }; + + extern const struct evtchn_ops *evtchn_ops; diff --git a/queue-5.9/xen-netback-use-lateeoi-irq-binding.patch b/queue-5.9/xen-netback-use-lateeoi-irq-binding.patch new file mode 100644 index 00000000000..8ec49d110eb --- /dev/null +++ b/queue-5.9/xen-netback-use-lateeoi-irq-binding.patch @@ -0,0 +1,256 @@ +From 23025393dbeb3b8b3b60ebfa724cdae384992e27 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:28 +0200 +Subject: xen/netback: use lateeoi irq binding + +From: Juergen Gross + +commit 23025393dbeb3b8b3b60ebfa724cdae384992e27 upstream. + +In order to reduce the chance for the system becoming unresponsive due +to event storms triggered by a misbehaving netfront use the lateeoi +irq binding for netback and unmask the event channel only just before +going to sleep waiting for new events. + +Make sure not to issue an EOI when none is pending by introducing an +eoi_pending element to struct xenvif_queue. + +When no request has been consumed set the spurious flag when sending +the EOI for an interrupt. + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/xen-netback/common.h | 15 ++++++++ + drivers/net/xen-netback/interface.c | 61 ++++++++++++++++++++++++++++++------ + drivers/net/xen-netback/netback.c | 11 +++++- + drivers/net/xen-netback/rx.c | 13 +++++-- + 4 files changed, 86 insertions(+), 14 deletions(-) + +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data + char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ + struct xenvif *vif; /* Parent VIF */ + ++ /* ++ * TX/RX common EOI handling. ++ * When feature-split-event-channels = 0, interrupt handler sets ++ * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set ++ * by the RX and TX interrupt handlers. ++ * RX and TX handler threads will issue an EOI when either ++ * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or ++ * NETBK_TX_EOI) are set and they will reset those bits. ++ */ ++ atomic_t eoi_pending; ++#define NETBK_RX_EOI 0x01 ++#define NETBK_TX_EOI 0x02 ++#define NETBK_COMMON_EOI 0x04 ++ + /* Use NAPI for guest TX */ + struct napi_struct napi; + /* When feature-split-event-channels = 0, tx_irq = rx_irq. 
*/ +@@ -378,6 +392,7 @@ int xenvif_dealloc_kthread(void *data); + + irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); + ++bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); + void xenvif_rx_action(struct xenvif_queue *queue); + void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vi + !vif->disabled; + } + ++static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue) ++{ ++ bool rc; ++ ++ rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); ++ if (rc) ++ napi_schedule(&queue->napi); ++ return rc; ++} ++ + static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) + { + struct xenvif_queue *queue = dev_id; ++ int old; + +- if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)) +- napi_schedule(&queue->napi); ++ old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending); ++ WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n"); ++ ++ if (!xenvif_handle_tx_interrupt(queue)) { ++ atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending); ++ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); ++ } + + return IRQ_HANDLED; + } +@@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struc + return work_done; + } + ++static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue) ++{ ++ bool rc; ++ ++ rc = xenvif_have_rx_work(queue, false); ++ if (rc) ++ xenvif_kick_thread(queue); ++ return rc; ++} ++ + static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) + { + struct xenvif_queue *queue = dev_id; ++ int old; + +- xenvif_kick_thread(queue); ++ old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending); ++ WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n"); ++ ++ if (!xenvif_handle_rx_interrupt(queue)) { ++ atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending); ++ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); ++ } + + return IRQ_HANDLED; + } + + irqreturn_t xenvif_interrupt(int irq, void *dev_id) + { +- xenvif_tx_interrupt(irq, dev_id); +- xenvif_rx_interrupt(irq, dev_id); ++ struct xenvif_queue *queue = dev_id; ++ int old; ++ ++ old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); ++ WARN(old, "Interrupt while EOI pending\n"); ++ ++ /* Use bitwise or as we need to call both functions. 
*/ ++ if ((!xenvif_handle_tx_interrupt(queue) | ++ !xenvif_handle_rx_interrupt(queue))) { ++ atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); ++ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); ++ } + + return IRQ_HANDLED; + } +@@ -605,7 +648,7 @@ int xenvif_connect_ctrl(struct xenvif *v + if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl)) + goto err_unmap; + +- err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); ++ err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn); + if (err < 0) + goto err_unmap; + +@@ -709,7 +752,7 @@ int xenvif_connect_data(struct xenvif_qu + + if (tx_evtchn == rx_evtchn) { + /* feature-split-event-channels == 0 */ +- err = bind_interdomain_evtchn_to_irqhandler( ++ err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + queue->vif->domid, tx_evtchn, xenvif_interrupt, 0, + queue->name, queue); + if (err < 0) +@@ -720,7 +763,7 @@ int xenvif_connect_data(struct xenvif_qu + /* feature-split-event-channels == 1 */ + snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), + "%s-tx", queue->name); +- err = bind_interdomain_evtchn_to_irqhandler( ++ err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, + queue->tx_irq_name, queue); + if (err < 0) +@@ -730,7 +773,7 @@ int xenvif_connect_data(struct xenvif_qu + + snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), + "%s-rx", queue->name); +- err = bind_interdomain_evtchn_to_irqhandler( ++ err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, + queue->rx_irq_name, queue); + if (err < 0) +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -169,6 +169,10 @@ void xenvif_napi_schedule_or_enable_even + + if (more_to_do) + napi_schedule(&queue->napi); ++ else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI, ++ &queue->eoi_pending) & ++ (NETBK_TX_EOI | NETBK_COMMON_EOI)) ++ xen_irq_lateeoi(queue->tx_irq, 0); + } + + static void tx_add_credit(struct xenvif_queue *queue) +@@ -1643,9 +1647,14 @@ static bool xenvif_ctrl_work_todo(struct + irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) + { + struct xenvif *vif = data; ++ unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS; + +- while (xenvif_ctrl_work_todo(vif)) ++ while (xenvif_ctrl_work_todo(vif)) { + xenvif_ctrl_action(vif); ++ eoi_flag = 0; ++ } ++ ++ xen_irq_lateeoi(irq, eoi_flag); + + return IRQ_HANDLED; + } +--- a/drivers/net/xen-netback/rx.c ++++ b/drivers/net/xen-netback/rx.c +@@ -503,13 +503,13 @@ static bool xenvif_rx_queue_ready(struct + return queue->stalled && prod - cons >= 1; + } + +-static bool xenvif_have_rx_work(struct xenvif_queue *queue) ++bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) + { + return xenvif_rx_ring_slots_available(queue) || + (queue->vif->stall_timeout && + (xenvif_rx_queue_stalled(queue) || + xenvif_rx_queue_ready(queue))) || +- kthread_should_stop() || ++ (test_kthread && kthread_should_stop()) || + queue->vif->disabled; + } + +@@ -540,15 +540,20 @@ static void xenvif_wait_for_rx_work(stru + { + DEFINE_WAIT(wait); + +- if (xenvif_have_rx_work(queue)) ++ if (xenvif_have_rx_work(queue, true)) + return; + + for (;;) { + long ret; + + prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); +- if (xenvif_have_rx_work(queue)) ++ if (xenvif_have_rx_work(queue, true)) + break; ++ if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI, ++ &queue->eoi_pending) & ++ (NETBK_RX_EOI | NETBK_COMMON_EOI)) ++ xen_irq_lateeoi(queue->rx_irq, 0); ++ + ret = 
schedule_timeout(xenvif_rx_queue_timeout(queue)); + if (!ret) + break; diff --git a/queue-5.9/xen-pciback-use-lateeoi-irq-binding.patch b/queue-5.9/xen-pciback-use-lateeoi-irq-binding.patch new file mode 100644 index 00000000000..9c4d8890d5a --- /dev/null +++ b/queue-5.9/xen-pciback-use-lateeoi-irq-binding.patch @@ -0,0 +1,223 @@ +From c2711441bc961b37bba0615dd7135857d189035f Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:29 +0200 +Subject: xen/pciback: use lateeoi irq binding + +From: Juergen Gross + +commit c2711441bc961b37bba0615dd7135857d189035f upstream. + +In order to reduce the chance for the system becoming unresponsive due +to event storms triggered by a misbehaving pcifront use the lateeoi irq +binding for pciback and unmask the event channel only just before +leaving the event handling function. + +Restructure the handling to support that scheme. Basically an event can +come in for two reasons: either a normal request for a pciback action, +which is handled in a worker, or in case the guest has finished an AER +request which was requested by pciback. + +When an AER request is issued to the guest and a normal pciback action +is currently active issue an EOI early in order to be able to receive +another event when the AER request has been finished by the guest. + +Let the worker processing the normal requests run until no further +request is pending, instead of starting a new worker ion that case. +Issue the EOI only just before leaving the worker. + +This scheme allows to drop calling the generic function +xen_pcibk_test_and_schedule_op() after processing of any request as +the handling of both request types is now separated more cleanly. + +This is part of XSA-332. + +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/xen-pciback/pci_stub.c | 13 ++++----- + drivers/xen/xen-pciback/pciback.h | 12 +++++++- + drivers/xen/xen-pciback/pciback_ops.c | 48 ++++++++++++++++++++++++++-------- + drivers/xen/xen-pciback/xenbus.c | 2 - + 4 files changed, 56 insertions(+), 19 deletions(-) + +--- a/drivers/xen/xen-pciback/pci_stub.c ++++ b/drivers/xen/xen-pciback/pci_stub.c +@@ -734,10 +734,17 @@ static pci_ers_result_t common_process(s + wmb(); + notify_remote_via_irq(pdev->evtchn_irq); + ++ /* Enable IRQ to signal "request done". */ ++ xen_pcibk_lateeoi(pdev, 0); ++ + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, + !(test_bit(_XEN_PCIB_active, (unsigned long *) + &sh_info->flags)), 300*HZ); + ++ /* Enable IRQ for pcifront request if not already active. 
*/ ++ if (!test_bit(_PDEVF_op_active, &pdev->flags)) ++ xen_pcibk_lateeoi(pdev, 0); ++ + if (!ret) { + if (test_bit(_XEN_PCIB_active, + (unsigned long *)&sh_info->flags)) { +@@ -751,12 +758,6 @@ static pci_ers_result_t common_process(s + } + clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); + +- if (test_bit(_XEN_PCIF_active, +- (unsigned long *)&sh_info->flags)) { +- dev_dbg(&psdev->dev->dev, "schedule pci_conf service\n"); +- xen_pcibk_test_and_schedule_op(psdev->pdev); +- } +- + res = (pci_ers_result_t)aer_op->err; + return res; + } +--- a/drivers/xen/xen-pciback/pciback.h ++++ b/drivers/xen/xen-pciback/pciback.h +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + + #define DRV_NAME "xen-pciback" +@@ -27,6 +28,8 @@ struct pci_dev_entry { + #define PDEVF_op_active (1<<(_PDEVF_op_active)) + #define _PCIB_op_pending (1) + #define PCIB_op_pending (1<<(_PCIB_op_pending)) ++#define _EOI_pending (2) ++#define EOI_pending (1<<(_EOI_pending)) + + struct xen_pcibk_device { + void *pci_dev_data; +@@ -183,10 +186,15 @@ static inline void xen_pcibk_release_dev + irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); + void xen_pcibk_do_op(struct work_struct *data); + ++static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev, ++ unsigned int eoi_flag) ++{ ++ if (test_and_clear_bit(_EOI_pending, &pdev->flags)) ++ xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag); ++} ++ + int xen_pcibk_xenbus_register(void); + void xen_pcibk_xenbus_unregister(void); +- +-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); + #endif + + /* Handles shared IRQs that can to device domain and control domain. */ +--- a/drivers/xen/xen-pciback/pciback_ops.c ++++ b/drivers/xen/xen-pciback/pciback_ops.c +@@ -276,26 +276,41 @@ int xen_pcibk_disable_msix(struct xen_pc + return 0; + } + #endif ++ ++static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev) ++{ ++ return test_bit(_XEN_PCIF_active, ++ (unsigned long *)&pdev->sh_info->flags) && ++ !test_and_set_bit(_PDEVF_op_active, &pdev->flags); ++} ++ + /* + * Now the same evtchn is used for both pcifront conf_read_write request + * as well as pcie aer front end ack. We use a new work_queue to schedule + * xen_pcibk conf_read_write service for avoiding confict with aer_core + * do_recovery job which also use the system default work_queue + */ +-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) ++static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) + { ++ bool eoi = true; ++ + /* Check that frontend is requesting an operation and that we are not + * already processing a request */ +- if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) +- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { ++ if (xen_pcibk_test_op_pending(pdev)) { + schedule_work(&pdev->op_work); ++ eoi = false; + } + /*_XEN_PCIB_active should have been cleared by pcifront. And also make + sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ + if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) + && test_bit(_PCIB_op_pending, &pdev->flags)) { + wake_up(&xen_pcibk_aer_wait_queue); ++ eoi = false; + } ++ ++ /* EOI if there was nothing to do. */ ++ if (eoi) ++ xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS); + } + + /* Performing the configuration space reads/writes must not be done in atomic +@@ -303,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(stru + * use of semaphores). 
This function is intended to be called from a work + * queue in process context taking a struct xen_pcibk_device as a parameter */ + +-void xen_pcibk_do_op(struct work_struct *data) ++static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev) + { +- struct xen_pcibk_device *pdev = +- container_of(data, struct xen_pcibk_device, op_work); + struct pci_dev *dev; + struct xen_pcibk_dev_data *dev_data = NULL; + struct xen_pci_op *op = &pdev->op; +@@ -379,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct + smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ + clear_bit(_PDEVF_op_active, &pdev->flags); + smp_mb__after_atomic(); /* /before/ final check for work */ ++} + +- /* Check to see if the driver domain tried to start another request in +- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. +- */ +- xen_pcibk_test_and_schedule_op(pdev); ++void xen_pcibk_do_op(struct work_struct *data) ++{ ++ struct xen_pcibk_device *pdev = ++ container_of(data, struct xen_pcibk_device, op_work); ++ ++ do { ++ xen_pcibk_do_one_op(pdev); ++ } while (xen_pcibk_test_op_pending(pdev)); ++ ++ xen_pcibk_lateeoi(pdev, 0); + } + + irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) + { + struct xen_pcibk_device *pdev = dev_id; ++ bool eoi; ++ ++ /* IRQs might come in before pdev->evtchn_irq is written. */ ++ if (unlikely(pdev->evtchn_irq != irq)) ++ pdev->evtchn_irq = irq; ++ ++ eoi = test_and_set_bit(_EOI_pending, &pdev->flags); ++ WARN(eoi, "IRQ while EOI pending\n"); + + xen_pcibk_test_and_schedule_op(pdev); + +--- a/drivers/xen/xen-pciback/xenbus.c ++++ b/drivers/xen/xen-pciback/xenbus.c +@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xe + + pdev->sh_info = vaddr; + +- err = bind_interdomain_evtchn_to_irqhandler( ++ err = bind_interdomain_evtchn_to_irqhandler_lateeoi( + pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, + 0, DRV_NAME, pdev); + if (err < 0) { diff --git a/queue-5.9/xen-pvcallsback-use-lateeoi-irq-binding.patch b/queue-5.9/xen-pvcallsback-use-lateeoi-irq-binding.patch new file mode 100644 index 00000000000..b53b4cdc38e --- /dev/null +++ b/queue-5.9/xen-pvcallsback-use-lateeoi-irq-binding.patch @@ -0,0 +1,230 @@ +From c8d647a326f06a39a8e5f0f1af946eacfa1835f8 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:28 +0200 +Subject: xen/pvcallsback: use lateeoi irq binding + +From: Juergen Gross + +commit c8d647a326f06a39a8e5f0f1af946eacfa1835f8 upstream. + +In order to reduce the chance for the system becoming unresponsive due +to event storms triggered by a misbehaving pvcallsfront use the lateeoi +irq binding for pvcallsback and unmask the event channel only after +handling all write requests, which are the ones coming in via an irq. + +This requires modifying the logic a little bit to not require an event +for each write request, but to keep the ioworker running until no +further data is found on the ring page to be processed. + +This is part of XSA-332. 
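A minimal sketch of the deferred-EOI pattern that the pciback and pvcallsback changes above implement, assuming only the lateeoi helpers added by the new event framework (xen_irq_lateeoi(), XEN_EOI_FLAG_SPURIOUS); struct demo_backend, demo_process_one_request() and the worker are placeholder names, not anything present in the patches:

#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <xen/events.h>

struct demo_backend {
	unsigned int irq;
	unsigned long flags;
#define DEMO_EOI_PENDING 0
	struct work_struct work;
};

/* Placeholder: a real backend would consume one ring request here and
 * return true if it found one. */
static bool demo_process_one_request(struct demo_backend *be)
{
	return false;
}

static irqreturn_t demo_event_handler(int irq, void *dev_id)
{
	struct demo_backend *be = dev_id;

	/* Only note that an EOI is owed; the worker sends it later. */
	set_bit(DEMO_EOI_PENDING, &be->flags);
	schedule_work(&be->work);
	return IRQ_HANDLED;
}

static void demo_worker(struct work_struct *work)
{
	struct demo_backend *be = container_of(work, struct demo_backend, work);
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

	/* Drain the ring completely instead of handling one event's worth. */
	while (demo_process_one_request(be))
		eoi_flags = 0;

	/* Unmask only now; an empty pass is reported as spurious. */
	if (test_and_clear_bit(DEMO_EOI_PENDING, &be->flags))
		xen_irq_lateeoi(be->irq, eoi_flags);
}

A backend following this pattern binds its event channel with bind_interdomain_evtchn_to_irqhandler_lateeoi(), so the channel stays masked until xen_irq_lateeoi() is called, and flagging an empty pass as spurious lets the core event code apply its storm mitigation to a misbehaving frontend.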
+ +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Stefano Stabellini +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/pvcalls-back.c | 76 +++++++++++++++++++++++++++------------------ + 1 file changed, 46 insertions(+), 30 deletions(-) + +--- a/drivers/xen/pvcalls-back.c ++++ b/drivers/xen/pvcalls-back.c +@@ -66,6 +66,7 @@ struct sock_mapping { + atomic_t write; + atomic_t io; + atomic_t release; ++ atomic_t eoi; + void (*saved_data_ready)(struct sock *sk); + struct pvcalls_ioworker ioworker; + }; +@@ -87,7 +88,7 @@ static int pvcalls_back_release_active(s + struct pvcalls_fedata *fedata, + struct sock_mapping *map); + +-static void pvcalls_conn_back_read(void *opaque) ++static bool pvcalls_conn_back_read(void *opaque) + { + struct sock_mapping *map = (struct sock_mapping *)opaque; + struct msghdr msg; +@@ -107,17 +108,17 @@ static void pvcalls_conn_back_read(void + virt_mb(); + + if (error) +- return; ++ return false; + + size = pvcalls_queued(prod, cons, array_size); + if (size >= array_size) +- return; ++ return false; + spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); + if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) { + atomic_set(&map->read, 0); + spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, + flags); +- return; ++ return true; + } + spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); + wanted = array_size - size; +@@ -141,7 +142,7 @@ static void pvcalls_conn_back_read(void + ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT); + WARN_ON(ret > wanted); + if (ret == -EAGAIN) /* shouldn't happen */ +- return; ++ return true; + if (!ret) + ret = -ENOTCONN; + spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); +@@ -160,10 +161,10 @@ static void pvcalls_conn_back_read(void + virt_wmb(); + notify_remote_via_irq(map->irq); + +- return; ++ return true; + } + +-static void pvcalls_conn_back_write(struct sock_mapping *map) ++static bool pvcalls_conn_back_write(struct sock_mapping *map) + { + struct pvcalls_data_intf *intf = map->ring; + struct pvcalls_data *data = &map->data; +@@ -180,7 +181,7 @@ static void pvcalls_conn_back_write(stru + array_size = XEN_FLEX_RING_SIZE(map->ring_order); + size = pvcalls_queued(prod, cons, array_size); + if (size == 0) +- return; ++ return false; + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags |= MSG_DONTWAIT; +@@ -198,12 +199,11 @@ static void pvcalls_conn_back_write(stru + + atomic_set(&map->write, 0); + ret = inet_sendmsg(map->sock, &msg, size); +- if (ret == -EAGAIN || (ret >= 0 && ret < size)) { ++ if (ret == -EAGAIN) { + atomic_inc(&map->write); + atomic_inc(&map->io); ++ return true; + } +- if (ret == -EAGAIN) +- return; + + /* write the data, then update the indexes */ + virt_wmb(); +@@ -216,9 +216,13 @@ static void pvcalls_conn_back_write(stru + } + /* update the indexes, then notify the other end */ + virt_wmb(); +- if (prod != cons + ret) ++ if (prod != cons + ret) { + atomic_inc(&map->write); ++ atomic_inc(&map->io); ++ } + notify_remote_via_irq(map->irq); ++ ++ return true; + } + + static void pvcalls_back_ioworker(struct work_struct *work) +@@ -227,6 +231,7 @@ static void pvcalls_back_ioworker(struct + struct pvcalls_ioworker, register_work); + struct sock_mapping *map = container_of(ioworker, struct sock_mapping, + ioworker); ++ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; + + while (atomic_read(&map->io) > 0) { + if (atomic_read(&map->release) > 0) { +@@ -234,10 +239,18 @@ static void 
pvcalls_back_ioworker(struct + return; + } + +- if (atomic_read(&map->read) > 0) +- pvcalls_conn_back_read(map); +- if (atomic_read(&map->write) > 0) +- pvcalls_conn_back_write(map); ++ if (atomic_read(&map->read) > 0 && ++ pvcalls_conn_back_read(map)) ++ eoi_flags = 0; ++ if (atomic_read(&map->write) > 0 && ++ pvcalls_conn_back_write(map)) ++ eoi_flags = 0; ++ ++ if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) { ++ atomic_set(&map->eoi, 0); ++ xen_irq_lateeoi(map->irq, eoi_flags); ++ eoi_flags = XEN_EOI_FLAG_SPURIOUS; ++ } + + atomic_dec(&map->io); + } +@@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_ + goto out; + map->bytes = page; + +- ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id, +- evtchn, +- pvcalls_back_conn_event, +- 0, +- "pvcalls-backend", +- map); ++ ret = bind_interdomain_evtchn_to_irqhandler_lateeoi( ++ fedata->dev->otherend_id, evtchn, ++ pvcalls_back_conn_event, 0, "pvcalls-backend", map); + if (ret < 0) + goto out; + map->irq = ret; +@@ -873,15 +883,18 @@ static irqreturn_t pvcalls_back_event(in + { + struct xenbus_device *dev = dev_id; + struct pvcalls_fedata *fedata = NULL; ++ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; + +- if (dev == NULL) +- return IRQ_HANDLED; ++ if (dev) { ++ fedata = dev_get_drvdata(&dev->dev); ++ if (fedata) { ++ pvcalls_back_work(fedata); ++ eoi_flags = 0; ++ } ++ } + +- fedata = dev_get_drvdata(&dev->dev); +- if (fedata == NULL) +- return IRQ_HANDLED; ++ xen_irq_lateeoi(irq, eoi_flags); + +- pvcalls_back_work(fedata); + return IRQ_HANDLED; + } + +@@ -891,12 +904,15 @@ static irqreturn_t pvcalls_back_conn_eve + struct pvcalls_ioworker *iow; + + if (map == NULL || map->sock == NULL || map->sock->sk == NULL || +- map->sock->sk->sk_user_data != map) ++ map->sock->sk->sk_user_data != map) { ++ xen_irq_lateeoi(irq, 0); + return IRQ_HANDLED; ++ } + + iow = &map->ioworker; + + atomic_inc(&map->write); ++ atomic_inc(&map->eoi); + atomic_inc(&map->io); + queue_work(iow->wq, &iow->register_work); + +@@ -932,7 +948,7 @@ static int backend_connect(struct xenbus + goto error; + } + +- err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn); ++ err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); + if (err < 0) + goto error; + fedata->irq = err; diff --git a/queue-5.9/xen-scsiback-use-lateeoi-irq-binding.patch b/queue-5.9/xen-scsiback-use-lateeoi-irq-binding.patch new file mode 100644 index 00000000000..66418244b58 --- /dev/null +++ b/queue-5.9/xen-scsiback-use-lateeoi-irq-binding.patch @@ -0,0 +1,106 @@ +From 86991b6e7ea6c613b7692f65106076943449b6b7 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Mon, 7 Sep 2020 15:47:28 +0200 +Subject: xen/scsiback: use lateeoi irq binding + +From: Juergen Gross + +commit 86991b6e7ea6c613b7692f65106076943449b6b7 upstream. + +In order to reduce the chance for the system becoming unresponsive due +to event storms triggered by a misbehaving scsifront use the lateeoi +irq binding for scsiback and unmask the event channel only just before +leaving the event handling function. + +In case of a ring protocol error don't issue an EOI in order to avoid +the possibility to use that for producing an event storm. This at once +will result in no further call of scsiback_irq_fn(), so the ring_error +struct member can be dropped and scsiback_do_cmd_fn() can signal the +protocol error via a negative return value. + +This is part of XSA-332. 
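A minimal sketch of the scsiback-style handling described above, where a ring protocol error suppresses the EOI and so keeps the event channel masked; demo_do_ring_work() and its return convention are placeholders standing in for scsiback_do_cmd_fn(), while xen_irq_lateeoi() and XEN_EOI_FLAG_SPURIOUS are the real lateeoi helpers:

#include <linux/interrupt.h>
#include <linux/sched.h>
#include <xen/events.h>

/* Placeholder ring processor: > 0 means work was done and more may be
 * pending, 0 means the ring is empty, < 0 means a ring protocol error. */
static int demo_do_ring_work(void *backend, unsigned int *eoi_flags)
{
	return 0;
}

static irqreturn_t demo_ring_irq(int irq, void *dev_id)
{
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
	int rc;

	while ((rc = demo_do_ring_work(dev_id, &eoi_flags)) > 0)
		cond_resched();

	/* A negative return means the frontend violated the ring protocol;
	 * skipping the EOI keeps the event channel masked, so the rogue
	 * frontend cannot trigger this handler again. */
	if (rc == 0)
		xen_irq_lateeoi(irq, eoi_flags);

	return IRQ_HANDLED;
}

Because no EOI is ever issued after a protocol error, the handler is never re-entered for that channel, which is why the patch can drop the per-ring error flag and signal the error through the return value instead.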
+ +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Reviewed-by: Wei Liu +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/xen-scsiback.c | 23 +++++++++++++---------- + 1 file changed, 13 insertions(+), 10 deletions(-) + +--- a/drivers/xen/xen-scsiback.c ++++ b/drivers/xen/xen-scsiback.c +@@ -91,7 +91,6 @@ struct vscsibk_info { + unsigned int irq; + + struct vscsiif_back_ring ring; +- int ring_error; + + spinlock_t ring_lock; + atomic_t nr_unreplied_reqs; +@@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pend + return pending_req; + } + +-static int scsiback_do_cmd_fn(struct vscsibk_info *info) ++static int scsiback_do_cmd_fn(struct vscsibk_info *info, ++ unsigned int *eoi_flags) + { + struct vscsiif_back_ring *ring = &info->ring; + struct vscsiif_request ring_req; +@@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vsc + rc = ring->rsp_prod_pvt; + pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", + info->domid, rp, rc, rp - rc); +- info->ring_error = 1; +- return 0; ++ return -EINVAL; + } + + while ((rc != rp)) { ++ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; ++ + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) + break; + +@@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vsc + static irqreturn_t scsiback_irq_fn(int irq, void *dev_id) + { + struct vscsibk_info *info = dev_id; ++ int rc; ++ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; + +- if (info->ring_error) +- return IRQ_HANDLED; +- +- while (scsiback_do_cmd_fn(info)) ++ while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0) + cond_resched(); + ++ /* In case of a ring error we keep the event channel masked. */ ++ if (!rc) ++ xen_irq_lateeoi(irq, eoi_flags); ++ + return IRQ_HANDLED; + } + +@@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vs + sring = (struct vscsiif_sring *)area; + BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); + +- err = bind_interdomain_evtchn_to_irq(info->domid, evtchn); ++ err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn); + if (err < 0) + goto unmap_page; + +@@ -1253,7 +1257,6 @@ static int scsiback_probe(struct xenbus_ + + info->domid = dev->otherend_id; + spin_lock_init(&info->ring_lock); +- info->ring_error = 0; + atomic_set(&info->nr_unreplied_reqs, 0); + init_waitqueue_head(&info->waiting_to_free); + info->dev = dev;