git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.14-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 17 Nov 2020 11:42:11 +0000 (12:42 +0100)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Tue, 17 Nov 2020 11:42:11 +0000 (12:42 +0100)
added patches:
xen-blkback-use-lateeoi-irq-binding.patch
xen-events-add-a-new-late-eoi-evtchn-framework.patch
xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch
xen-events-avoid-removing-an-event-channel-while-handling-it.patch
xen-events-block-rogue-events-for-some-time.patch
xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch
xen-events-fix-race-in-evtchn_fifo_unmask.patch
xen-events-switch-user-event-channels-to-lateeoi-model.patch
xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch
xen-netback-use-lateeoi-irq-binding.patch
xen-pciback-use-lateeoi-irq-binding.patch
xen-pvcallsback-use-lateeoi-irq-binding.patch
xen-scsiback-use-lateeoi-irq-binding.patch

14 files changed:
queue-4.14/series
queue-4.14/xen-blkback-use-lateeoi-irq-binding.patch [new file with mode: 0644]
queue-4.14/xen-events-add-a-new-late-eoi-evtchn-framework.patch [new file with mode: 0644]
queue-4.14/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch [new file with mode: 0644]
queue-4.14/xen-events-avoid-removing-an-event-channel-while-handling-it.patch [new file with mode: 0644]
queue-4.14/xen-events-block-rogue-events-for-some-time.patch [new file with mode: 0644]
queue-4.14/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch [new file with mode: 0644]
queue-4.14/xen-events-fix-race-in-evtchn_fifo_unmask.patch [new file with mode: 0644]
queue-4.14/xen-events-switch-user-event-channels-to-lateeoi-model.patch [new file with mode: 0644]
queue-4.14/xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch [new file with mode: 0644]
queue-4.14/xen-netback-use-lateeoi-irq-binding.patch [new file with mode: 0644]
queue-4.14/xen-pciback-use-lateeoi-irq-binding.patch [new file with mode: 0644]
queue-4.14/xen-pvcallsback-use-lateeoi-irq-binding.patch [new file with mode: 0644]
queue-4.14/xen-scsiback-use-lateeoi-irq-binding.patch [new file with mode: 0644]

diff --git a/queue-4.14/series b/queue-4.14/series
index 56f5295d44c5c959e8c5512a8aec5d2d2790f336..200a96501ca0b95a18a3d4dafe92544f2c5398c4 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -66,3 +66,16 @@ x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-st
 perf-core-fix-bad-use-of-igrab.patch
 perf-core-fix-crash-when-using-hw-tracing-kernel-filters.patch
 perf-core-fix-a-memory-leak-in-perf_event_parse_addr_filter.patch
+xen-events-avoid-removing-an-event-channel-while-handling-it.patch
+xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch
+xen-events-fix-race-in-evtchn_fifo_unmask.patch
+xen-events-add-a-new-late-eoi-evtchn-framework.patch
+xen-blkback-use-lateeoi-irq-binding.patch
+xen-netback-use-lateeoi-irq-binding.patch
+xen-scsiback-use-lateeoi-irq-binding.patch
+xen-pvcallsback-use-lateeoi-irq-binding.patch
+xen-pciback-use-lateeoi-irq-binding.patch
+xen-events-switch-user-event-channels-to-lateeoi-model.patch
+xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch
+xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch
+xen-events-block-rogue-events-for-some-time.patch
diff --git a/queue-4.14/xen-blkback-use-lateeoi-irq-binding.patch b/queue-4.14/xen-blkback-use-lateeoi-irq-binding.patch
new file mode 100644
index 0000000..2f28c27
--- /dev/null
+++ b/queue-4.14/xen-blkback-use-lateeoi-irq-binding.patch
@@ -0,0 +1,127 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:03 +0100
+Subject: xen/blkback: use lateeoi irq binding
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-7-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 01263a1fabe30b4d542f34c7e2364a22587ddaf2 upstream.
+
+In order to reduce the chance of the system becoming unresponsive due
+to event storms triggered by a misbehaving blkfront, use the lateeoi
+irq binding for blkback and unmask the event channel only after
+processing all pending requests.
+
+As the thread processing requests is also used to do purging work at
+regular intervals, an EOI may be sent only after an event has been
+received. If there was no pending I/O request, flag the EOI as spurious.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/xen-blkback/blkback.c |   22 +++++++++++++++++-----
+ drivers/block/xen-blkback/xenbus.c  |    5 ++---
+ 2 files changed, 19 insertions(+), 8 deletions(-)
+
+--- a/drivers/block/xen-blkback/blkback.c
++++ b/drivers/block/xen-blkback/blkback.c
+@@ -183,7 +183,7 @@ static inline void shrink_free_pagepool(
+ #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
+-static int do_block_io_op(struct xen_blkif_ring *ring);
++static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
+ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
+                               struct blkif_request *req,
+                               struct pending_req *pending_req);
+@@ -608,6 +608,8 @@ int xen_blkif_schedule(void *arg)
+       struct xen_vbd *vbd = &blkif->vbd;
+       unsigned long timeout;
+       int ret;
++      bool do_eoi;
++      unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+       set_freezable();
+       while (!kthread_should_stop()) {
+@@ -632,16 +634,23 @@ int xen_blkif_schedule(void *arg)
+               if (timeout == 0)
+                       goto purge_gnt_list;
++              do_eoi = ring->waiting_reqs;
++
+               ring->waiting_reqs = 0;
+               smp_mb(); /* clear flag *before* checking for work */
+-              ret = do_block_io_op(ring);
++              ret = do_block_io_op(ring, &eoi_flags);
+               if (ret > 0)
+                       ring->waiting_reqs = 1;
+               if (ret == -EACCES)
+                       wait_event_interruptible(ring->shutdown_wq,
+                                                kthread_should_stop());
++              if (do_eoi && !ring->waiting_reqs) {
++                      xen_irq_lateeoi(ring->irq, eoi_flags);
++                      eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
++              }
++
+ purge_gnt_list:
+               if (blkif->vbd.feature_gnt_persistent &&
+                   time_after(jiffies, ring->next_lru)) {
+@@ -1114,7 +1123,7 @@ static void end_block_io_op(struct bio *
+  * and transmute  it to the block API to hand it over to the proper block disk.
+  */
+ static int
+-__do_block_io_op(struct xen_blkif_ring *ring)
++__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
+ {
+       union blkif_back_rings *blk_rings = &ring->blk_rings;
+       struct blkif_request req;
+@@ -1137,6 +1146,9 @@ __do_block_io_op(struct xen_blkif_ring *
+               if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
+                       break;
++              /* We've seen a request, so clear spurious eoi flag. */
++              *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
++
+               if (kthread_should_stop()) {
+                       more_to_do = 1;
+                       break;
+@@ -1195,13 +1207,13 @@ done:
+ }
+ static int
+-do_block_io_op(struct xen_blkif_ring *ring)
++do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
+ {
+       union blkif_back_rings *blk_rings = &ring->blk_rings;
+       int more_to_do;
+       do {
+-              more_to_do = __do_block_io_op(ring);
++              more_to_do = __do_block_io_op(ring, eoi_flags);
+               if (more_to_do)
+                       break;
+--- a/drivers/block/xen-blkback/xenbus.c
++++ b/drivers/block/xen-blkback/xenbus.c
+@@ -236,9 +236,8 @@ static int xen_blkif_map(struct xen_blki
+               BUG();
+       }
+-      err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
+-                                                  xen_blkif_be_int, 0,
+-                                                  "blkif-backend", ring);
++      err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
++                      evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
+       if (err < 0) {
+               xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+               ring->blk_rings.common.sring = NULL;
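
Taken together, the blkback hunks above boil down to a small consumer-side
pattern: bind the event channel with the lateeoi variant, let the interrupt
handler merely record that work arrived, and issue the EOI from the
processing thread only once the ring has been drained, flagging wakeups that
carried no request as spurious. A minimal sketch of that pattern follows;
struct my_backend, my_backend_interrupt() and process_ring() are hypothetical
illustration names, not code from the patch.

#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/wait.h>
#include <xen/events.h>

struct my_backend {                      /* hypothetical per-ring state */
	domid_t domid;
	int irq;
	bool waiting_reqs;
	wait_queue_head_t wq;
};

/* Hypothetical stand-in for do_block_io_op(): clears XEN_EOI_FLAG_SPURIOUS
 * in *eoi_flags once it has seen a request and returns nonzero while more
 * work remains on the ring. */
static int process_ring(struct my_backend *be, unsigned int *eoi_flags);

static irqreturn_t my_backend_interrupt(int irq, void *dev_id)
{
	struct my_backend *be = dev_id;

	be->waiting_reqs = true;         /* only record that work arrived ...   */
	wake_up(&be->wq);                /* ... and kick the thread; no EOI yet */
	return IRQ_HANDLED;
}

static int my_backend_thread(void *arg)
{
	struct my_backend *be = arg;
	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

	while (!kthread_should_stop()) {
		bool do_eoi;

		wait_event_interruptible(be->wq,
				be->waiting_reqs || kthread_should_stop());

		do_eoi = be->waiting_reqs;
		be->waiting_reqs = false;
		smp_mb();                /* clear flag *before* checking the ring */

		if (process_ring(be, &eoi_flags))
			be->waiting_reqs = true;

		/* Unmask only after an event arrived and the ring is drained. */
		if (do_eoi && !be->waiting_reqs) {
			xen_irq_lateeoi(be->irq, eoi_flags);
			eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
		}
	}
	return 0;
}

static int my_backend_connect(struct my_backend *be, evtchn_port_t evtchn)
{
	/* The lateeoi variant keeps the channel masked after each event. */
	int err = bind_interdomain_evtchn_to_irqhandler_lateeoi(be->domid,
			evtchn, my_backend_interrupt, 0, "my-backend", be);

	if (err < 0)
		return err;
	be->irq = err;
	return 0;
}

xen_blkif_schedule() follows exactly this shape; the extra complexity in the
real driver comes from the periodic persistent-grant purging, which is why the
EOI there is tied to having actually received an event.
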
diff --git a/queue-4.14/xen-events-add-a-new-late-eoi-evtchn-framework.patch b/queue-4.14/xen-events-add-a-new-late-eoi-evtchn-framework.patch
new file mode 100644
index 0000000..33b5aaa
--- /dev/null
+++ b/queue-4.14/xen-events-add-a-new-late-eoi-evtchn-framework.patch
@@ -0,0 +1,344 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:02 +0100
+Subject: xen/events: add a new "late EOI" evtchn framework
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-6-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 54c9de89895e0a36047fcc4ae754ea5b8655fb9d upstream.
+
+In order to avoid tight event channel related IRQ loops, add a new
+framework of "late EOI" handling: the IRQ the event channel is bound
+to will be masked until the event has been handled and the related
+driver is capable of handling another event. The driver is responsible
+for unmasking the event channel via the new function xen_irq_lateeoi().
+
+This is similar to binding an event channel to a threaded IRQ, but
+without having to structure the driver accordingly.
+
+In order to support a future special handling in case a rogue guest
+is sending lots of unsolicited events, add a flag to xen_irq_lateeoi()
+which can be set by the caller to indicate the event was a spurious
+one.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c |  151 ++++++++++++++++++++++++++++++++++-----
+ include/xen/events.h             |   29 ++++++-
+ 2 files changed, 159 insertions(+), 21 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -111,6 +111,7 @@ static bool (*pirq_needs_eoi)(unsigned i
+ static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
+ static struct irq_chip xen_dynamic_chip;
++static struct irq_chip xen_lateeoi_chip;
+ static struct irq_chip xen_percpu_chip;
+ static struct irq_chip xen_pirq_chip;
+ static void enable_dynirq(struct irq_data *data);
+@@ -395,6 +396,33 @@ void notify_remote_via_irq(int irq)
+ }
+ EXPORT_SYMBOL_GPL(notify_remote_via_irq);
++static void xen_irq_lateeoi_locked(struct irq_info *info)
++{
++      evtchn_port_t evtchn;
++
++      evtchn = info->evtchn;
++      if (!VALID_EVTCHN(evtchn))
++              return;
++
++      unmask_evtchn(evtchn);
++}
++
++void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
++{
++      struct irq_info *info;
++      unsigned long flags;
++
++      read_lock_irqsave(&evtchn_rwlock, flags);
++
++      info = info_for_irq(irq);
++
++      if (info)
++              xen_irq_lateeoi_locked(info);
++
++      read_unlock_irqrestore(&evtchn_rwlock, flags);
++}
++EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
++
+ static void xen_irq_init(unsigned irq)
+ {
+       struct irq_info *info;
+@@ -866,7 +894,7 @@ int xen_pirq_from_irq(unsigned irq)
+ }
+ EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+-int bind_evtchn_to_irq(unsigned int evtchn)
++static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
+ {
+       int irq;
+       int ret;
+@@ -883,7 +911,7 @@ int bind_evtchn_to_irq(unsigned int evtc
+               if (irq < 0)
+                       goto out;
+-              irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
++              irq_set_chip_and_handler_name(irq, chip,
+                                             handle_edge_irq, "event");
+               ret = xen_irq_info_evtchn_setup(irq, evtchn);
+@@ -904,8 +932,19 @@ out:
+       return irq;
+ }
++
++int bind_evtchn_to_irq(evtchn_port_t evtchn)
++{
++      return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
++}
+ EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
++{
++      return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
++}
++EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
++
+ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ {
+       struct evtchn_bind_ipi bind_ipi;
+@@ -947,8 +986,9 @@ static int bind_ipi_to_irq(unsigned int
+       return irq;
+ }
+-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+-                                 unsigned int remote_port)
++static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
++                                             evtchn_port_t remote_port,
++                                             struct irq_chip *chip)
+ {
+       struct evtchn_bind_interdomain bind_interdomain;
+       int err;
+@@ -959,10 +999,26 @@ int bind_interdomain_evtchn_to_irq(unsig
+       err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+                                         &bind_interdomain);
+-      return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
++      return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
++                                             chip);
++}
++
++int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
++                                 evtchn_port_t remote_port)
++{
++      return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
++                                                 &xen_dynamic_chip);
+ }
+ EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
++int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
++                                         evtchn_port_t remote_port)
++{
++      return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
++                                                 &xen_lateeoi_chip);
++}
++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
++
+ static int find_virq(unsigned int virq, unsigned int cpu)
+ {
+       struct evtchn_status status;
+@@ -1058,14 +1114,15 @@ static void unbind_from_irq(unsigned int
+       mutex_unlock(&irq_mapping_update_lock);
+ }
+-int bind_evtchn_to_irqhandler(unsigned int evtchn,
+-                            irq_handler_t handler,
+-                            unsigned long irqflags,
+-                            const char *devname, void *dev_id)
++static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
++                                        irq_handler_t handler,
++                                        unsigned long irqflags,
++                                        const char *devname, void *dev_id,
++                                        struct irq_chip *chip)
+ {
+       int irq, retval;
+-      irq = bind_evtchn_to_irq(evtchn);
++      irq = bind_evtchn_to_irq_chip(evtchn, chip);
+       if (irq < 0)
+               return irq;
+       retval = request_irq(irq, handler, irqflags, devname, dev_id);
+@@ -1076,18 +1133,38 @@ int bind_evtchn_to_irqhandler(unsigned i
+       return irq;
+ }
++
++int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
++                            irq_handler_t handler,
++                            unsigned long irqflags,
++                            const char *devname, void *dev_id)
++{
++      return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
++                                            devname, dev_id,
++                                            &xen_dynamic_chip);
++}
+ EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
+-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+-                                        unsigned int remote_port,
+-                                        irq_handler_t handler,
+-                                        unsigned long irqflags,
+-                                        const char *devname,
+-                                        void *dev_id)
++int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
++                                    irq_handler_t handler,
++                                    unsigned long irqflags,
++                                    const char *devname, void *dev_id)
++{
++      return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
++                                            devname, dev_id,
++                                            &xen_lateeoi_chip);
++}
++EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
++
++static int bind_interdomain_evtchn_to_irqhandler_chip(
++              unsigned int remote_domain, evtchn_port_t remote_port,
++              irq_handler_t handler, unsigned long irqflags,
++              const char *devname, void *dev_id, struct irq_chip *chip)
+ {
+       int irq, retval;
+-      irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
++      irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
++                                                chip);
+       if (irq < 0)
+               return irq;
+@@ -1099,8 +1176,33 @@ int bind_interdomain_evtchn_to_irqhandle
+       return irq;
+ }
++
++int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
++                                        evtchn_port_t remote_port,
++                                        irq_handler_t handler,
++                                        unsigned long irqflags,
++                                        const char *devname,
++                                        void *dev_id)
++{
++      return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
++                              remote_port, handler, irqflags, devname,
++                              dev_id, &xen_dynamic_chip);
++}
+ EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
++int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
++                                                evtchn_port_t remote_port,
++                                                irq_handler_t handler,
++                                                unsigned long irqflags,
++                                                const char *devname,
++                                                void *dev_id)
++{
++      return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
++                              remote_port, handler, irqflags, devname,
++                              dev_id, &xen_lateeoi_chip);
++}
++EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
++
+ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+                           irq_handler_t handler,
+                           unsigned long irqflags, const char *devname, void *dev_id)
+@@ -1641,6 +1743,21 @@ static struct irq_chip xen_dynamic_chip
+       .irq_mask_ack           = mask_ack_dynirq,
+       .irq_set_affinity       = set_affinity_irq,
++      .irq_retrigger          = retrigger_dynirq,
++};
++
++static struct irq_chip xen_lateeoi_chip __read_mostly = {
++      /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
++      .name                   = "xen-dyn-lateeoi",
++
++      .irq_disable            = disable_dynirq,
++      .irq_mask               = disable_dynirq,
++      .irq_unmask             = enable_dynirq,
++
++      .irq_ack                = mask_ack_dynirq,
++      .irq_mask_ack           = mask_ack_dynirq,
++
++      .irq_set_affinity       = set_affinity_irq,
+       .irq_retrigger          = retrigger_dynirq,
+ };
+--- a/include/xen/events.h
++++ b/include/xen/events.h
+@@ -14,11 +14,16 @@
+ unsigned xen_evtchn_nr_channels(void);
+-int bind_evtchn_to_irq(unsigned int evtchn);
+-int bind_evtchn_to_irqhandler(unsigned int evtchn,
++int bind_evtchn_to_irq(evtchn_port_t evtchn);
++int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
++int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+                             irq_handler_t handler,
+                             unsigned long irqflags, const char *devname,
+                             void *dev_id);
++int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
++                                    irq_handler_t handler,
++                                    unsigned long irqflags, const char *devname,
++                                    void *dev_id);
+ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu);
+ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+                           irq_handler_t handler,
+@@ -31,13 +36,21 @@ int bind_ipi_to_irqhandler(enum ipi_vect
+                          const char *devname,
+                          void *dev_id);
+ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+-                                 unsigned int remote_port);
++                                 evtchn_port_t remote_port);
++int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
++                                         evtchn_port_t remote_port);
+ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+-                                        unsigned int remote_port,
++                                        evtchn_port_t remote_port,
+                                         irq_handler_t handler,
+                                         unsigned long irqflags,
+                                         const char *devname,
+                                         void *dev_id);
++int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
++                                                evtchn_port_t remote_port,
++                                                irq_handler_t handler,
++                                                unsigned long irqflags,
++                                                const char *devname,
++                                                void *dev_id);
+ /*
+  * Common unbind function for all event sources. Takes IRQ to unbind from.
+@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandle
+  */
+ void unbind_from_irqhandler(unsigned int irq, void *dev_id);
++/*
++ * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi
++ * functions above.
++ */
++void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags);
++/* Signal an event was spurious, i.e. there was no action resulting from it. */
++#define XEN_EOI_FLAG_SPURIOUS 0x00000001
++
+ #define XEN_IRQ_PRIORITY_MAX     EVTCHN_FIFO_PRIORITY_MAX
+ #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
+ #define XEN_IRQ_PRIORITY_MIN     EVTCHN_FIFO_PRIORITY_MIN
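
For a driver that only needs the plain (non-interdomain) variant, the new API
boils down to the following usage pattern. This is a sketch with hypothetical
names (demo_*), assuming the event can be handled entirely inside the handler:

#include <linux/interrupt.h>
#include <xen/events.h>

/* Hypothetical helper: drain whatever the event signalled, return whether
 * there was anything to do. */
static bool demo_consume_pending(void *dev_id);

static irqreturn_t demo_handler(int irq, void *dev_id)
{
	bool did_work = demo_consume_pending(dev_id);

	/*
	 * The xen-dyn-lateeoi chip keeps the channel masked after delivery;
	 * signal readiness for the next event here and tell the core whether
	 * this one actually carried work.
	 */
	xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
	return IRQ_HANDLED;
}

static int demo_bind(evtchn_port_t evtchn, void *dev_id)
{
	/* Same signature as bind_evtchn_to_irqhandler(), different irq_chip. */
	return bind_evtchn_to_irqhandler_lateeoi(evtchn, demo_handler, 0,
						 "demo", dev_id);
}

Backends serving a foreign domain use the corresponding
bind_interdomain_evtchn_to_irqhandler_lateeoi() variant instead, as the later
patches in this series do.
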
diff --git a/queue-4.14/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch b/queue-4.14/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch
new file mode 100644
index 0000000..43cfd12
--- /dev/null
+++ b/queue-4.14/xen-events-add-a-proper-barrier-to-2-level-uevent-unmasking.patch
@@ -0,0 +1,46 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:00 +0100
+Subject: xen/events: add a proper barrier to 2-level uevent unmasking
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-4-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 4d3fe31bd993ef504350989786858aefdb877daa upstream.
+
+A follow-up patch will require certain writes to happen before an event
+channel is unmasked.
+
+While the memory barrier is not strictly necessary for all the callers,
+the main one will need it. In order to avoid an extra memory barrier
+when using fifo event channels, mandate evtchn_unmask() to provide
+write ordering.
+
+The 2-level event handling unmask operation is missing an appropriate
+barrier, so add it. Fifo event channels are fine in this regard due to
+using sync_cmpxchg().
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Julien Grall <jgrall@amazon.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_2l.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/xen/events/events_2l.c
++++ b/drivers/xen/events/events_2l.c
+@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned po
+       BUG_ON(!irqs_disabled());
++      smp_wmb();      /* All writes before unmask must be visible. */
++
+       if (unlikely((cpu != cpu_from_evtchn(port))))
+               do_hypercall = 1;
+       else {
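
The added smp_wmb() covers the classic publish-then-unmask ordering: a caller
stores something the peer must observe and only then unmasks the channel.
Conceptual sketch (not patch code; shared_slot stands in for whatever the real
caller publishes, and unmask_evtchn() is the events-internal wrapper that ends
up in evtchn_2l_unmask() or evtchn_fifo_unmask()):

static void publish_and_unmask(unsigned int evtchn, u64 *shared_slot, u64 val)
{
	*shared_slot = val;     /* data the peer must see once the event fires */

	/*
	 * With this patch evtchn_2l_unmask() issues smp_wmb() before clearing
	 * the mask bit, so the peer can never observe the channel unmasked
	 * while the store above is still invisible to it. Fifo channels get
	 * the same ordering from sync_cmpxchg().
	 */
	unmask_evtchn(evtchn);
}
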
diff --git a/queue-4.14/xen-events-avoid-removing-an-event-channel-while-handling-it.patch b/queue-4.14/xen-events-avoid-removing-an-event-channel-while-handling-it.patch
new file mode 100644
index 0000000..cef75b8
--- /dev/null
+++ b/queue-4.14/xen-events-avoid-removing-an-event-channel-while-handling-it.patch
@@ -0,0 +1,159 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:28:59 +0100
+Subject: xen/events: avoid removing an event channel while handling it
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-3-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2 upstream.
+
+Today it can happen that an event channel is being removed from the
+system while the event handling loop is active. This can lead to a
+race resulting in crashes or WARN() splats when trying to access the
+irq_info structure related to the event channel.
+
+Fix this problem by using a rwlock taken as reader in the event
+handling loop and as writer when deallocating the irq_info structure.
+
+As the observed problem was a NULL dereference in evtchn_from_irq(),
+make this function more robust against races by checking that the
+irq_info pointer is not NULL before dereferencing it.
+
+And finally, make all accesses to evtchn_to_irq[row][col] atomic in
+order to avoid seeing partial updates of an array element in irq
+handling. Note that irq handling can be entered only for event channels
+which have been valid before, so a row that is not yet populated isn't
+a problem in this regard, as rows are only ever added and never removed.
+
+This is XSA-331.
+
+Cc: stable@vger.kernel.org
+Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
+Reported-by: Jinoh Kang <luke1337@theori.io>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c |   40 ++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 35 insertions(+), 5 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -32,6 +32,7 @@
+ #include <linux/slab.h>
+ #include <linux/irqnr.h>
+ #include <linux/pci.h>
++#include <linux/spinlock.h>
+ #ifdef CONFIG_X86
+ #include <asm/desc.h>
+@@ -69,6 +70,23 @@ const struct evtchn_ops *evtchn_ops;
+  */
+ static DEFINE_MUTEX(irq_mapping_update_lock);
++/*
++ * Lock protecting event handling loop against removing event channels.
++ * Adding of event channels is no issue as the associated IRQ becomes active
++ * only after everything is setup (before request_[threaded_]irq() the handler
++ * can't be entered for an event, as the event channel will be unmasked only
++ * then).
++ */
++static DEFINE_RWLOCK(evtchn_rwlock);
++
++/*
++ * Lock hierarchy:
++ *
++ * irq_mapping_update_lock
++ *   evtchn_rwlock
++ *     IRQ-desc lock
++ */
++
+ static LIST_HEAD(xen_irq_list_head);
+ /* IRQ <-> VIRQ mapping. */
+@@ -103,7 +121,7 @@ static void clear_evtchn_to_irq_row(unsi
+       unsigned col;
+       for (col = 0; col < EVTCHN_PER_ROW; col++)
+-              evtchn_to_irq[row][col] = -1;
++              WRITE_ONCE(evtchn_to_irq[row][col], -1);
+ }
+ static void clear_evtchn_to_irq_all(void)
+@@ -140,7 +158,7 @@ static int set_evtchn_to_irq(unsigned ev
+               clear_evtchn_to_irq_row(row);
+       }
+-      evtchn_to_irq[row][col] = irq;
++      WRITE_ONCE(evtchn_to_irq[row][col], irq);
+       return 0;
+ }
+@@ -150,7 +168,7 @@ int get_evtchn_to_irq(unsigned evtchn)
+               return -1;
+       if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+               return -1;
+-      return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
++      return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
+ }
+ /* Get info for IRQ */
+@@ -259,10 +277,14 @@ static void xen_irq_info_cleanup(struct
+  */
+ unsigned int evtchn_from_irq(unsigned irq)
+ {
+-      if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)))
++      const struct irq_info *info = NULL;
++
++      if (likely(irq < nr_irqs))
++              info = info_for_irq(irq);
++      if (!info)
+               return 0;
+-      return info_for_irq(irq)->evtchn;
++      return info->evtchn;
+ }
+ unsigned irq_from_evtchn(unsigned int evtchn)
+@@ -438,16 +460,21 @@ static int __must_check xen_allocate_irq
+ static void xen_free_irq(unsigned irq)
+ {
+       struct irq_info *info = info_for_irq(irq);
++      unsigned long flags;
+       if (WARN_ON(!info))
+               return;
++      write_lock_irqsave(&evtchn_rwlock, flags);
++
+       list_del(&info->list);
+       set_info_for_irq(irq, NULL);
+       WARN_ON(info->refcnt > 0);
++      write_unlock_irqrestore(&evtchn_rwlock, flags);
++
+       kfree(info);
+       /* Legacy IRQ descriptors are managed by the arch. */
+@@ -1233,6 +1260,8 @@ static void __xen_evtchn_do_upcall(void)
+       int cpu = get_cpu();
+       unsigned count;
++      read_lock(&evtchn_rwlock);
++
+       do {
+               vcpu_info->evtchn_upcall_pending = 0;
+@@ -1248,6 +1277,7 @@ static void __xen_evtchn_do_upcall(void)
+       } while (count != 1 || vcpu_info->evtchn_upcall_pending);
+ out:
++      read_unlock(&evtchn_rwlock);
+       put_cpu();
+ }
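
The locking introduced here has a simple shape: the hot event-delivery path
only ever reads irq_info, and the rare teardown path is the only writer, so a
rwlock keeps delivery cheap while making teardown wait for in-flight handling.
Condensed sketch of that shape (names follow the patch, bodies reduced to the
locking skeleton):

static DEFINE_RWLOCK(evtchn_rwlock);

static void event_loop(void)                  /* cf. __xen_evtchn_do_upcall() */
{
	read_lock(&evtchn_rwlock);
	/*
	 * Look up the irq via READ_ONCE(evtchn_to_irq[row][col]) and call
	 * generic_handle_irq(); the irq_info cannot be freed underneath us.
	 */
	read_unlock(&evtchn_rwlock);
}

static void free_irq_info(unsigned int irq)   /* cf. xen_free_irq() */
{
	unsigned long flags;

	write_lock_irqsave(&evtchn_rwlock, flags);
	/* unlink the info and clear the per-irq pointer while no reader runs */
	write_unlock_irqrestore(&evtchn_rwlock, flags);

	/* only now is it safe to kfree() the irq_info */
}

The lock nests below irq_mapping_update_lock and above the IRQ-desc locks, as
the lock-hierarchy comment added above documents.
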
diff --git a/queue-4.14/xen-events-block-rogue-events-for-some-time.patch b/queue-4.14/xen-events-block-rogue-events-for-some-time.patch
new file mode 100644
index 0000000..a5fdec8
--- /dev/null
+++ b/queue-4.14/xen-events-block-rogue-events-for-some-time.patch
@@ -0,0 +1,116 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:11 +0100
+Subject: xen/events: block rogue events for some time
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-15-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 5f7f77400ab5b357b5fdb7122c3442239672186c upstream.
+
+In order to avoid high dom0 load due to rogue guests sending events at
+high frequency, block those events in case there was no action needed
+in dom0 to handle the events.
+
+This is done by adding a per-event counter, which is set to zero when
+an EOI without the XEN_EOI_FLAG_SPURIOUS flag is received from a backend
+driver, and incremented when that flag has been set. In case the
+counter is 2 or higher, delay the EOI by 1 << (cnt - 2) jiffies, but
+not more than 1 second.
+
+In order not to waste memory, shorten the per-event refcnt to two bytes
+(it should normally never exceed a value of 2). Add an overflow check
+to evtchn_get() to make sure the 2 bytes really won't overflow.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c     |   27 ++++++++++++++++++++++-----
+ drivers/xen/events/events_internal.h |    3 ++-
+ 2 files changed, 24 insertions(+), 6 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -459,17 +459,34 @@ static void lateeoi_list_add(struct irq_
+       spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+ }
+-static void xen_irq_lateeoi_locked(struct irq_info *info)
++static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+ {
+       evtchn_port_t evtchn;
+       unsigned int cpu;
++      unsigned int delay = 0;
+       evtchn = info->evtchn;
+       if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
+               return;
++      if (spurious) {
++              if ((1 << info->spurious_cnt) < (HZ << 2))
++                      info->spurious_cnt++;
++              if (info->spurious_cnt > 1) {
++                      delay = 1 << (info->spurious_cnt - 2);
++                      if (delay > HZ)
++                              delay = HZ;
++                      if (!info->eoi_time)
++                              info->eoi_cpu = smp_processor_id();
++                      info->eoi_time = get_jiffies_64() + delay;
++              }
++      } else {
++              info->spurious_cnt = 0;
++      }
++
+       cpu = info->eoi_cpu;
+-      if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
++      if (info->eoi_time &&
++          (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
+               lateeoi_list_add(info);
+               return;
+       }
+@@ -506,7 +523,7 @@ static void xen_irq_lateeoi_worker(struc
+               info->eoi_time = 0;
+-              xen_irq_lateeoi_locked(info);
++              xen_irq_lateeoi_locked(info, false);
+       }
+       if (info)
+@@ -535,7 +552,7 @@ void xen_irq_lateeoi(unsigned int irq, u
+       info = info_for_irq(irq);
+       if (info)
+-              xen_irq_lateeoi_locked(info);
++              xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+       read_unlock_irqrestore(&evtchn_rwlock, flags);
+ }
+@@ -1438,7 +1455,7 @@ int evtchn_get(unsigned int evtchn)
+               goto done;
+       err = -EINVAL;
+-      if (info->refcnt <= 0)
++      if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
+               goto done;
+       info->refcnt++;
+--- a/drivers/xen/events/events_internal.h
++++ b/drivers/xen/events/events_internal.h
+@@ -33,7 +33,8 @@ enum xen_irq_type {
+ struct irq_info {
+       struct list_head list;
+       struct list_head eoi_list;
+-      int refcnt;
++      short refcnt;
++      short spurious_cnt;
+       enum xen_irq_type type; /* type */
+       unsigned irq;
+       unsigned int evtchn;    /* event channel */
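
The delay logic added to xen_irq_lateeoi_locked() is an exponential backoff
keyed off consecutive spurious EOIs. Pulled out as a pure function it looks
roughly like this (a sketch, not the kernel code):

#include <linux/jiffies.h>

/* Returns how many jiffies the unmask should be delayed. */
static unsigned int spurious_delay(short *spurious_cnt, bool spurious)
{
	unsigned int delay = 0;

	if (!spurious) {
		*spurious_cnt = 0;              /* a real event resets the backoff */
		return 0;
	}

	if ((1 << *spurious_cnt) < (HZ << 2))   /* stop counting once shifts are moot */
		(*spurious_cnt)++;

	if (*spurious_cnt > 1) {
		delay = 1 << (*spurious_cnt - 2);   /* 1, 2, 4, 8, ... jiffies */
		if (delay > HZ)
			delay = HZ;                 /* never more than one second */
	}

	return delay;
}

So the second consecutive spurious EOI delays the unmask by one jiffy, the
third by two, and so on up to the one-second cap. Shrinking refcnt to a short
to make room for this counter is also why evtchn_get() now refuses to go past
SHRT_MAX.
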
diff --git a/queue-4.14/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch b/queue-4.14/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch
new file mode 100644
index 0000000..150a54a
--- /dev/null
+++ b/queue-4.14/xen-events-defer-eoi-in-case-of-excessive-number-of-events.patch
@@ -0,0 +1,522 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:10 +0100
+Subject: xen/events: defer eoi in case of excessive number of events
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-14-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit e99502f76271d6bc4e374fe368c50c67a1fd3070 upstream.
+
+In case rogue guests are sending events at high frequency, it might
+happen that xen_evtchn_do_upcall() won't stop processing events in
+dom0. As this is done in irq handling, a crash might be the result.
+
+In order to avoid that, delay further inter-domain events after some
+time in xen_evtchn_do_upcall() by forcing eoi processing into a
+worker on the same cpu, thus inhibiting new events coming in.
+
+The time after which eoi processing is to be delayed is configurable
+via a new module parameter "event_loop_timeout" which specifies the
+maximum event loop time in jiffies (default: 2, the value was chosen
+after some tests showing that a value of 2 was the lowest with only
+a slight drop of dom0 network throughput while multiple guests
+performed an event storm).
+
+How long eoi processing will be delayed can be specified via another
+parameter "event_eoi_delay" (again in jiffies, default 10, again the
+value was chosen after testing with different delay values).
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt |    8 +
+ drivers/xen/events/events_2l.c                  |    7 
+ drivers/xen/events/events_base.c                |  189 +++++++++++++++++++++++-
+ drivers/xen/events/events_fifo.c                |   30 +--
+ drivers/xen/events/events_internal.h            |   14 +
+ 5 files changed, 216 insertions(+), 32 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -5022,6 +5022,14 @@
+                       Disables the PV optimizations forcing the HVM guest to
+                       run as generic HVM guest with no PV drivers.
++      xen.event_eoi_delay=    [XEN]
++                      How long to delay EOI handling in case of event
++                      storms (jiffies). Default is 10.
++
++      xen.event_loop_timeout= [XEN]
++                      After which time (jiffies) the event handling loop
++                      should start to delay EOI handling. Default is 2.
++
+       xirc2ps_cs=     [NET,PCMCIA]
+                       Format:
+                       <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
+--- a/drivers/xen/events/events_2l.c
++++ b/drivers/xen/events/events_2l.c
+@@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns
+  * a bitset of words which contain pending event bits.  The second
+  * level is a bitset of pending events themselves.
+  */
+-static void evtchn_2l_handle_events(unsigned cpu)
++static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
+ {
+       int irq;
+       xen_ulong_t pending_words;
+@@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsi
+                       /* Process port. */
+                       port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
+-                      irq = get_evtchn_to_irq(port);
+-
+-                      if (irq != -1)
+-                              generic_handle_irq(irq);
++                      handle_irq_for_port(port, ctrl);
+                       bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -34,6 +34,8 @@
+ #include <linux/pci.h>
+ #include <linux/spinlock.h>
+ #include <linux/cpuhotplug.h>
++#include <linux/atomic.h>
++#include <linux/ktime.h>
+ #ifdef CONFIG_X86
+ #include <asm/desc.h>
+@@ -63,6 +65,15 @@
+ #include "events_internal.h"
++#undef MODULE_PARAM_PREFIX
++#define MODULE_PARAM_PREFIX "xen."
++
++static uint __read_mostly event_loop_timeout = 2;
++module_param(event_loop_timeout, uint, 0644);
++
++static uint __read_mostly event_eoi_delay = 10;
++module_param(event_eoi_delay, uint, 0644);
++
+ const struct evtchn_ops *evtchn_ops;
+ /*
+@@ -86,6 +97,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
+  * irq_mapping_update_lock
+  *   evtchn_rwlock
+  *     IRQ-desc lock
++ *       percpu eoi_list_lock
+  */
+ static LIST_HEAD(xen_irq_list_head);
+@@ -118,6 +130,8 @@ static struct irq_chip xen_pirq_chip;
+ static void enable_dynirq(struct irq_data *data);
+ static void disable_dynirq(struct irq_data *data);
++static DEFINE_PER_CPU(unsigned int, irq_epoch);
++
+ static void clear_evtchn_to_irq_row(unsigned row)
+ {
+       unsigned col;
+@@ -397,17 +411,120 @@ void notify_remote_via_irq(int irq)
+ }
+ EXPORT_SYMBOL_GPL(notify_remote_via_irq);
++struct lateeoi_work {
++      struct delayed_work delayed;
++      spinlock_t eoi_list_lock;
++      struct list_head eoi_list;
++};
++
++static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
++
++static void lateeoi_list_del(struct irq_info *info)
++{
++      struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
++      unsigned long flags;
++
++      spin_lock_irqsave(&eoi->eoi_list_lock, flags);
++      list_del_init(&info->eoi_list);
++      spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
++}
++
++static void lateeoi_list_add(struct irq_info *info)
++{
++      struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
++      struct irq_info *elem;
++      u64 now = get_jiffies_64();
++      unsigned long delay;
++      unsigned long flags;
++
++      if (now < info->eoi_time)
++              delay = info->eoi_time - now;
++      else
++              delay = 1;
++
++      spin_lock_irqsave(&eoi->eoi_list_lock, flags);
++
++      if (list_empty(&eoi->eoi_list)) {
++              list_add(&info->eoi_list, &eoi->eoi_list);
++              mod_delayed_work_on(info->eoi_cpu, system_wq,
++                                  &eoi->delayed, delay);
++      } else {
++              list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
++                      if (elem->eoi_time <= info->eoi_time)
++                              break;
++              }
++              list_add(&info->eoi_list, &elem->eoi_list);
++      }
++
++      spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
++}
++
+ static void xen_irq_lateeoi_locked(struct irq_info *info)
+ {
+       evtchn_port_t evtchn;
++      unsigned int cpu;
+       evtchn = info->evtchn;
+-      if (!VALID_EVTCHN(evtchn))
++      if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
++              return;
++
++      cpu = info->eoi_cpu;
++      if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
++              lateeoi_list_add(info);
+               return;
++      }
++      info->eoi_time = 0;
+       unmask_evtchn(evtchn);
+ }
++static void xen_irq_lateeoi_worker(struct work_struct *work)
++{
++      struct lateeoi_work *eoi;
++      struct irq_info *info;
++      u64 now = get_jiffies_64();
++      unsigned long flags;
++
++      eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
++
++      read_lock_irqsave(&evtchn_rwlock, flags);
++
++      while (true) {
++              spin_lock(&eoi->eoi_list_lock);
++
++              info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
++                                              eoi_list);
++
++              if (info == NULL || now < info->eoi_time) {
++                      spin_unlock(&eoi->eoi_list_lock);
++                      break;
++              }
++
++              list_del_init(&info->eoi_list);
++
++              spin_unlock(&eoi->eoi_list_lock);
++
++              info->eoi_time = 0;
++
++              xen_irq_lateeoi_locked(info);
++      }
++
++      if (info)
++              mod_delayed_work_on(info->eoi_cpu, system_wq,
++                                  &eoi->delayed, info->eoi_time - now);
++
++      read_unlock_irqrestore(&evtchn_rwlock, flags);
++}
++
++static void xen_cpu_init_eoi(unsigned int cpu)
++{
++      struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
++
++      INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
++      spin_lock_init(&eoi->eoi_list_lock);
++      INIT_LIST_HEAD(&eoi->eoi_list);
++}
++
+ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+ {
+       struct irq_info *info;
+@@ -427,6 +544,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+ static void xen_irq_init(unsigned irq)
+ {
+       struct irq_info *info;
++
+ #ifdef CONFIG_SMP
+       /* By default all event channels notify CPU#0. */
+       cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
+@@ -441,6 +559,7 @@ static void xen_irq_init(unsigned irq)
+       set_info_for_irq(irq, info);
++      INIT_LIST_HEAD(&info->eoi_list);
+       list_add_tail(&info->list, &xen_irq_list_head);
+ }
+@@ -496,6 +615,9 @@ static void xen_free_irq(unsigned irq)
+       write_lock_irqsave(&evtchn_rwlock, flags);
++      if (!list_empty(&info->eoi_list))
++              lateeoi_list_del(info);
++
+       list_del(&info->list);
+       set_info_for_irq(irq, NULL);
+@@ -1355,6 +1477,54 @@ void xen_send_IPI_one(unsigned int cpu,
+       notify_remote_via_irq(irq);
+ }
++struct evtchn_loop_ctrl {
++      ktime_t timeout;
++      unsigned count;
++      bool defer_eoi;
++};
++
++void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
++{
++      int irq;
++      struct irq_info *info;
++
++      irq = get_evtchn_to_irq(port);
++      if (irq == -1)
++              return;
++
++      /*
++       * Check for timeout every 256 events.
++       * We are setting the timeout value only after the first 256
++       * events in order to not hurt the common case of few loop
++       * iterations. The 256 is basically an arbitrary value.
++       *
++       * In case we are hitting the timeout we need to defer all further
++       * EOIs in order to ensure to leave the event handling loop rather
++       * sooner than later.
++       */
++      if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
++              ktime_t kt = ktime_get();
++
++              if (!ctrl->timeout) {
++                      kt = ktime_add_ms(kt,
++                                        jiffies_to_msecs(event_loop_timeout));
++                      ctrl->timeout = kt;
++              } else if (kt > ctrl->timeout) {
++                      ctrl->defer_eoi = true;
++              }
++      }
++
++      info = info_for_irq(irq);
++
++      if (ctrl->defer_eoi) {
++              info->eoi_cpu = smp_processor_id();
++              info->irq_epoch = __this_cpu_read(irq_epoch);
++              info->eoi_time = get_jiffies_64() + event_eoi_delay;
++      }
++
++      generic_handle_irq(irq);
++}
++
+ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+ static void __xen_evtchn_do_upcall(void)
+@@ -1362,6 +1532,7 @@ static void __xen_evtchn_do_upcall(void)
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       int cpu = get_cpu();
+       unsigned count;
++      struct evtchn_loop_ctrl ctrl = { 0 };
+       read_lock(&evtchn_rwlock);
+@@ -1371,7 +1542,7 @@ static void __xen_evtchn_do_upcall(void)
+               if (__this_cpu_inc_return(xed_nesting_count) - 1)
+                       goto out;
+-              xen_evtchn_handle_events(cpu);
++              xen_evtchn_handle_events(cpu, &ctrl);
+               BUG_ON(!irqs_disabled());
+@@ -1382,6 +1553,13 @@ static void __xen_evtchn_do_upcall(void)
+ out:
+       read_unlock(&evtchn_rwlock);
++      /*
++       * Increment irq_epoch only now to defer EOIs only for
++       * xen_irq_lateeoi() invocations occurring from inside the loop
++       * above.
++       */
++      __this_cpu_inc(irq_epoch);
++
+       put_cpu();
+ }
+@@ -1828,9 +2006,6 @@ void xen_callback_vector(void)
+ void xen_callback_vector(void) {}
+ #endif
+-#undef MODULE_PARAM_PREFIX
+-#define MODULE_PARAM_PREFIX "xen."
+-
+ static bool fifo_events = true;
+ module_param(fifo_events, bool, 0);
+@@ -1838,6 +2013,8 @@ static int xen_evtchn_cpu_prepare(unsign
+ {
+       int ret = 0;
++      xen_cpu_init_eoi(cpu);
++
+       if (evtchn_ops->percpu_init)
+               ret = evtchn_ops->percpu_init(cpu);
+@@ -1864,6 +2041,8 @@ void __init xen_init_IRQ(void)
+       if (ret < 0)
+               xen_evtchn_2l_init();
++      xen_cpu_init_eoi(smp_processor_id());
++
+       cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+                                 "xen/evtchn:prepare",
+                                 xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
+--- a/drivers/xen/events/events_fifo.c
++++ b/drivers/xen/events/events_fifo.c
+@@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile ev
+       return w & EVTCHN_FIFO_LINK_MASK;
+ }
+-static void handle_irq_for_port(unsigned port)
+-{
+-      int irq;
+-
+-      irq = get_evtchn_to_irq(port);
+-      if (irq != -1)
+-              generic_handle_irq(irq);
+-}
+-
+-static void consume_one_event(unsigned cpu,
++static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
+                             struct evtchn_fifo_control_block *control_block,
+-                            unsigned priority, unsigned long *ready,
+-                            bool drop)
++                            unsigned priority, unsigned long *ready)
+ {
+       struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
+       uint32_t head;
+@@ -320,16 +310,17 @@ static void consume_one_event(unsigned c
+               clear_bit(priority, ready);
+       if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
+-              if (unlikely(drop))
++              if (unlikely(!ctrl))
+                       pr_warn("Dropping pending event for port %u\n", port);
+               else
+-                      handle_irq_for_port(port);
++                      handle_irq_for_port(port, ctrl);
+       }
+       q->head[priority] = head;
+ }
+-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
++static void __evtchn_fifo_handle_events(unsigned cpu,
++                                      struct evtchn_loop_ctrl *ctrl)
+ {
+       struct evtchn_fifo_control_block *control_block;
+       unsigned long ready;
+@@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events(
+       while (ready) {
+               q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
+-              consume_one_event(cpu, control_block, q, &ready, drop);
++              consume_one_event(cpu, ctrl, control_block, q, &ready);
+               ready |= xchg(&control_block->ready, 0);
+       }
+ }
+-static void evtchn_fifo_handle_events(unsigned cpu)
++static void evtchn_fifo_handle_events(unsigned cpu,
++                                    struct evtchn_loop_ctrl *ctrl)
+ {
+-      __evtchn_fifo_handle_events(cpu, false);
++      __evtchn_fifo_handle_events(cpu, ctrl);
+ }
+ static void evtchn_fifo_resume(void)
+@@ -416,7 +408,7 @@ static int evtchn_fifo_percpu_init(unsig
+ static int evtchn_fifo_percpu_deinit(unsigned int cpu)
+ {
+-      __evtchn_fifo_handle_events(cpu, true);
++      __evtchn_fifo_handle_events(cpu, NULL);
+       return 0;
+ }
+--- a/drivers/xen/events/events_internal.h
++++ b/drivers/xen/events/events_internal.h
+@@ -32,11 +32,15 @@ enum xen_irq_type {
+  */
+ struct irq_info {
+       struct list_head list;
++      struct list_head eoi_list;
+       int refcnt;
+       enum xen_irq_type type; /* type */
+       unsigned irq;
+       unsigned int evtchn;    /* event channel */
+       unsigned short cpu;     /* cpu bound */
++      unsigned short eoi_cpu; /* EOI must happen on this cpu */
++      unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
++      u64 eoi_time;           /* Time in jiffies when to EOI. */
+       union {
+               unsigned short virq;
+@@ -55,6 +59,8 @@ struct irq_info {
+ #define PIRQ_SHAREABLE        (1 << 1)
+ #define PIRQ_MSI_GROUP        (1 << 2)
++struct evtchn_loop_ctrl;
++
+ struct evtchn_ops {
+       unsigned (*max_channels)(void);
+       unsigned (*nr_channels)(void);
+@@ -69,7 +75,7 @@ struct evtchn_ops {
+       void (*mask)(unsigned port);
+       void (*unmask)(unsigned port);
+-      void (*handle_events)(unsigned cpu);
++      void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
+       void (*resume)(void);
+       int (*percpu_init)(unsigned int cpu);
+@@ -80,6 +86,7 @@ extern const struct evtchn_ops *evtchn_o
+ extern int **evtchn_to_irq;
+ int get_evtchn_to_irq(unsigned int evtchn);
++void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
+ struct irq_info *info_for_irq(unsigned irq);
+ unsigned cpu_from_irq(unsigned irq);
+@@ -137,9 +144,10 @@ static inline void unmask_evtchn(unsigne
+       return evtchn_ops->unmask(port);
+ }
+-static inline void xen_evtchn_handle_events(unsigned cpu)
++static inline void xen_evtchn_handle_events(unsigned cpu,
++                                          struct evtchn_loop_ctrl *ctrl)
+ {
+-      return evtchn_ops->handle_events(cpu);
++      return evtchn_ops->handle_events(cpu, ctrl);
+ }
+ static inline void xen_evtchn_resume(void)
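
The core of the throttling added here is the evtchn_loop_ctrl bookkeeping:
events are counted, the clock is only started after the first 256 of them, and
once the budget of event_loop_timeout jiffies has been used up, every further
EOI is deferred by event_eoi_delay jiffies into the per-cpu delayed worker.
The decision logic, condensed (a sketch, not the literal kernel code):

#include <linux/jiffies.h>
#include <linux/ktime.h>

struct evtchn_loop_ctrl {
	ktime_t timeout;
	unsigned int count;
	bool defer_eoi;
};

static void check_loop_budget(struct evtchn_loop_ctrl *ctrl,
			      unsigned int loop_timeout /* jiffies */)
{
	if (ctrl->defer_eoi || (++ctrl->count & 0xff))
		return;                 /* only look at the clock every 256 events */

	if (!ctrl->timeout)             /* first check: arm the deadline */
		ctrl->timeout = ktime_add_ms(ktime_get(),
					     jiffies_to_msecs(loop_timeout));
	else if (ktime_get() > ctrl->timeout)
		ctrl->defer_eoi = true; /* budget exceeded: defer all further EOIs */
}

Both knobs are module parameters (mode 0644), so besides the command-line form
xen.event_loop_timeout=2 and xen.event_eoi_delay=10 shown in the documentation
hunk, they can also be adjusted at runtime.
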
diff --git a/queue-4.14/xen-events-fix-race-in-evtchn_fifo_unmask.patch b/queue-4.14/xen-events-fix-race-in-evtchn_fifo_unmask.patch
new file mode 100644
index 0000000..3c5cd39
--- /dev/null
+++ b/queue-4.14/xen-events-fix-race-in-evtchn_fifo_unmask.patch
@@ -0,0 +1,67 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:01 +0100
+Subject: xen/events: fix race in evtchn_fifo_unmask()
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-5-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit f01337197419b7e8a492e83089552b77d3b5fb90 upstream.
+
+Unmasking a fifo event channel can result in unmasking it twice, once
+directly in the kernel and once via a hypercall in case the event was
+pending.
+
+Fix that by doing the local unmask only if the event is not pending.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_fifo.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/drivers/xen/events/events_fifo.c
++++ b/drivers/xen/events/events_fifo.c
+@@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(unsign
+       return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
+ }
+ /*
+- * Clear MASKED, spinning if BUSY is set.
++ * Clear MASKED if not PENDING, spinning if BUSY is set.
++ * Return true if mask was cleared.
+  */
+-static void clear_masked(volatile event_word_t *word)
++static bool clear_masked_cond(volatile event_word_t *word)
+ {
+       event_word_t new, old, w;
+       w = *word;
+       do {
++              if (w & (1 << EVTCHN_FIFO_PENDING))
++                      return false;
++
+               old = w & ~(1 << EVTCHN_FIFO_BUSY);
+               new = old & ~(1 << EVTCHN_FIFO_MASKED);
+               w = sync_cmpxchg(word, old, new);
+       } while (w != old);
++
++      return true;
+ }
+ static void evtchn_fifo_unmask(unsigned port)
+@@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(unsigned
+       BUG_ON(!irqs_disabled());
+-      clear_masked(word);
+-      if (evtchn_fifo_is_pending(port)) {
++      if (!clear_masked_cond(word)) {
+               struct evtchn_unmask unmask = { .port = port };
+               (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+       }
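
The race existed because clear_masked() unconditionally cleared MASKED and
only afterwards looked at PENDING, so the local unmask could already have
happened by the time the hypercall path ran. The fix folds the condition into
the atomic update itself, which is a generally useful pattern (generic sketch,
not the kernel code):

/*
 * Atomically clear @flag in @word, but only if @unless is not set.
 * Returns true if the flag was cleared locally.
 */
static bool clear_flag_unless(u32 *word, u32 flag, u32 unless)
{
	u32 old, seen = READ_ONCE(*word);

	do {
		if (seen & unless)
			return false;   /* condition holds: leave the word alone */
		old = seen;
		seen = cmpxchg(word, old, old & ~flag);
	} while (seen != old);          /* lost a race: re-evaluate with new value */

	return true;
}

clear_masked_cond() above is this pattern with MASKED as the flag, PENDING as
the blocking condition, and the additional spin while BUSY is set; when it
returns false, the unmask is left to the EVTCHNOP_unmask hypercall, which does
the pending check race-free in the hypervisor.
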
diff --git a/queue-4.14/xen-events-switch-user-event-channels-to-lateeoi-model.patch b/queue-4.14/xen-events-switch-user-event-channels-to-lateeoi-model.patch
new file mode 100644
index 0000000..e9406d0
--- /dev/null
+++ b/queue-4.14/xen-events-switch-user-event-channels-to-lateeoi-model.patch
@@ -0,0 +1,58 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:08 +0100
+Subject: xen/events: switch user event channels to lateeoi model
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-12-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c44b849cee8c3ac587da3b0980e01f77500d158c upstream.
+
+Instead of disabling the irq when an event is received and enabling
+it again when handled by the user process, use the lateeoi model.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Tested-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/evtchn.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/drivers/xen/evtchn.c
++++ b/drivers/xen/evtchn.c
+@@ -166,7 +166,6 @@ static irqreturn_t evtchn_interrupt(int
+            "Interrupt for port %d, but apparently not enabled; per-user %p\n",
+            evtchn->port, u);
+-      disable_irq_nosync(irq);
+       evtchn->enabled = false;
+       spin_lock(&u->ring_prod_lock);
+@@ -292,7 +291,7 @@ static ssize_t evtchn_write(struct file
+               evtchn = find_evtchn(u, port);
+               if (evtchn && !evtchn->enabled) {
+                       evtchn->enabled = true;
+-                      enable_irq(irq_from_evtchn(port));
++                      xen_irq_lateeoi(irq_from_evtchn(port), 0);
+               }
+       }
+@@ -392,8 +391,8 @@ static int evtchn_bind_to_user(struct pe
+       if (rc < 0)
+               goto err;
+-      rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
+-                                     u->name, evtchn);
++      rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
++                                             u->name, evtchn);
+       if (rc < 0)
+               goto err;
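For orientation, the lateeoi consumer contract this patch switches to can be condensed as below. bind_evtchn_to_irqhandler_lateeoi(), xen_irq_lateeoi() and the 0/XEN_EOI_FLAG_SPURIOUS argument are the interfaces visible in the patches of this series; the surrounding example_* skeleton is an illustrative sketch only, not code from the tree.

#include <linux/interrupt.h>
#include <xen/events.h>

/* Illustrative per-port state, not taken from drivers/xen/evtchn.c. */
struct example_port {
        unsigned int port;
        int irq;
        bool enabled;
};

static irqreturn_t example_evtchn_interrupt(int irq, void *dev_id)
{
        struct example_port *p = dev_id;

        /* With a lateeoi binding the channel stays masked after delivery;
         * just record the event and hand it to process context. */
        p->enabled = false;
        return IRQ_HANDLED;
}

static int example_bind(struct example_port *p)
{
        int rc = bind_evtchn_to_irqhandler_lateeoi(p->port,
                                                   example_evtchn_interrupt,
                                                   0, "example", p);
        if (rc < 0)
                return rc;
        p->irq = rc;
        return 0;
}

/* Called from process context once the consumer has handled the event;
 * this is what re-arms delivery for the port. */
static void example_event_handled(struct example_port *p)
{
        p->enabled = true;
        xen_irq_lateeoi(p->irq, 0);
}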
diff --git a/queue-4.14/xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch b/queue-4.14/xen-events-use-a-common-cpu-hotplug-hook-for-event-channels.patch
new file mode 100644 (file)
index 0000000..3d13517
--- /dev/null
@@ -0,0 +1,162 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:09 +0100
+Subject: xen/events: use a common cpu hotplug hook for event channels
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-13-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 7beb290caa2adb0a399e735a1e175db9aae0523a upstream.
+
+Today only fifo event channels have a cpu hotplug callback. In order
+to prepare for more percpu (de)init work move that callback into
+events_base.c and add percpu_init() and percpu_deinit() hooks to
+struct evtchn_ops.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c     |   25 +++++++++++++++++++++
+ drivers/xen/events/events_fifo.c     |   40 ++++++++++++++++-------------------
+ drivers/xen/events/events_internal.h |    3 ++
+ 3 files changed, 47 insertions(+), 21 deletions(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -33,6 +33,7 @@
+ #include <linux/irqnr.h>
+ #include <linux/pci.h>
+ #include <linux/spinlock.h>
++#include <linux/cpuhotplug.h>
+ #ifdef CONFIG_X86
+ #include <asm/desc.h>
+@@ -1833,6 +1834,26 @@ void xen_callback_vector(void) {}
+ static bool fifo_events = true;
+ module_param(fifo_events, bool, 0);
++static int xen_evtchn_cpu_prepare(unsigned int cpu)
++{
++      int ret = 0;
++
++      if (evtchn_ops->percpu_init)
++              ret = evtchn_ops->percpu_init(cpu);
++
++      return ret;
++}
++
++static int xen_evtchn_cpu_dead(unsigned int cpu)
++{
++      int ret = 0;
++
++      if (evtchn_ops->percpu_deinit)
++              ret = evtchn_ops->percpu_deinit(cpu);
++
++      return ret;
++}
++
+ void __init xen_init_IRQ(void)
+ {
+       int ret = -EINVAL;
+@@ -1843,6 +1864,10 @@ void __init xen_init_IRQ(void)
+       if (ret < 0)
+               xen_evtchn_2l_init();
++      cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
++                                "xen/evtchn:prepare",
++                                xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
++
+       evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
+                               sizeof(*evtchn_to_irq), GFP_KERNEL);
+       BUG_ON(!evtchn_to_irq);
+--- a/drivers/xen/events/events_fifo.c
++++ b/drivers/xen/events/events_fifo.c
+@@ -385,21 +385,6 @@ static void evtchn_fifo_resume(void)
+       event_array_pages = 0;
+ }
+-static const struct evtchn_ops evtchn_ops_fifo = {
+-      .max_channels      = evtchn_fifo_max_channels,
+-      .nr_channels       = evtchn_fifo_nr_channels,
+-      .setup             = evtchn_fifo_setup,
+-      .bind_to_cpu       = evtchn_fifo_bind_to_cpu,
+-      .clear_pending     = evtchn_fifo_clear_pending,
+-      .set_pending       = evtchn_fifo_set_pending,
+-      .is_pending        = evtchn_fifo_is_pending,
+-      .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+-      .mask              = evtchn_fifo_mask,
+-      .unmask            = evtchn_fifo_unmask,
+-      .handle_events     = evtchn_fifo_handle_events,
+-      .resume            = evtchn_fifo_resume,
+-};
+-
+ static int evtchn_fifo_alloc_control_block(unsigned cpu)
+ {
+       void *control_block = NULL;
+@@ -422,19 +407,36 @@ static int evtchn_fifo_alloc_control_blo
+       return ret;
+ }
+-static int xen_evtchn_cpu_prepare(unsigned int cpu)
++static int evtchn_fifo_percpu_init(unsigned int cpu)
+ {
+       if (!per_cpu(cpu_control_block, cpu))
+               return evtchn_fifo_alloc_control_block(cpu);
+       return 0;
+ }
+-static int xen_evtchn_cpu_dead(unsigned int cpu)
++static int evtchn_fifo_percpu_deinit(unsigned int cpu)
+ {
+       __evtchn_fifo_handle_events(cpu, true);
+       return 0;
+ }
++static const struct evtchn_ops evtchn_ops_fifo = {
++      .max_channels      = evtchn_fifo_max_channels,
++      .nr_channels       = evtchn_fifo_nr_channels,
++      .setup             = evtchn_fifo_setup,
++      .bind_to_cpu       = evtchn_fifo_bind_to_cpu,
++      .clear_pending     = evtchn_fifo_clear_pending,
++      .set_pending       = evtchn_fifo_set_pending,
++      .is_pending        = evtchn_fifo_is_pending,
++      .test_and_set_mask = evtchn_fifo_test_and_set_mask,
++      .mask              = evtchn_fifo_mask,
++      .unmask            = evtchn_fifo_unmask,
++      .handle_events     = evtchn_fifo_handle_events,
++      .resume            = evtchn_fifo_resume,
++      .percpu_init       = evtchn_fifo_percpu_init,
++      .percpu_deinit     = evtchn_fifo_percpu_deinit,
++};
++
+ int __init xen_evtchn_fifo_init(void)
+ {
+       int cpu = smp_processor_id();
+@@ -448,9 +450,5 @@ int __init xen_evtchn_fifo_init(void)
+       evtchn_ops = &evtchn_ops_fifo;
+-      cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+-                                "xen/evtchn:prepare",
+-                                xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
+-
+       return ret;
+ }
+--- a/drivers/xen/events/events_internal.h
++++ b/drivers/xen/events/events_internal.h
+@@ -71,6 +71,9 @@ struct evtchn_ops {
+       void (*handle_events)(unsigned cpu);
+       void (*resume)(void);
++
++      int (*percpu_init)(unsigned int cpu);
++      int (*percpu_deinit)(unsigned int cpu);
+ };
+ extern const struct evtchn_ops *evtchn_ops;
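A hypothetical second backend would opt into the new hooks by filling the two optional struct evtchn_ops members added above, roughly as in this sketch; evtchn_ops_example and the example_* callbacks are made up for illustration and are not part of the patch.

#include "events_internal.h"

/* Illustrative per-cpu init/deinit for an imaginary extra backend. */
static int example_percpu_init(unsigned int cpu)
{
        /* allocate or reuse per-cpu state for @cpu here */
        return 0;
}

static int example_percpu_deinit(unsigned int cpu)
{
        /* drop any work still queued for @cpu before it goes offline */
        return 0;
}

static const struct evtchn_ops evtchn_ops_example = {
        /* mandatory callbacks (max_channels, setup, mask, unmask, ...) omitted */
        .percpu_init   = example_percpu_init,
        .percpu_deinit = example_percpu_deinit,
};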
diff --git a/queue-4.14/xen-netback-use-lateeoi-irq-binding.patch b/queue-4.14/xen-netback-use-lateeoi-irq-binding.patch
new file mode 100644 (file)
index 0000000..02f6d81
--- /dev/null
@@ -0,0 +1,257 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:04 +0100
+Subject: xen/netback: use lateeoi irq binding
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-8-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 23025393dbeb3b8b3b60ebfa724cdae384992e27 upstream.
+
+In order to reduce the chance for the system becoming unresponsive due
+to event storms triggered by a misbehaving netfront use the lateeoi
+irq binding for netback and unmask the event channel only just before
+going to sleep waiting for new events.
+
+Make sure not to issue an EOI when none is pending by introducing an
+eoi_pending element to struct xenvif_queue.
+
+When no request has been consumed set the spurious flag when sending
+the EOI for an interrupt.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/common.h    |   15 ++++++++
+ drivers/net/xen-netback/interface.c |   61 ++++++++++++++++++++++++++++++------
+ drivers/net/xen-netback/netback.c   |   11 +++++-
+ drivers/net/xen-netback/rx.c        |   13 +++++--
+ 4 files changed, 86 insertions(+), 14 deletions(-)
+
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data
+       char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
+       struct xenvif *vif; /* Parent VIF */
++      /*
++       * TX/RX common EOI handling.
++       * When feature-split-event-channels = 0, interrupt handler sets
++       * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set
++       * by the RX and TX interrupt handlers.
++       * RX and TX handler threads will issue an EOI when either
++       * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or
++       * NETBK_TX_EOI) are set and they will reset those bits.
++       */
++      atomic_t eoi_pending;
++#define NETBK_RX_EOI          0x01
++#define NETBK_TX_EOI          0x02
++#define NETBK_COMMON_EOI      0x04
++
+       /* Use NAPI for guest TX */
+       struct napi_struct napi;
+       /* When feature-split-event-channels = 0, tx_irq = rx_irq. */
+@@ -356,6 +370,7 @@ int xenvif_dealloc_kthread(void *data);
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
++bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
+ void xenvif_rx_action(struct xenvif_queue *queue);
+ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vi
+               !vif->disabled;
+ }
++static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue)
++{
++      bool rc;
++
++      rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
++      if (rc)
++              napi_schedule(&queue->napi);
++      return rc;
++}
++
+ static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
+ {
+       struct xenvif_queue *queue = dev_id;
++      int old;
+-      if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
+-              napi_schedule(&queue->napi);
++      old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending);
++      WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n");
++
++      if (!xenvif_handle_tx_interrupt(queue)) {
++              atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending);
++              xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
++      }
+       return IRQ_HANDLED;
+ }
+@@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struc
+       return work_done;
+ }
++static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue)
++{
++      bool rc;
++
++      rc = xenvif_have_rx_work(queue, false);
++      if (rc)
++              xenvif_kick_thread(queue);
++      return rc;
++}
++
+ static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
+ {
+       struct xenvif_queue *queue = dev_id;
++      int old;
+-      xenvif_kick_thread(queue);
++      old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending);
++      WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n");
++
++      if (!xenvif_handle_rx_interrupt(queue)) {
++              atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending);
++              xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
++      }
+       return IRQ_HANDLED;
+ }
+ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
+ {
+-      xenvif_tx_interrupt(irq, dev_id);
+-      xenvif_rx_interrupt(irq, dev_id);
++      struct xenvif_queue *queue = dev_id;
++      int old;
++
++      old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
++      WARN(old, "Interrupt while EOI pending\n");
++
++      /* Use bitwise or as we need to call both functions. */
++      if ((!xenvif_handle_tx_interrupt(queue) |
++           !xenvif_handle_rx_interrupt(queue))) {
++              atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
++              xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
++      }
+       return IRQ_HANDLED;
+ }
+@@ -595,7 +638,7 @@ int xenvif_connect_ctrl(struct xenvif *v
+       shared = (struct xen_netif_ctrl_sring *)addr;
+       BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
+-      err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
++      err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn);
+       if (err < 0)
+               goto err_unmap;
+@@ -653,7 +696,7 @@ int xenvif_connect_data(struct xenvif_qu
+       if (tx_evtchn == rx_evtchn) {
+               /* feature-split-event-channels == 0 */
+-              err = bind_interdomain_evtchn_to_irqhandler(
++              err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+                       queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
+                       queue->name, queue);
+               if (err < 0)
+@@ -664,7 +707,7 @@ int xenvif_connect_data(struct xenvif_qu
+               /* feature-split-event-channels == 1 */
+               snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
+                        "%s-tx", queue->name);
+-              err = bind_interdomain_evtchn_to_irqhandler(
++              err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+                       queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
+                       queue->tx_irq_name, queue);
+               if (err < 0)
+@@ -674,7 +717,7 @@ int xenvif_connect_data(struct xenvif_qu
+               snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
+                        "%s-rx", queue->name);
+-              err = bind_interdomain_evtchn_to_irqhandler(
++              err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+                       queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
+                       queue->rx_irq_name, queue);
+               if (err < 0)
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -162,6 +162,10 @@ void xenvif_napi_schedule_or_enable_even
+       if (more_to_do)
+               napi_schedule(&queue->napi);
++      else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
++                                   &queue->eoi_pending) &
++               (NETBK_TX_EOI | NETBK_COMMON_EOI))
++              xen_irq_lateeoi(queue->tx_irq, 0);
+ }
+ static void tx_add_credit(struct xenvif_queue *queue)
+@@ -1615,9 +1619,14 @@ static bool xenvif_ctrl_work_todo(struct
+ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
+ {
+       struct xenvif *vif = data;
++      unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
+-      while (xenvif_ctrl_work_todo(vif))
++      while (xenvif_ctrl_work_todo(vif)) {
+               xenvif_ctrl_action(vif);
++              eoi_flag = 0;
++      }
++
++      xen_irq_lateeoi(irq, eoi_flag);
+       return IRQ_HANDLED;
+ }
+--- a/drivers/net/xen-netback/rx.c
++++ b/drivers/net/xen-netback/rx.c
+@@ -490,13 +490,13 @@ static bool xenvif_rx_queue_ready(struct
+       return queue->stalled && prod - cons >= 1;
+ }
+-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
++bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
+ {
+       return xenvif_rx_ring_slots_available(queue) ||
+               (queue->vif->stall_timeout &&
+                (xenvif_rx_queue_stalled(queue) ||
+                 xenvif_rx_queue_ready(queue))) ||
+-              kthread_should_stop() ||
++              (test_kthread && kthread_should_stop()) ||
+               queue->vif->disabled;
+ }
+@@ -527,15 +527,20 @@ static void xenvif_wait_for_rx_work(stru
+ {
+       DEFINE_WAIT(wait);
+-      if (xenvif_have_rx_work(queue))
++      if (xenvif_have_rx_work(queue, true))
+               return;
+       for (;;) {
+               long ret;
+               prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+-              if (xenvif_have_rx_work(queue))
++              if (xenvif_have_rx_work(queue, true))
+                       break;
++              if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
++                                      &queue->eoi_pending) &
++                  (NETBK_RX_EOI | NETBK_COMMON_EOI))
++                      xen_irq_lateeoi(queue->rx_irq, 0);
++
+               ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
+               if (!ret)
+                       break;
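The eoi_pending bookkeeping described in the changelog above reduces to a claim/retract/release pattern. The sketch below uses a generic flag and helpers (EXAMPLE_EOI, example_*) rather than the netback names, but the atomic_fetch_or()/atomic_andnot()/atomic_fetch_andnot() usage mirrors the patch.

#include <linux/atomic.h>
#include <linux/interrupt.h>
#include <xen/events.h>

#define EXAMPLE_EOI     0x01    /* stands in for NETBK_RX_EOI/NETBK_TX_EOI */

/* Placeholder for "does the shared ring hold unconsumed requests?". */
static bool example_have_work(void)
{
        return false;
}

/* Interrupt handler: claim the EOI, and retract it immediately if the
 * event turned out to be spurious. */
static irqreturn_t example_interrupt(int irq, void *dev_id)
{
        atomic_t *eoi_pending = dev_id;

        atomic_fetch_or(EXAMPLE_EOI, eoi_pending);
        if (!example_have_work()) {
                atomic_andnot(EXAMPLE_EOI, eoi_pending);
                xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
        }
        return IRQ_HANDLED;
}

/* Thread/NAPI side: release the EOI once all claimed work is finished. */
static void example_work_finished(int irq, atomic_t *eoi_pending)
{
        if (atomic_fetch_andnot(EXAMPLE_EOI, eoi_pending) & EXAMPLE_EOI)
                xen_irq_lateeoi(irq, 0);
}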
diff --git a/queue-4.14/xen-pciback-use-lateeoi-irq-binding.patch b/queue-4.14/xen-pciback-use-lateeoi-irq-binding.patch
new file mode 100644 (file)
index 0000000..a95440b
--- /dev/null
@@ -0,0 +1,227 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:07 +0100
+Subject: xen/pciback: use lateeoi irq binding
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-11-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c2711441bc961b37bba0615dd7135857d189035f upstream.
+
+In order to reduce the chance for the system becoming unresponsive due
+to event storms triggered by a misbehaving pcifront use the lateeoi irq
+binding for pciback and unmask the event channel only just before
+leaving the event handling function.
+
+Restructure the handling to support that scheme. Basically an event can
+come in for two reasons: either a normal request for a pciback action,
+which is handled in a worker, or in case the guest has finished an AER
+request which was requested by pciback.
+
+When an AER request is issued to the guest and a normal pciback action
+is currently active issue an EOI early in order to be able to receive
+another event when the AER request has been finished by the guest.
+
+Let the worker processing the normal requests run until no further
+request is pending, instead of starting a new worker in that case.
+Issue the EOI only just before leaving the worker.
+
+This scheme allows to drop calling the generic function
+xen_pcibk_test_and_schedule_op() after processing of any request as
+the handling of both request types is now separated more cleanly.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/xen-pciback/pci_stub.c    |   14 ++++-----
+ drivers/xen/xen-pciback/pciback.h     |   12 +++++++-
+ drivers/xen/xen-pciback/pciback_ops.c |   48 ++++++++++++++++++++++++++--------
+ drivers/xen/xen-pciback/xenbus.c      |    2 -
+ 4 files changed, 56 insertions(+), 20 deletions(-)
+
+--- a/drivers/xen/xen-pciback/pci_stub.c
++++ b/drivers/xen/xen-pciback/pci_stub.c
+@@ -733,10 +733,17 @@ static pci_ers_result_t common_process(s
+       wmb();
+       notify_remote_via_irq(pdev->evtchn_irq);
++      /* Enable IRQ to signal "request done". */
++      xen_pcibk_lateeoi(pdev, 0);
++
+       ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
+                                !(test_bit(_XEN_PCIB_active, (unsigned long *)
+                                &sh_info->flags)), 300*HZ);
++      /* Enable IRQ for pcifront request if not already active. */
++      if (!test_bit(_PDEVF_op_active, &pdev->flags))
++              xen_pcibk_lateeoi(pdev, 0);
++
+       if (!ret) {
+               if (test_bit(_XEN_PCIB_active,
+                       (unsigned long *)&sh_info->flags)) {
+@@ -750,13 +757,6 @@ static pci_ers_result_t common_process(s
+       }
+       clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
+-      if (test_bit(_XEN_PCIF_active,
+-              (unsigned long *)&sh_info->flags)) {
+-              dev_dbg(&psdev->dev->dev,
+-                      "schedule pci_conf service in " DRV_NAME "\n");
+-              xen_pcibk_test_and_schedule_op(psdev->pdev);
+-      }
+-
+       res = (pci_ers_result_t)aer_op->err;
+       return res;
+ }
+--- a/drivers/xen/xen-pciback/pciback.h
++++ b/drivers/xen/xen-pciback/pciback.h
+@@ -14,6 +14,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/workqueue.h>
+ #include <linux/atomic.h>
++#include <xen/events.h>
+ #include <xen/interface/io/pciif.h>
+ #define DRV_NAME      "xen-pciback"
+@@ -27,6 +28,8 @@ struct pci_dev_entry {
+ #define PDEVF_op_active               (1<<(_PDEVF_op_active))
+ #define _PCIB_op_pending      (1)
+ #define PCIB_op_pending               (1<<(_PCIB_op_pending))
++#define _EOI_pending          (2)
++#define EOI_pending           (1<<(_EOI_pending))
+ struct xen_pcibk_device {
+       void *pci_dev_data;
+@@ -182,12 +185,17 @@ static inline void xen_pcibk_release_dev
+ irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
+ void xen_pcibk_do_op(struct work_struct *data);
++static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
++                                   unsigned int eoi_flag)
++{
++      if (test_and_clear_bit(_EOI_pending, &pdev->flags))
++              xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
++}
++
+ int xen_pcibk_xenbus_register(void);
+ void xen_pcibk_xenbus_unregister(void);
+ extern int verbose_request;
+-
+-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
+ #endif
+ /* Handles shared IRQs that can to device domain and control domain. */
+--- a/drivers/xen/xen-pciback/pciback_ops.c
++++ b/drivers/xen/xen-pciback/pciback_ops.c
+@@ -297,26 +297,41 @@ int xen_pcibk_disable_msix(struct xen_pc
+       return 0;
+ }
+ #endif
++
++static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
++{
++      return test_bit(_XEN_PCIF_active,
++                      (unsigned long *)&pdev->sh_info->flags) &&
++             !test_and_set_bit(_PDEVF_op_active, &pdev->flags);
++}
++
+ /*
+ * Now the same evtchn is used for both pcifront conf_read_write request
+ * as well as pcie aer front end ack. We use a new work_queue to schedule
+ * xen_pcibk conf_read_write service for avoiding confict with aer_core
+ * do_recovery job which also use the system default work_queue
+ */
+-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
++static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+ {
++      bool eoi = true;
++
+       /* Check that frontend is requesting an operation and that we are not
+        * already processing a request */
+-      if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
+-          && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
++      if (xen_pcibk_test_op_pending(pdev)) {
+               schedule_work(&pdev->op_work);
++              eoi = false;
+       }
+       /*_XEN_PCIB_active should have been cleared by pcifront. And also make
+       sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
+       if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
+           && test_bit(_PCIB_op_pending, &pdev->flags)) {
+               wake_up(&xen_pcibk_aer_wait_queue);
++              eoi = false;
+       }
++
++      /* EOI if there was nothing to do. */
++      if (eoi)
++              xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
+ }
+ /* Performing the configuration space reads/writes must not be done in atomic
+@@ -324,10 +339,8 @@ void xen_pcibk_test_and_schedule_op(stru
+  * use of semaphores). This function is intended to be called from a work
+  * queue in process context taking a struct xen_pcibk_device as a parameter */
+-void xen_pcibk_do_op(struct work_struct *data)
++static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
+ {
+-      struct xen_pcibk_device *pdev =
+-              container_of(data, struct xen_pcibk_device, op_work);
+       struct pci_dev *dev;
+       struct xen_pcibk_dev_data *dev_data = NULL;
+       struct xen_pci_op *op = &pdev->op;
+@@ -400,16 +413,31 @@ void xen_pcibk_do_op(struct work_struct
+       smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
+       clear_bit(_PDEVF_op_active, &pdev->flags);
+       smp_mb__after_atomic(); /* /before/ final check for work */
++}
+-      /* Check to see if the driver domain tried to start another request in
+-       * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
+-      */
+-      xen_pcibk_test_and_schedule_op(pdev);
++void xen_pcibk_do_op(struct work_struct *data)
++{
++      struct xen_pcibk_device *pdev =
++              container_of(data, struct xen_pcibk_device, op_work);
++
++      do {
++              xen_pcibk_do_one_op(pdev);
++      } while (xen_pcibk_test_op_pending(pdev));
++
++      xen_pcibk_lateeoi(pdev, 0);
+ }
+ irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
+ {
+       struct xen_pcibk_device *pdev = dev_id;
++      bool eoi;
++
++      /* IRQs might come in before pdev->evtchn_irq is written. */
++      if (unlikely(pdev->evtchn_irq != irq))
++              pdev->evtchn_irq = irq;
++
++      eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
++      WARN(eoi, "IRQ while EOI pending\n");
+       xen_pcibk_test_and_schedule_op(pdev);
+--- a/drivers/xen/xen-pciback/xenbus.c
++++ b/drivers/xen/xen-pciback/xenbus.c
+@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xe
+       pdev->sh_info = vaddr;
+-      err = bind_interdomain_evtchn_to_irqhandler(
++      err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+               pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
+               0, DRV_NAME, pdev);
+       if (err < 0) {
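The pciback change tracks a single "EOI owed" bit so that whichever path finishes last, worker or AER wait, sends exactly one EOI. A condensed sketch of that pairing follows, with illustrative example_* names in place of the driver's and assuming only the xen_irq_lateeoi() interface from this series.

#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <xen/events.h>

#define _EXAMPLE_EOI_PENDING    0       /* illustrative bit, akin to _EOI_pending */

struct example_dev {
        unsigned long flags;
        int irq;
        struct work_struct work;
};

/* Send at most one EOI per received interrupt, from whichever path runs last. */
static void example_lateeoi(struct example_dev *d, unsigned int eoi_flag)
{
        if (test_and_clear_bit(_EXAMPLE_EOI_PENDING, &d->flags))
                xen_irq_lateeoi(d->irq, eoi_flag);
}

/* Placeholder for "is the frontend actually requesting an operation?". */
static bool example_have_request(struct example_dev *d)
{
        return false;
}

static irqreturn_t example_handle_event(int irq, void *dev_id)
{
        struct example_dev *d = dev_id;

        set_bit(_EXAMPLE_EOI_PENDING, &d->flags);       /* an EOI is now owed */

        if (example_have_request(d))
                schedule_work(&d->work);        /* worker ends with example_lateeoi(d, 0) */
        else
                example_lateeoi(d, XEN_EOI_FLAG_SPURIOUS);

        return IRQ_HANDLED;
}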
diff --git a/queue-4.14/xen-pvcallsback-use-lateeoi-irq-binding.patch b/queue-4.14/xen-pvcallsback-use-lateeoi-irq-binding.patch
new file mode 100644 (file)
index 0000000..7723a65
--- /dev/null
@@ -0,0 +1,231 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:06 +0100
+Subject: xen/pvcallsback: use lateeoi irq binding
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-10-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit c8d647a326f06a39a8e5f0f1af946eacfa1835f8 upstream.
+
+In order to reduce the chance for the system becoming unresponsive due
+to event storms triggered by a misbehaving pvcallsfront use the lateeoi
+irq binding for pvcallsback and unmask the event channel only after
+handling all write requests, which are the ones coming in via an irq.
+
+This requires modifying the logic a little bit to not require an event
+for each write request, but to keep the ioworker running until no
+further data is found on the ring page to be processed.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/pvcalls-back.c |   76 +++++++++++++++++++++++++++------------------
+ 1 file changed, 46 insertions(+), 30 deletions(-)
+
+--- a/drivers/xen/pvcalls-back.c
++++ b/drivers/xen/pvcalls-back.c
+@@ -75,6 +75,7 @@ struct sock_mapping {
+       atomic_t write;
+       atomic_t io;
+       atomic_t release;
++      atomic_t eoi;
+       void (*saved_data_ready)(struct sock *sk);
+       struct pvcalls_ioworker ioworker;
+ };
+@@ -96,7 +97,7 @@ static int pvcalls_back_release_active(s
+                                      struct pvcalls_fedata *fedata,
+                                      struct sock_mapping *map);
+-static void pvcalls_conn_back_read(void *opaque)
++static bool pvcalls_conn_back_read(void *opaque)
+ {
+       struct sock_mapping *map = (struct sock_mapping *)opaque;
+       struct msghdr msg;
+@@ -116,17 +117,17 @@ static void pvcalls_conn_back_read(void
+       virt_mb();
+       if (error)
+-              return;
++              return false;
+       size = pvcalls_queued(prod, cons, array_size);
+       if (size >= array_size)
+-              return;
++              return false;
+       spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
+       if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
+               atomic_set(&map->read, 0);
+               spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
+                               flags);
+-              return;
++              return true;
+       }
+       spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
+       wanted = array_size - size;
+@@ -154,7 +155,7 @@ static void pvcalls_conn_back_read(void
+       ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
+       WARN_ON(ret > wanted);
+       if (ret == -EAGAIN) /* shouldn't happen */
+-              return;
++              return true;
+       if (!ret)
+               ret = -ENOTCONN;
+       spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
+@@ -173,10 +174,10 @@ static void pvcalls_conn_back_read(void
+       virt_wmb();
+       notify_remote_via_irq(map->irq);
+-      return;
++      return true;
+ }
+-static void pvcalls_conn_back_write(struct sock_mapping *map)
++static bool pvcalls_conn_back_write(struct sock_mapping *map)
+ {
+       struct pvcalls_data_intf *intf = map->ring;
+       struct pvcalls_data *data = &map->data;
+@@ -193,7 +194,7 @@ static void pvcalls_conn_back_write(stru
+       array_size = XEN_FLEX_RING_SIZE(map->ring_order);
+       size = pvcalls_queued(prod, cons, array_size);
+       if (size == 0)
+-              return;
++              return false;
+       memset(&msg, 0, sizeof(msg));
+       msg.msg_flags |= MSG_DONTWAIT;
+@@ -215,12 +216,11 @@ static void pvcalls_conn_back_write(stru
+       atomic_set(&map->write, 0);
+       ret = inet_sendmsg(map->sock, &msg, size);
+-      if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
++      if (ret == -EAGAIN) {
+               atomic_inc(&map->write);
+               atomic_inc(&map->io);
++              return true;
+       }
+-      if (ret == -EAGAIN)
+-              return;
+       /* write the data, then update the indexes */
+       virt_wmb();
+@@ -233,9 +233,13 @@ static void pvcalls_conn_back_write(stru
+       }
+       /* update the indexes, then notify the other end */
+       virt_wmb();
+-      if (prod != cons + ret)
++      if (prod != cons + ret) {
+               atomic_inc(&map->write);
++              atomic_inc(&map->io);
++      }
+       notify_remote_via_irq(map->irq);
++
++      return true;
+ }
+ static void pvcalls_back_ioworker(struct work_struct *work)
+@@ -244,6 +248,7 @@ static void pvcalls_back_ioworker(struct
+               struct pvcalls_ioworker, register_work);
+       struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
+               ioworker);
++      unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+       while (atomic_read(&map->io) > 0) {
+               if (atomic_read(&map->release) > 0) {
+@@ -251,10 +256,18 @@ static void pvcalls_back_ioworker(struct
+                       return;
+               }
+-              if (atomic_read(&map->read) > 0)
+-                      pvcalls_conn_back_read(map);
+-              if (atomic_read(&map->write) > 0)
+-                      pvcalls_conn_back_write(map);
++              if (atomic_read(&map->read) > 0 &&
++                  pvcalls_conn_back_read(map))
++                      eoi_flags = 0;
++              if (atomic_read(&map->write) > 0 &&
++                  pvcalls_conn_back_write(map))
++                      eoi_flags = 0;
++
++              if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
++                      atomic_set(&map->eoi, 0);
++                      xen_irq_lateeoi(map->irq, eoi_flags);
++                      eoi_flags = XEN_EOI_FLAG_SPURIOUS;
++              }
+               atomic_dec(&map->io);
+       }
+@@ -351,12 +364,9 @@ static struct sock_mapping *pvcalls_new_
+               goto out;
+       map->bytes = page;
+-      ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
+-                                                  evtchn,
+-                                                  pvcalls_back_conn_event,
+-                                                  0,
+-                                                  "pvcalls-backend",
+-                                                  map);
++      ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
++                      fedata->dev->otherend_id, evtchn,
++                      pvcalls_back_conn_event, 0, "pvcalls-backend", map);
+       if (ret < 0)
+               goto out;
+       map->irq = ret;
+@@ -890,15 +900,18 @@ static irqreturn_t pvcalls_back_event(in
+ {
+       struct xenbus_device *dev = dev_id;
+       struct pvcalls_fedata *fedata = NULL;
++      unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+-      if (dev == NULL)
+-              return IRQ_HANDLED;
++      if (dev) {
++              fedata = dev_get_drvdata(&dev->dev);
++              if (fedata) {
++                      pvcalls_back_work(fedata);
++                      eoi_flags = 0;
++              }
++      }
+-      fedata = dev_get_drvdata(&dev->dev);
+-      if (fedata == NULL)
+-              return IRQ_HANDLED;
++      xen_irq_lateeoi(irq, eoi_flags);
+-      pvcalls_back_work(fedata);
+       return IRQ_HANDLED;
+ }
+@@ -908,12 +921,15 @@ static irqreturn_t pvcalls_back_conn_eve
+       struct pvcalls_ioworker *iow;
+       if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
+-              map->sock->sk->sk_user_data != map)
++              map->sock->sk->sk_user_data != map) {
++              xen_irq_lateeoi(irq, 0);
+               return IRQ_HANDLED;
++      }
+       iow = &map->ioworker;
+       atomic_inc(&map->write);
++      atomic_inc(&map->eoi);
+       atomic_inc(&map->io);
+       queue_work(iow->wq, &iow->register_work);
+@@ -948,7 +964,7 @@ static int backend_connect(struct xenbus
+               goto error;
+       }
+-      err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
++      err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
+       if (err < 0)
+               goto error;
+       fedata->irq = err;
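The pvcalls change keeps the ioworker running until the ring is drained and only then issues the EOI, flagged spurious unless real work was done. A stripped-down sketch of that loop, with illustrative names (struct example_conn, example_process_ring()) standing in for the driver's:

#include <linux/atomic.h>
#include <xen/events.h>

/* Illustrative connection state, standing in for struct sock_mapping. */
struct example_conn {
        atomic_t io;    /* "worker has something to look at" count */
        atomic_t eoi;   /* an EOI is owed for the last interrupt */
        int irq;
};

/* Placeholder: drain whatever is currently on the ring; true if it did work. */
static bool example_process_ring(struct example_conn *conn)
{
        return false;
}

static void example_ioworker(struct example_conn *conn)
{
        unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

        while (atomic_read(&conn->io) > 0) {
                if (example_process_ring(conn))
                        eoi_flags = 0;  /* real work done, EOI is not spurious */

                if (atomic_read(&conn->eoi) > 0) {
                        atomic_set(&conn->eoi, 0);
                        xen_irq_lateeoi(conn->irq, eoi_flags);
                        eoi_flags = XEN_EOI_FLAG_SPURIOUS;
                }
                atomic_dec(&conn->io);
        }
}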
diff --git a/queue-4.14/xen-scsiback-use-lateeoi-irq-binding.patch b/queue-4.14/xen-scsiback-use-lateeoi-irq-binding.patch
new file mode 100644 (file)
index 0000000..17519ab
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Tue Nov 17 12:38:50 PM CET 2020
+From: Juergen Gross <jgross@suse.com>
+Date: Tue,  3 Nov 2020 15:29:05 +0100
+Subject: xen/scsiback: use lateeoi irq binding
+To: stable@vger.kernel.org
+Message-ID: <20201103142911.21980-9-jgross@suse.com>
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 86991b6e7ea6c613b7692f65106076943449b6b7 upstream.
+
+In order to reduce the chance for the system becoming unresponsive due
+to event storms triggered by a misbehaving scsifront use the lateeoi
+irq binding for scsiback and unmask the event channel only just before
+leaving the event handling function.
+
+In case of a ring protocol error don't issue an EOI in order to avoid
+the possibility to use that for producing an event storm. This at once
+will result in no further call of scsiback_irq_fn(), so the ring_error
+struct member can be dropped and scsiback_do_cmd_fn() can signal the
+protocol error via a negative return value.
+
+This is part of XSA-332.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Wei Liu <wl@xen.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/xen-scsiback.c |   23 +++++++++++++----------
+ 1 file changed, 13 insertions(+), 10 deletions(-)
+
+--- a/drivers/xen/xen-scsiback.c
++++ b/drivers/xen/xen-scsiback.c
+@@ -91,7 +91,6 @@ struct vscsibk_info {
+       unsigned int irq;
+       struct vscsiif_back_ring ring;
+-      int ring_error;
+       spinlock_t ring_lock;
+       atomic_t nr_unreplied_reqs;
+@@ -721,7 +720,8 @@ static struct vscsibk_pend *prepare_pend
+       return pending_req;
+ }
+-static int scsiback_do_cmd_fn(struct vscsibk_info *info)
++static int scsiback_do_cmd_fn(struct vscsibk_info *info,
++                            unsigned int *eoi_flags)
+ {
+       struct vscsiif_back_ring *ring = &info->ring;
+       struct vscsiif_request ring_req;
+@@ -738,11 +738,12 @@ static int scsiback_do_cmd_fn(struct vsc
+               rc = ring->rsp_prod_pvt;
+               pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
+                          info->domid, rp, rc, rp - rc);
+-              info->ring_error = 1;
+-              return 0;
++              return -EINVAL;
+       }
+       while ((rc != rp)) {
++              *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
++
+               if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
+                       break;
+@@ -801,13 +802,16 @@ static int scsiback_do_cmd_fn(struct vsc
+ static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
+ {
+       struct vscsibk_info *info = dev_id;
++      int rc;
++      unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+-      if (info->ring_error)
+-              return IRQ_HANDLED;
+-
+-      while (scsiback_do_cmd_fn(info))
++      while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0)
+               cond_resched();
++      /* In case of a ring error we keep the event channel masked. */
++      if (!rc)
++              xen_irq_lateeoi(irq, eoi_flags);
++
+       return IRQ_HANDLED;
+ }
+@@ -828,7 +832,7 @@ static int scsiback_init_sring(struct vs
+       sring = (struct vscsiif_sring *)area;
+       BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
+-      err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
++      err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn);
+       if (err < 0)
+               goto unmap_page;
+@@ -1251,7 +1255,6 @@ static int scsiback_probe(struct xenbus_
+       info->domid = dev->otherend_id;
+       spin_lock_init(&info->ring_lock);
+-      info->ring_error = 0;
+       atomic_set(&info->nr_unreplied_reqs, 0);
+       init_waitqueue_head(&info->waiting_to_free);
+       info->dev = dev;
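The scsiback conversion makes the request loop report a protocol error via a negative return value, in which case no EOI is issued and the channel stays masked. A reduced sketch of that control flow, using illustrative example_* names in place of the driver's:

#include <linux/interrupt.h>
#include <linux/sched.h>
#include <xen/events.h>

struct example_info {
        int domid;      /* illustrative; a real backend also holds the ring, etc. */
};

/* Placeholder request loop: returns >0 while requests were consumed, 0 once
 * the ring is empty, <0 on a bogus producer index.  On real progress it
 * would clear XEN_EOI_FLAG_SPURIOUS from *eoi_flags, as the patch does. */
static int example_do_cmd_fn(struct example_info *info, unsigned int *eoi_flags)
{
        return 0;
}

static irqreturn_t example_irq_fn(int irq, void *dev_id)
{
        struct example_info *info = dev_id;
        unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
        int rc;

        while ((rc = example_do_cmd_fn(info, &eoi_flags)) > 0)
                cond_resched();

        /* On a ring protocol error (rc < 0) no EOI is sent: the event channel
         * stays masked and the misbehaving frontend is cut off. */
        if (!rc)
                xen_irq_lateeoi(irq, eoi_flags);

        return IRQ_HANDLED;
}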