5.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 27 Jun 2021 14:27:15 +0000 (16:27 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 27 Jun 2021 14:27:15 +0000 (16:27 +0200)
added patches:
ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch
i2c-robotfuzz-osif-fix-control-request-directions.patch
kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch
kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch
s390-clear-pt_regs-flags-on-irq-entry.patch
s390-fix-system-call-restart-with-multiple-signals.patch
s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch
s390-topology-clear-thread-group-maps-for-offline-cpus.patch
x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch
x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch
xen-events-reset-active-flag-for-lateeoi-events-later.patch

13 files changed:
queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch [new file with mode: 0644]
queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch [new file with mode: 0644]
queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch [new file with mode: 0644]
queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch [new file with mode: 0644]
queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch [new file with mode: 0644]
queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch [new file with mode: 0644]
queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch [new file with mode: 0644]
queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch [new file with mode: 0644]
queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch [new file with mode: 0644]
queue-5.12/series
queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch [new file with mode: 0644]
queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch [new file with mode: 0644]
queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch [new file with mode: 0644]

diff --git a/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch b/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch
new file mode 100644 (file)
index 0000000..efa5c74
--- /dev/null
@@ -0,0 +1,54 @@
+From 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Tue, 1 Jun 2021 09:40:25 -0400
+Subject: ceph: must hold snap_rwsem when filling inode for async create
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 upstream.
+
+...and add a lockdep assertion for it to ceph_fill_inode().
+
+Cc: stable@vger.kernel.org # v5.7+
+Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible")
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/file.c  |    3 +++
+ fs/ceph/inode.c |    2 ++
+ 2 files changed, 5 insertions(+)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -578,6 +578,7 @@ static int ceph_finish_async_create(stru
+       struct ceph_inode_info *ci = ceph_inode(dir);
+       struct inode *inode;
+       struct timespec64 now;
++      struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
+       struct ceph_vino vino = { .ino = req->r_deleg_ino,
+                                 .snap = CEPH_NOSNAP };
+@@ -615,8 +616,10 @@ static int ceph_finish_async_create(stru
+       ceph_file_layout_to_legacy(lo, &in.layout);
++      down_read(&mdsc->snap_rwsem);
+       ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
+                             req->r_fmode, NULL);
++      up_read(&mdsc->snap_rwsem);
+       if (ret) {
+               dout("%s failed to fill inode: %d\n", __func__, ret);
+               ceph_dir_clear_complete(dir);
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode,
+       bool new_version = false;
+       bool fill_inline = false;
++      lockdep_assert_held(&mdsc->snap_rwsem);
++
+       dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__,
+            inode, ceph_vinop(inode), le64_to_cpu(info->version),
+            ci->i_version);
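
The locking contract this patch establishes can be sketched in isolation: callers take snap_rwsem for read around the fill, and the fill routine documents the precondition with lockdep_assert_held(). The following is an illustrative fragment with hypothetical function names, not the actual ceph code:

    /* Illustrative sketch only; names are hypothetical, not the ceph code. */
    #include <linux/rwsem.h>
    #include <linux/lockdep.h>

    static DECLARE_RWSEM(snap_rwsem);

    static int fill_inode_sketch(void)
    {
            /* Precondition: caller holds snap_rwsem (the new assertion). */
            lockdep_assert_held(&snap_rwsem);
            /* ... fill the inode from the MDS reply ... */
            return 0;
    }

    static int finish_async_create_sketch(void)
    {
            int ret;

            down_read(&snap_rwsem);         /* satisfies the assertion */
            ret = fill_inode_sketch();
            up_read(&snap_rwsem);
            return ret;
    }
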
diff --git a/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch b/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch
new file mode 100644 (file)
index 0000000..dfaa7b6
--- /dev/null
@@ -0,0 +1,50 @@
+From 4ca070ef0dd885616ef294d269a9bf8e3b258e1a Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Mon, 24 May 2021 11:09:12 +0200
+Subject: i2c: robotfuzz-osif: fix control-request directions
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 4ca070ef0dd885616ef294d269a9bf8e3b258e1a upstream.
+
+The direction of the pipe argument must match the request-type direction
+bit or control requests may fail depending on the host-controller-driver
+implementation.
+
+Control transfers without a data stage are treated as OUT requests by
+the USB stack and should be using usb_sndctrlpipe(). Failing to do so
+will now trigger a warning.
+
+Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously
+used the osif_usb_read() helper and set the IN direction bit.
+
+Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com
+Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.")
+Cc: stable@vger.kernel.org      # 3.14
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-robotfuzz-osif.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c
++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c
+@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter
+                       }
+               }
+-              ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
++              ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
+               if (ret) {
+                       dev_err(&adapter->dev, "failure sending STOP\n");
+                       return -EREMOTEIO;
+@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interfa
+        * Set bus frequency. The frequency is:
+        * 120,000,000 / ( 16 + 2 * div * 4^prescale).
+        * Using dev = 52, prescale = 0 give 100KHz */
+-      ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
++      ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
+                           NULL, 0);
+       if (ret) {
+               dev_err(&interface->dev, "failure sending bit rate");
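
The rule the fix enforces is general USB-core usage: the pipe direction passed to usb_control_msg() must agree with the direction bit in the request type, and a control transfer with no data stage is an OUT transfer. A minimal sketch with a hypothetical vendor request number (not the OSIF constants):

    #include <linux/usb.h>

    #define MY_VENDOR_REQ   0x01    /* hypothetical request number */

    static int send_zero_length_cmd(struct usb_device *udev, u16 value)
    {
            /*
             * No data stage -> OUT transfer: usb_sndctrlpipe() paired
             * with a request type that does not set USB_DIR_IN. Pairing
             * usb_rcvctrlpipe() with an OUT request type is exactly the
             * mismatch the patch above corrects.
             */
            return usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
                                   MY_VENDOR_REQ,
                                   USB_TYPE_VENDOR | USB_RECIP_DEVICE,
                                   value, 0, NULL, 0, 1000);
    }
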
diff --git a/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch b/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
new file mode 100644 (file)
index 0000000..c167187
--- /dev/null
@@ -0,0 +1,181 @@
+From 5fa54346caf67b4b1b10b1f390316ae466da4d53 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Thu, 24 Jun 2021 18:39:48 -0700
+Subject: kthread: prevent deadlock when kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()
+
+From: Petr Mladek <pmladek@suse.com>
+
+commit 5fa54346caf67b4b1b10b1f390316ae466da4d53 upstream.
+
+The system might hang with the following backtrace:
+
+       schedule+0x80/0x100
+       schedule_timeout+0x48/0x138
+       wait_for_common+0xa4/0x134
+       wait_for_completion+0x1c/0x2c
+       kthread_flush_work+0x114/0x1cc
+       kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
+       kthread_cancel_delayed_work_sync+0x18/0x2c
+       xxxx_pm_notify+0xb0/0xd8
+       blocking_notifier_call_chain_robust+0x80/0x194
+       pm_notifier_call_chain_robust+0x28/0x4c
+       suspend_prepare+0x40/0x260
+       enter_state+0x80/0x3f4
+       pm_suspend+0x60/0xdc
+       state_store+0x108/0x144
+       kobj_attr_store+0x38/0x88
+       sysfs_kf_write+0x64/0xc0
+       kernfs_fop_write_iter+0x108/0x1d0
+       vfs_write+0x2f4/0x368
+       ksys_write+0x7c/0xec
+
+It is caused by the following race between kthread_mod_delayed_work()
+and kthread_cancel_delayed_work_sync():
+
+CPU0                           CPU1
+
+Context: Thread A              Context: Thread B
+
+kthread_mod_delayed_work()
+  spin_lock()
+  __kthread_cancel_work()
+     spin_unlock()
+     del_timer_sync()
+                               kthread_cancel_delayed_work_sync()
+                                 spin_lock()
+                                 __kthread_cancel_work()
+                                   spin_unlock()
+                                   del_timer_sync()
+                                   spin_lock()
+
+                                 work->canceling++
+                                 spin_unlock
+     spin_lock()
+   queue_delayed_work()
+     // dwork is put into the worker->delayed_work_list
+
+   spin_unlock()
+
+                                 kthread_flush_work()
+     // flush_work is put at the tail of the dwork
+
+                                   wait_for_completion()
+
+Context: IRQ
+
+  kthread_delayed_work_timer_fn()
+    spin_lock()
+    list_del_init(&work->node);
+    spin_unlock()
+
+BANG: flush_work is no longer linked and will never be processed.
+
+The problem is that kthread_mod_delayed_work() checks the
+work->canceling flag before canceling the timer.
+
+A simple solution is to (re)check work->canceling after
+__kthread_cancel_work().  But then it is not clear what should be
+returned when __kthread_cancel_work() removed the work from the queue
+(list) and it can't queue it again with the new @delay.
+
+The return value might be used for reference counting.  The caller has
+to know whether a new work has been queued or an existing one was
+replaced.
+
+The proper solution is that kthread_mod_delayed_work() will remove the
+work from the queue (list) _only_ when work->canceling is not set.  The
+flag must be checked after the timer is stopped and the remaining
+operations can be done under worker->lock.
+
+Note that kthread_mod_delayed_work() could remove the timer and then
+bail out.  It is fine.  The other canceling caller needs to cancel the
+timer as well.  The important thing is that the queue (list)
+manipulation is done atomically under worker->lock.
+
+Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com
+Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work")
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Reported-by: Martin Liu <liumartin@google.com>
+Cc: <jenhaochen@google.com>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kthread.c |   35 ++++++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1119,8 +1119,11 @@ static void kthread_cancel_delayed_work_
+ }
+ /*
+- * This function removes the work from the worker queue. Also it makes sure
+- * that it won't get queued later via the delayed work's timer.
++ * This function removes the work from the worker queue.
++ *
++ * It is called under worker->lock. The caller must make sure that
++ * the timer used by delayed work is not running, e.g. by calling
++ * kthread_cancel_delayed_work_timer().
+  *
+  * The work might still be in use when this function finishes. See the
+  * current_work proceed by the worker.
+@@ -1128,13 +1131,8 @@ static void kthread_cancel_delayed_work_
+  * Return: %true if @work was pending and successfully canceled,
+  *    %false if @work was not pending
+  */
+-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+-                                unsigned long *flags)
++static bool __kthread_cancel_work(struct kthread_work *work)
+ {
+-      /* Try to cancel the timer if exists. */
+-      if (is_dwork)
+-              kthread_cancel_delayed_work_timer(work, flags);
+-
+       /*
+        * Try to remove the work from a worker list. It might either
+        * be from worker->work_list or from worker->delayed_work_list.
+@@ -1187,11 +1185,23 @@ bool kthread_mod_delayed_work(struct kth
+       /* Work must not be used with >1 worker, see kthread_queue_work() */
+       WARN_ON_ONCE(work->worker != worker);
+-      /* Do not fight with another command that is canceling this work. */
++      /*
++       * Temporary cancel the work but do not fight with another command
++       * that is canceling the work as well.
++       *
++       * It is a bit tricky because of possible races with another
++       * mod_delayed_work() and cancel_delayed_work() callers.
++       *
++       * The timer must be canceled first because worker->lock is released
++       * when doing so. But the work can be removed from the queue (list)
++       * only when it can be queued again so that the return value can
++       * be used for reference counting.
++       */
++      kthread_cancel_delayed_work_timer(work, &flags);
+       if (work->canceling)
+               goto out;
++      ret = __kthread_cancel_work(work);
+-      ret = __kthread_cancel_work(work, true, &flags);
+ fast_queue:
+       __kthread_queue_delayed_work(worker, dwork, delay);
+ out:
+@@ -1213,7 +1223,10 @@ static bool __kthread_cancel_work_sync(s
+       /* Work must not be used with >1 worker, see kthread_queue_work(). */
+       WARN_ON_ONCE(work->worker != worker);
+-      ret = __kthread_cancel_work(work, is_dwork, &flags);
++      if (is_dwork)
++              kthread_cancel_delayed_work_timer(work, &flags);
++
++      ret = __kthread_cancel_work(work);
+       if (worker->current_work != work)
+               goto out_fast;
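
Condensed, the fixed kthread_mod_delayed_work() follows this order. A simplified sketch of the logic above (the fast-queue path for unattached work and the WARN_ON checks are omitted; not the complete kernel function):

    static bool mod_delayed_work_sketch(struct kthread_worker *worker,
                                        struct kthread_delayed_work *dwork,
                                        unsigned long delay)
    {
            struct kthread_work *work = &dwork->work;
            unsigned long flags;
            bool ret = false;

            raw_spin_lock_irqsave(&worker->lock, flags);
            /* 1. Stop the timer first; this temporarily drops worker->lock. */
            kthread_cancel_delayed_work_timer(work, &flags);
            /* 2. Re-check under the lock: another caller may be canceling. */
            if (work->canceling)
                    goto out;
            /* 3. Only now touch the queue (list), atomically under the lock. */
            ret = __kthread_cancel_work(work);
            __kthread_queue_delayed_work(worker, dwork, delay);
    out:
            raw_spin_unlock_irqrestore(&worker->lock, flags);
            return ret;
    }
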
diff --git a/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch b/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch
new file mode 100644 (file)
index 0000000..6c741c0
--- /dev/null
@@ -0,0 +1,102 @@
+From 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Thu, 24 Jun 2021 18:39:45 -0700
+Subject: kthread_worker: split code for canceling the delayed work timer
+
+From: Petr Mladek <pmladek@suse.com>
+
+commit 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 upstream.
+
+Patch series "kthread_worker: Fix race between kthread_mod_delayed_work()
+and kthread_cancel_delayed_work_sync()".
+
+This patchset fixes the race between kthread_mod_delayed_work() and
+kthread_cancel_delayed_work_sync() including proper return value
+handling.
+
+This patch (of 2):
+
+Simple code refactoring as a preparation step for fixing a race between
+kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync().
+
+It does not modify the existing behavior.
+
+Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Cc: <jenhaochen@google.com>
+Cc: Martin Liu <liumartin@google.com>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kthread.c |   46 +++++++++++++++++++++++++++++-----------------
+ 1 file changed, 29 insertions(+), 17 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1092,6 +1092,33 @@ void kthread_flush_work(struct kthread_w
+ EXPORT_SYMBOL_GPL(kthread_flush_work);
+ /*
++ * Make sure that the timer is neither set nor running and could
++ * not manipulate the work list_head any longer.
++ *
++ * The function is called under worker->lock. The lock is temporary
++ * released but the timer can't be set again in the meantime.
++ */
++static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
++                                            unsigned long *flags)
++{
++      struct kthread_delayed_work *dwork =
++              container_of(work, struct kthread_delayed_work, work);
++      struct kthread_worker *worker = work->worker;
++
++      /*
++       * del_timer_sync() must be called to make sure that the timer
++       * callback is not running. The lock must be temporary released
++       * to avoid a deadlock with the callback. In the meantime,
++       * any queuing is blocked by setting the canceling counter.
++       */
++      work->canceling++;
++      raw_spin_unlock_irqrestore(&worker->lock, *flags);
++      del_timer_sync(&dwork->timer);
++      raw_spin_lock_irqsave(&worker->lock, *flags);
++      work->canceling--;
++}
++
++/*
+  * This function removes the work from the worker queue. Also it makes sure
+  * that it won't get queued later via the delayed work's timer.
+  *
+@@ -1105,23 +1132,8 @@ static bool __kthread_cancel_work(struct
+                                 unsigned long *flags)
+ {
+       /* Try to cancel the timer if exists. */
+-      if (is_dwork) {
+-              struct kthread_delayed_work *dwork =
+-                      container_of(work, struct kthread_delayed_work, work);
+-              struct kthread_worker *worker = work->worker;
+-
+-              /*
+-               * del_timer_sync() must be called to make sure that the timer
+-               * callback is not running. The lock must be temporary released
+-               * to avoid a deadlock with the callback. In the meantime,
+-               * any queuing is blocked by setting the canceling counter.
+-               */
+-              work->canceling++;
+-              raw_spin_unlock_irqrestore(&worker->lock, *flags);
+-              del_timer_sync(&dwork->timer);
+-              raw_spin_lock_irqsave(&worker->lock, *flags);
+-              work->canceling--;
+-      }
++      if (is_dwork)
++              kthread_cancel_delayed_work_timer(work, flags);
+       /*
+        * Try to remove the work from a worker list. It might either
diff --git a/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch b/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch
new file mode 100644 (file)
index 0000000..78ef42c
--- /dev/null
@@ -0,0 +1,70 @@
+From f8be156be163a052a067306417cd0ff679068c97 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 24 Jun 2021 08:29:04 -0400
+Subject: KVM: do not allow mapping valid but non-reference-counted pages
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit f8be156be163a052a067306417cd0ff679068c97 upstream.
+
+It's possible to create a region which maps valid but non-refcounted
+pages (e.g., tail pages of non-compound higher order allocations). These
+host pages can then be returned by the gfn_to_page, gfn_to_pfn, etc.
+family of APIs, which take a reference to the page and raise its
+refcount from 0 to 1. When that reference is later dropped, the page is
+freed incorrectly.
+
+Fix this by only taking a reference on valid pages if it was non-zero,
+which indicates it is participating in normal refcounting (and can be
+released with put_page).
+
+This addresses CVE-2021-22543.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Tested-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1919,6 +1919,13 @@ static bool vma_is_valid(struct vm_area_
+       return true;
+ }
++static int kvm_try_get_pfn(kvm_pfn_t pfn)
++{
++      if (kvm_is_reserved_pfn(pfn))
++              return 1;
++      return get_page_unless_zero(pfn_to_page(pfn));
++}
++
+ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
+                              unsigned long addr, bool *async,
+                              bool write_fault, bool *writable,
+@@ -1968,13 +1975,21 @@ static int hva_to_pfn_remapped(struct vm
+        * Whoever called remap_pfn_range is also going to call e.g.
+        * unmap_mapping_range before the underlying pages are freed,
+        * causing a call to our MMU notifier.
++       *
++       * Certain IO or PFNMAP mappings can be backed with valid
++       * struct pages, but be allocated without refcounting e.g.,
++       * tail pages of non-compound higher order allocations, which
++       * would then underflow the refcount when the caller does the
++       * required put_page. Don't allow those pages here.
+        */ 
+-      kvm_get_pfn(pfn);
++      if (!kvm_try_get_pfn(pfn))
++              r = -EFAULT;
+ out:
+       pte_unmap_unlock(ptep, ptl);
+       *p_pfn = pfn;
+-      return 0;
++
++      return r;
+ }
+ /*
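
From the caller's perspective, the new helper turns "always take a reference" into "map only if a reference could be taken", so the eventual put_page() is always balanced. A hedged caller-side sketch (hypothetical wrapper, not kernel code):

    static int map_pfn_sketch(kvm_pfn_t pfn)
    {
            /* get_page_unless_zero() fails for refcount==0 pages. */
            if (!kvm_try_get_pfn(pfn))
                    return -EFAULT; /* valid memory, but not refcounted */

            /* ... use the page ... */

            if (!kvm_is_reserved_pfn(pfn))
                    put_page(pfn_to_page(pfn));
            return 0;
    }
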
diff --git a/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch b/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch
new file mode 100644 (file)
index 0000000..90ecf27
--- /dev/null
@@ -0,0 +1,37 @@
+From ca1f4d702d534387aa1f16379edb3b03cdb6ceda Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Fri, 11 Jun 2021 16:08:18 +0200
+Subject: s390: clear pt_regs::flags on irq entry
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit ca1f4d702d534387aa1f16379edb3b03cdb6ceda upstream.
+
+The current irq entry code doesn't initialize pt_regs::flags. On exit to
+user mode arch_do_signal_or_restart() tests whether PIF_SYSCALL is set,
+which might yield wrong results.
+
+Fix this by clearing pt_regs::flags in the entry.S irq handler
+code.
+
+Reported-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Fixes: 56e62a737028 ("s390: convert to generic entry")
+Cc: <stable@vger.kernel.org> # 5.12
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/entry.S |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -418,6 +418,7 @@ ENTRY(\name)
+       xgr     %r6,%r6
+       xgr     %r7,%r7
+       xgr     %r10,%r10
++      xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+       mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+       stmg    %r8,%r9,__PT_PSW(%r11)
+       tm      %r8,0x0001              # coming from user space?
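
For readers not fluent in s390 assembly: xc (exclusive-or character) of a storage field with itself is the idiomatic way to zero it, so the added instruction clears the 8-byte flags field of the pt_regs being built. In C terms:

    regs->flags = 0;    /* what "xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)" does */
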
diff --git a/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch b/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch
new file mode 100644 (file)
index 0000000..8081288
--- /dev/null
@@ -0,0 +1,44 @@
+From fc66127dc3396338f287c3b494dfbf102547e770 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Fri, 11 Jun 2021 10:27:51 +0200
+Subject: s390: fix system call restart with multiple signals
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit fc66127dc3396338f287c3b494dfbf102547e770 upstream.
+
+glibc complained with "The futex facility returned an unexpected error
+code.". It turned out that the futex syscall returned -ERESTARTSYS because
+a signal was pending. arch_do_signal_or_restart() restored the syscall
+parameters (namely regs->gprs[2]) and set PIF_SYSCALL_RESTART. When
+another signal is made pending later in the exit loop
+arch_do_signal_or_restart() is called again. This function clears
+PIF_SYSCALL_RESTART and checks the return code which is set in
+regs->gprs[2]. However, regs->gprs[2] was restored in the previous run
+and no longer contains -ERESTARTSYS, so PIF_SYSCALL_RESTART isn't set
+again and the syscall is skipped.
+
+Fix this by not clearing PIF_SYSCALL_RESTART - it is already cleared in
+__do_syscall() when the syscall is restarted.
+
+Reported-by: Bjoern Walk <bwalk@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Fixes: 56e62a737028 ("s390: convert to generic entry")
+Cc: <stable@vger.kernel.org> # 5.12
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/signal.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/s390/kernel/signal.c
++++ b/arch/s390/kernel/signal.c
+@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt
+       /* No handlers present - check for system call restart */
+       clear_pt_regs_flag(regs, PIF_SYSCALL);
+-      clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
+       if (current->thread.system_call) {
+               regs->int_code = current->thread.system_call;
+               switch (regs->gprs[2]) {
diff --git a/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch b/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch
new file mode 100644 (file)
index 0000000..c86febb
--- /dev/null
@@ -0,0 +1,67 @@
+From 67147e96a332b56c7206238162771d82467f86c0 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Fri, 18 Jun 2021 16:58:47 +0200
+Subject: s390/stack: fix possible register corruption with stack switch helper
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit 67147e96a332b56c7206238162771d82467f86c0 upstream.
+
+The CALL_ON_STACK macro is used to call a C function from inline
+assembly, and therefore must consider the C ABI, which says that only
+registers 6-13, and 15 are non-volatile (restored by the called
+function).
+
+The inline assembly incorrectly marks all registers used to pass
+parameters to the called function as read-only input operands, instead
+of operands that are read and written to. This might result in
+register corruption depending on usage, compiler, and compile options.
+
+Fix this by marking all operands used to pass parameters as read/write
+operands. To keep the code simple even register 6, if used, is marked
+as read-write operand.
+
+Fixes: ff340d2472ec ("s390: add stack switch helper")
+Cc: <stable@kernel.org> # 4.20
+Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/stacktrace.h |   18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/arch/s390/include/asm/stacktrace.h
++++ b/arch/s390/include/asm/stacktrace.h
+@@ -91,12 +91,16 @@ struct stack_frame {
+       CALL_ARGS_4(arg1, arg2, arg3, arg4);                            \
+       register unsigned long r4 asm("6") = (unsigned long)(arg5)
+-#define CALL_FMT_0 "=&d" (r2) :
+-#define CALL_FMT_1 "+&d" (r2) :
+-#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
+-#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
+-#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
+-#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
++/*
++ * To keep this simple mark register 2-6 as being changed (volatile)
++ * by the called function, even though register 6 is saved/nonvolatile.
++ */
++#define CALL_FMT_0 "=&d" (r2)
++#define CALL_FMT_1 "+&d" (r2)
++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
+ #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+ #define CALL_CLOBBER_4 CALL_CLOBBER_5
+@@ -118,7 +122,7 @@ struct stack_frame {
+               "       brasl   14,%[_fn]\n"                            \
+               "       la      15,0(%[_prev])\n"                       \
+               : [_prev] "=&a" (prev), CALL_FMT_##nr                   \
+-                [_stack] "R" (stack),                                 \
++              : [_stack] "R" (stack),                                 \
+                 [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
+                 [_frame] "d" (frame),                                 \
+                 [_fn] "X" (fn) : CALL_CLOBBER_##nr);                  \
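
The underlying GCC constraint semantics: an operand listed only as an input promises the asm leaves the register unchanged, while "+" declares it read-write and forces the compiler to reload it afterwards. An s390-flavored sketch of the difference (assumes a hypothetical external some_function; not the actual CALL_ON_STACK macro):

    extern long some_function(long arg);

    static inline long call_sketch(long arg)
    {
            register long r2 asm("2") = arg;    /* first argument register */

            /*
             * "+d" is correct: the C ABI lets some_function() return with
             * r2 clobbered. An input-only "d" constraint would promise
             * that r2 still holds 'arg' afterwards - the wrong promise
             * the old CALL_FMT_* macros made for the parameter registers.
             */
            asm volatile("brasl 14,some_function"
                         : "+d" (r2)
                         :
                         : "0", "1", "3", "4", "5", "14", "cc", "memory");
            return r2;      /* r2 is also the return-value register */
    }
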
diff --git a/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch b/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch
new file mode 100644 (file)
index 0000000..ece1f38
--- /dev/null
@@ -0,0 +1,78 @@
+From 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Tue, 15 Jun 2021 15:05:22 +0200
+Subject: s390/topology: clear thread/group maps for offline cpus
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 upstream.
+
+The current code doesn't clear the thread/group maps for offline
+CPUs. This may cause kernel crashes like the one below in common
+code that assumes if a CPU has siblings it is online.
+
+Unable to handle kernel pointer dereference in virtual kernel address space
+
+Call Trace:
+ [<000000013a4b8c3c>] blk_mq_map_swqueue+0x10c/0x388
+([<000000013a4b8bcc>] blk_mq_map_swqueue+0x9c/0x388)
+ [<000000013a4b9300>] blk_mq_init_allocated_queue+0x448/0x478
+ [<000000013a4b9416>] blk_mq_init_queue+0x4e/0x90
+ [<000003ff8019d3e6>] loop_add+0x106/0x278 [loop]
+ [<000003ff801b8148>] loop_init+0x148/0x1000 [loop]
+ [<0000000139de4924>] do_one_initcall+0x3c/0x1e0
+ [<0000000139ef449a>] do_init_module+0x6a/0x2a0
+ [<0000000139ef61bc>] __do_sys_finit_module+0xa4/0xc0
+ [<0000000139de9e6e>] do_syscall+0x7e/0xd0
+ [<000000013a8e0aec>] __do_syscall+0xbc/0x110
+ [<000000013a8ee2e8>] system_call+0x78/0xa0
+
+Fixes: 52aeda7accb6 ("s390/topology: remove offline CPUs from CPU topology masks")
+Cc: <stable@kernel.org> # 5.7+
+Reported-by: Marius Hillenbrand <mhillen@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/topology.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/topology.c
++++ b/arch/s390/kernel/topology.c
+@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst
+ {
+       static cpumask_t mask;
+-      cpumask_copy(&mask, cpumask_of(cpu));
++      cpumask_clear(&mask);
++      if (!cpu_online(cpu))
++              goto out;
++      cpumask_set_cpu(cpu, &mask);
+       switch (topology_mode) {
+       case TOPOLOGY_MODE_HW:
+               while (info) {
+@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst
+       default:
+               fallthrough;
+       case TOPOLOGY_MODE_SINGLE:
+-              cpumask_copy(&mask, cpumask_of(cpu));
+               break;
+       }
+       cpumask_and(&mask, &mask, cpu_online_mask);
++out:
+       cpumask_copy(dst, &mask);
+ }
+@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *ds
+       static cpumask_t mask;
+       int i;
+-      cpumask_copy(&mask, cpumask_of(cpu));
++      cpumask_clear(&mask);
++      if (!cpu_online(cpu))
++              goto out;
++      cpumask_set_cpu(cpu, &mask);
+       if (topology_mode != TOPOLOGY_MODE_HW)
+               goto out;
+       cpu -= cpu % (smp_cpu_mtid + 1);
diff --git a/queue-5.12/series b/queue-5.12/series
index 7b9a96441cbb3169b538632e245a18dc7d99ee27..4d7f5daa9a60481e7d49f780ea4b1d4aa0c8b777 100644 (file)
@@ -66,3 +66,15 @@ gpiolib-cdev-zero-padding-during-conversion-to-gpiol.patch
 scsi-sd-call-sd_revalidate_disk-for-ioctl-blkrrpart.patch
 software-node-handle-software-node-injection-to-an-e.patch
 nilfs2-fix-memory-leak-in-nilfs_sysfs_delete_device_.patch
+s390-topology-clear-thread-group-maps-for-offline-cpus.patch
+s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch
+s390-fix-system-call-restart-with-multiple-signals.patch
+s390-clear-pt_regs-flags-on-irq-entry.patch
+kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch
+i2c-robotfuzz-osif-fix-control-request-directions.patch
+ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch
+xen-events-reset-active-flag-for-lateeoi-events-later.patch
+kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch
+kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
+x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch
+x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch
diff --git a/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch b/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch
new file mode 100644 (file)
index 0000000..83cc50c
--- /dev/null
@@ -0,0 +1,165 @@
+From f9dfb5e390fab2df9f7944bb91e7705aba14cd26 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Jun 2021 16:18:25 +0200
+Subject: x86/fpu: Make init_fpstate correct with optimized XSAVE
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream.
+
+The XSAVE init code initializes all enabled and supported components with
+XRSTOR(S) to init state. Then it XSAVEs the state of the components back
+into init_fpstate which is used in several places to fill in the init state
+of components.
+
+This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because
+those use the init optimization and skip writing state of components which
+are in init state. So init_fpstate.xsave still contains all zeroes after
+this operation.
+
+There are two ways to solve that:
+
+   1) Use XSAVE unconditionally, but that requires reshuffling the buffer when
+      XSAVES is enabled because XSAVES uses compacted format.
+
+   2) Save the components which are known to have a non-zero init state by other
+      means.
+
+Looking deeper, #2 is the right thing to do because all components the
+kernel supports have all-zeroes init state except the legacy features (FP,
+SSE). Those cannot be hard coded because the states are not identical on all
+CPUs, but they can be saved with FXSAVE which avoids all conditionals.
+
+Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with
+a BUILD_BUG_ON() which reminds developers to validate that a newly added
+component has all zeroes init state. As a bonus remove the now unused
+copy_xregs_to_kernel_booting() crutch.
+
+The XSAVE and reshuffle method can still be implemented in the unlikely
+case that components are added which have a non-zero init state and no
+other means to save them. For now, FXSAVE is just simple and good enough.
+
+  [ bp: Fix a typo or two in the text. ]
+
+Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/fpu/internal.h |   30 +++++++-------------------
+ arch/x86/kernel/fpu/xstate.c        |   41 +++++++++++++++++++++++++++++++++---
+ 2 files changed, 46 insertions(+), 25 deletions(-)
+
+--- a/arch/x86/include/asm/fpu/internal.h
++++ b/arch/x86/include/asm/fpu/internal.h
+@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel
+               asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
+ }
++static inline void fxsave(struct fxregs_state *fx)
++{
++      if (IS_ENABLED(CONFIG_X86_32))
++              asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
++      else
++              asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
++}
++
+ /* These macros all use (%edi)/(%rdi) as the single memory argument. */
+ #define XSAVE         ".byte " REX_PREFIX "0x0f,0xae,0x27"
+ #define XSAVEOPT      ".byte " REX_PREFIX "0x0f,0xae,0x37"
+@@ -270,28 +278,6 @@ static inline void copy_fxregs_to_kernel
+ /*
+  * This function is called only during boot time when x86 caps are not set
+- * up and alternative can not be used yet.
+- */
+-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
+-{
+-      u64 mask = xfeatures_mask_all;
+-      u32 lmask = mask;
+-      u32 hmask = mask >> 32;
+-      int err;
+-
+-      WARN_ON(system_state != SYSTEM_BOOTING);
+-
+-      if (boot_cpu_has(X86_FEATURE_XSAVES))
+-              XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+-      else
+-              XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
+-
+-      /* We should never fault when copying to a kernel buffer: */
+-      WARN_ON_FPU(err);
+-}
+-
+-/*
+- * This function is called only during boot time when x86 caps are not set
+  * up and alternative can not be used yet.
+  */
+ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -441,12 +441,35 @@ static void __init print_xstate_offset_s
+ }
+ /*
++ * All supported features have either init state all zeros or are
++ * handled in setup_init_fpu() individually. This is an explicit
++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
++ * newly added supported features at build time and make people
++ * actually look at the init state for the new feature.
++ */
++#define XFEATURES_INIT_FPSTATE_HANDLED                \
++      (XFEATURE_MASK_FP |                     \
++       XFEATURE_MASK_SSE |                    \
++       XFEATURE_MASK_YMM |                    \
++       XFEATURE_MASK_OPMASK |                 \
++       XFEATURE_MASK_ZMM_Hi256 |              \
++       XFEATURE_MASK_Hi16_ZMM  |              \
++       XFEATURE_MASK_PKRU |                   \
++       XFEATURE_MASK_BNDREGS |                \
++       XFEATURE_MASK_BNDCSR |                 \
++       XFEATURE_MASK_PASID)
++
++/*
+  * setup the xstate image representing the init state
+  */
+ static void __init setup_init_fpu_buf(void)
+ {
+       static int on_boot_cpu __initdata = 1;
++      BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
++                    XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
++                   XFEATURES_INIT_FPSTATE_HANDLED);
++
+       WARN_ON_FPU(!on_boot_cpu);
+       on_boot_cpu = 0;
+@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(vo
+       copy_kernel_to_xregs_booting(&init_fpstate.xsave);
+       /*
+-       * Dump the init state again. This is to identify the init state
+-       * of any feature which is not represented by all zero's.
++       * All components are now in init state. Read the state back so
++       * that init_fpstate contains all non-zero init state. This only
++       * works with XSAVE, but not with XSAVEOPT and XSAVES because
++       * those use the init optimization which skips writing data for
++       * components in init state.
++       *
++       * XSAVE could be used, but that would require to reshuffle the
++       * data when XSAVES is available because XSAVES uses xstate
++       * compaction. But doing so is a pointless exercise because most
++       * components have an all zeros init state except for the legacy
++       * ones (FP and SSE). Those can be saved with FXSAVE into the
++       * legacy area. Adding new features requires to ensure that init
++       * state is all zeroes or if not to add the necessary handling
++       * here.
+        */
+-      copy_xregs_to_kernel_booting(&init_fpstate.xsave);
++      fxsave(&init_fpstate.fxsave);
+ }
+ static int xfeature_uncompacted_offset(int xfeature_nr)
diff --git a/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch b/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch
new file mode 100644 (file)
index 0000000..79ddb0d
--- /dev/null
@@ -0,0 +1,66 @@
+From 9301982c424a003c0095bf157154a85bf5322bd0 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Jun 2021 16:18:24 +0200
+Subject: x86/fpu: Preserve supervisor states in sanitize_restored_user_xstate()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 9301982c424a003c0095bf157154a85bf5322bd0 upstream.
+
+sanitize_restored_user_xstate() preserves the supervisor states only
+when the fx_only argument is zero, which allows unprivileged user space
+to put supervisor states back into init state.
+
+Preserve them unconditionally.
+
+ [ bp: Fix a typo or two in the text. ]
+
+Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c |   26 ++++++++------------------
+ 1 file changed, 8 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpre
+       if (use_xsave()) {
+               /*
+-               * Note: we don't need to zero the reserved bits in the
+-               * xstate_header here because we either didn't copy them at all,
+-               * or we checked earlier that they aren't set.
++               * Clear all feature bits which are not set in
++               * user_xfeatures and clear all extended features
++               * for fx_only mode.
+                */
++              u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
+               /*
+-               * 'user_xfeatures' might have bits clear which are
+-               * set in header->xfeatures. This represents features that
+-               * were in init state prior to a signal delivery, and need
+-               * to be reset back to the init state.  Clear any user
+-               * feature bits which are set in the kernel buffer to get
+-               * them back to the init state.
+-               *
+-               * Supervisor state is unchanged by input from userspace.
+-               * Ensure supervisor state bits stay set and supervisor
+-               * state is not modified.
++               * Supervisor state has to be preserved. The sigframe
++               * restore can only modify user features, i.e. @mask
++               * cannot contain them.
+                */
+-              if (fx_only)
+-                      header->xfeatures = XFEATURE_MASK_FPSSE;
+-              else
+-                      header->xfeatures &= user_xfeatures |
+-                                           xfeatures_mask_supervisor();
++              header->xfeatures &= mask | xfeatures_mask_supervisor();
+       }
+       if (use_fxsr()) {
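
A worked example of the new mask logic, assuming hypothetical saved state (illustration only, not kernel code): the saved header has FP, SSE, YMM and the supervisor PASID state set, and the signal frame's user_xfeatures is FP|SSE.

    /* Illustration with assumed values; PASID stands in for any
     * supervisor state and xfeatures_mask_supervisor() is assumed to
     * return XFEATURE_MASK_PASID. */
    static u64 sanitize_sketch(bool fx_only)
    {
            u64 header_xfeatures = XFEATURE_MASK_FP | XFEATURE_MASK_SSE |
                                   XFEATURE_MASK_YMM | XFEATURE_MASK_PASID;
            u64 user_xfeatures   = XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
            u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;

            header_xfeatures &= mask | XFEATURE_MASK_PASID;
            /*
             * Both branches yield FP|SSE|PASID: YMM is reset to init
             * state, the supervisor PASID state survives. The old
             * fx_only path assigned XFEATURE_MASK_FPSSE and wiped PASID.
             */
            return header_xfeatures;
    }
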
diff --git a/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch b/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch
new file mode 100644 (file)
index 0000000..ab19055
--- /dev/null
@@ -0,0 +1,63 @@
+From 3de218ff39b9e3f0d453fe3154f12a174de44b25 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Wed, 23 Jun 2021 15:09:13 +0200
+Subject: xen/events: reset active flag for lateeoi events later
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 3de218ff39b9e3f0d453fe3154f12a174de44b25 upstream.
+
+In order to avoid a race condition for user events when changing
+cpu affinity, reset the active flag only when EOI-ing the event.
+
+This is working fine as all user events are lateeoi events. Note that
+lateeoi_ack_mask_dynirq() is not modified as there is no explicit call
+to xen_irq_lateeoi() expected later.
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Fixes: b6622798bc50b62 ("xen/events: avoid handling the same event on two cpus at the same time")
+Tested-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Link: https://lore.kernel.org/r/20210623130913.9405-1-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struc
+       }
+       info->eoi_time = 0;
++
++      /* is_active hasn't been reset yet, do it now. */
++      smp_store_release(&info->is_active, 0);
+       do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
+ }
+@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port
+               BUG();
+ }
++/* Not called for lateeoi events. */
+ static void event_handler_exit(struct irq_info *info)
+ {
+       smp_store_release(&info->is_active, 0);
+@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct ir
+       if (VALID_EVTCHN(evtchn)) {
+               do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+-              event_handler_exit(info);
++              /*
++               * Don't call event_handler_exit().
++               * Need to keep is_active non-zero in order to ignore re-raised
++               * events after cpu affinity changes while a lateeoi is pending.
++               */
++              clear_evtchn(evtchn);
+       }
+ }
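
The resulting split of responsibilities, reduced to a sketch (simplified from the two functions touched above, not the full driver code): the ack path keeps is_active set, and only the later EOI clears it.

    static void lateeoi_ack_sketch(struct irq_info *info, evtchn_port_t evtchn)
    {
            do_mask(info, EVT_MASK_REASON_EOI_PENDING);
            /* Ack only: is_active stays set, so an event re-raised after
             * a cpu affinity change is ignored while the EOI is pending. */
            clear_evtchn(evtchn);
    }

    static void lateeoi_eoi_sketch(struct irq_info *info)
    {
            /* EOI time: now it is safe to accept the event again. */
            smp_store_release(&info->is_active, 0);
            do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
    }
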