From: Greg Kroah-Hartman Date: Sun, 27 Jun 2021 14:27:15 +0000 (+0200) Subject: 5.12-stable patches X-Git-Tag: v5.12.14~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b1cd1baa1984d1ee486b73f2927ad7f22ec49646;p=thirdparty%2Fkernel%2Fstable-queue.git 5.12-stable patches added patches: ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch i2c-robotfuzz-osif-fix-control-request-directions.patch kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch s390-clear-pt_regs-flags-on-irq-entry.patch s390-fix-system-call-restart-with-multiple-signals.patch s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch s390-topology-clear-thread-group-maps-for-offline-cpus.patch x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch xen-events-reset-active-flag-for-lateeoi-events-later.patch --- diff --git a/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch b/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch new file mode 100644 index 00000000000..efa5c7420f6 --- /dev/null +++ b/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch @@ -0,0 +1,54 @@ +From 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Tue, 1 Jun 2021 09:40:25 -0400 +Subject: ceph: must hold snap_rwsem when filling inode for async create + +From: Jeff Layton + +commit 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 upstream. + +...and add a lockdep assertion for it to ceph_fill_inode(). 
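[ Editor's note: an illustrative userspace C sketch of the locking rule this
  patch enforces -- the inode filler asserts that the snapshot rwsem is
  read-held by its caller, mirroring the new lockdep_assert_held() in
  ceph_fill_inode(). The reader counter and all names are invented for the
  sketch; this is not kernel code. Build with: cc -pthread sketch.c ]

	#include <assert.h>
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static pthread_rwlock_t snap_rwsem = PTHREAD_RWLOCK_INITIALIZER;
	static atomic_int snap_readers;	/* stand-in for lockdep's held-lock state */

	static void fill_inode(const char *name)
	{
		/* the analogue of lockdep_assert_held(&mdsc->snap_rwsem) */
		assert(atomic_load(&snap_readers) > 0);
		printf("filling inode %s with snap_rwsem read-held\n", name);
	}

	static void finish_async_create(const char *name)
	{
		pthread_rwlock_rdlock(&snap_rwsem);	/* down_read() in the patch */
		atomic_fetch_add(&snap_readers, 1);
		fill_inode(name);
		atomic_fetch_sub(&snap_readers, 1);
		pthread_rwlock_unlock(&snap_rwsem);	/* up_read() in the patch */
	}

	int main(void)
	{
		finish_async_create("demo");
		return 0;
	}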
+ +Cc: stable@vger.kernel.org # v5.7+ +Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible") +Signed-off-by: Jeff Layton +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/file.c | 3 +++ + fs/ceph/inode.c | 2 ++ + 2 files changed, 5 insertions(+) + +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -578,6 +578,7 @@ static int ceph_finish_async_create(stru + struct ceph_inode_info *ci = ceph_inode(dir); + struct inode *inode; + struct timespec64 now; ++ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_vino vino = { .ino = req->r_deleg_ino, + .snap = CEPH_NOSNAP }; + +@@ -615,8 +616,10 @@ static int ceph_finish_async_create(stru + + ceph_file_layout_to_legacy(lo, &in.layout); + ++ down_read(&mdsc->snap_rwsem); + ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, + req->r_fmode, NULL); ++ up_read(&mdsc->snap_rwsem); + if (ret) { + dout("%s failed to fill inode: %d\n", __func__, ret); + ceph_dir_clear_complete(dir); +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, + bool new_version = false; + bool fill_inline = false; + ++ lockdep_assert_held(&mdsc->snap_rwsem); ++ + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, + inode, ceph_vinop(inode), le64_to_cpu(info->version), + ci->i_version); diff --git a/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch b/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch new file mode 100644 index 00000000000..dfaa7b63ffc --- /dev/null +++ b/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch @@ -0,0 +1,50 @@ +From 4ca070ef0dd885616ef294d269a9bf8e3b258e1a Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Mon, 24 May 2021 11:09:12 +0200 +Subject: i2c: robotfuzz-osif: fix control-request directions + +From: Johan Hovold + +commit 4ca070ef0dd885616ef294d269a9bf8e3b258e1a upstream. + +The direction of the pipe argument must match the request-type direction +bit or control requests may fail depending on the host-controller-driver +implementation. + +Control transfers without a data stage are treated as OUT requests by +the USB stack and should be using usb_sndctrlpipe(). Failing to do so +will now trigger a warning. + +Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously +used the osif_usb_read() helper and set the IN direction bit. + +Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com +Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.") +Cc: stable@vger.kernel.org # 3.14 +Signed-off-by: Johan Hovold +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/i2c/busses/i2c-robotfuzz-osif.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c ++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c +@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter + } + } + +- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); ++ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + if (ret) { + dev_err(&adapter->dev, "failure sending STOP\n"); + return -EREMOTEIO; +@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interfa + * Set bus frequency. The frequency is: + * 120,000,000 / ( 16 + 2 * div * 4^prescale). 
+	 * Using dev = 52, prescale = 0 give 100KHz */
+-	ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
++	ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
+ 			NULL, 0);
+ 	if (ret) {
+ 		dev_err(&interface->dev, "failure sending bit rate");
diff --git a/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch b/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
new file mode 100644
index 00000000000..c167187471b
--- /dev/null
+++ b/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
@@ -0,0 +1,181 @@
+From 5fa54346caf67b4b1b10b1f390316ae466da4d53 Mon Sep 17 00:00:00 2001
+From: Petr Mladek
+Date: Thu, 24 Jun 2021 18:39:48 -0700
+Subject: kthread: prevent deadlock when kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()
+
+From: Petr Mladek
+
+commit 5fa54346caf67b4b1b10b1f390316ae466da4d53 upstream.
+
+The system might hang with the following backtrace:
+
+ schedule+0x80/0x100
+ schedule_timeout+0x48/0x138
+ wait_for_common+0xa4/0x134
+ wait_for_completion+0x1c/0x2c
+ kthread_flush_work+0x114/0x1cc
+ kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
+ kthread_cancel_delayed_work_sync+0x18/0x2c
+ xxxx_pm_notify+0xb0/0xd8
+ blocking_notifier_call_chain_robust+0x80/0x194
+ pm_notifier_call_chain_robust+0x28/0x4c
+ suspend_prepare+0x40/0x260
+ enter_state+0x80/0x3f4
+ pm_suspend+0x60/0xdc
+ state_store+0x108/0x144
+ kobj_attr_store+0x38/0x88
+ sysfs_kf_write+0x64/0xc0
+ kernfs_fop_write_iter+0x108/0x1d0
+ vfs_write+0x2f4/0x368
+ ksys_write+0x7c/0xec
+
+It is caused by the following race between kthread_mod_delayed_work()
+and kthread_cancel_delayed_work_sync():
+
+CPU0                                    CPU1
+
+Context: Thread A                       Context: Thread B
+
+kthread_mod_delayed_work()
+  spin_lock()
+    __kthread_cancel_work()
+      spin_unlock()
+      del_timer_sync()
+                                        kthread_cancel_delayed_work_sync()
+                                          spin_lock()
+                                          __kthread_cancel_work()
+                                            spin_unlock()
+                                            del_timer_sync()
+                                            spin_lock()
+
+                                          work->canceling++
+                                          spin_unlock
+      spin_lock()
+  queue_delayed_work()
+    // dwork is put into the worker->delayed_work_list
+
+  spin_unlock()
+
+kthread_flush_work()
+  // flush_work is put at the tail of the dwork
+
+  wait_for_completion()
+
+Context: IRQ
+
+  kthread_delayed_work_timer_fn()
+    spin_lock()
+      list_del_init(&work->node);
+    spin_unlock()
+
+BANG: flush_work is no longer linked and will never get processed.
+
+The problem is that kthread_mod_delayed_work() checks the
+work->canceling flag before canceling the timer.
+
+A simple solution is to (re)check work->canceling after
+__kthread_cancel_work(). But then it is not clear what should be
+returned when __kthread_cancel_work() removed the work from the queue
+(list) and it can't queue it again with the new @delay.
+
+The return value might be used for reference counting. The caller has
+to know whether a new work has been queued or an existing one was
+replaced.
+
+The proper solution is that kthread_mod_delayed_work() will remove the
+work from the queue (list) _only_ when work->canceling is not set. The
+flag must be checked after the timer is stopped and the remaining
+operations can be done under worker->lock.
+
+Note that kthread_mod_delayed_work() could remove the timer and then
+bail out. It is fine. The other canceling caller needs to cancel the
+timer as well. The important thing is that the queue (list)
+manipulation is done atomically under worker->lock.
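[ Editor's note: the fixed ordering described above can be reduced to a
  short illustrative C model -- cancel the timer first (the step that
  drops worker->lock), re-check work->canceling under the lock, and only
  then touch the queue. Every type and helper below is invented for the
  sketch; this is not the kernel implementation. ]

	#include <stdbool.h>
	#include <stdio.h>

	struct kwork {
		int canceling;	/* > 0 while another caller is canceling us */
		bool queued;	/* models membership in the worker's list   */
	};

	/* models kthread_cancel_delayed_work_timer(): drops and re-takes
	 * worker->lock around del_timer_sync(), so 'canceling' may change */
	static void cancel_work_timer(struct kwork *w) { }

	/* models __kthread_cancel_work(): only the list manipulation */
	static bool cancel_work(struct kwork *w)
	{
		bool was_queued = w->queued;
		w->queued = false;
		return was_queued;
	}

	static bool mod_delayed_work(struct kwork *w)
	{
		bool ret = false;

		cancel_work_timer(w);	/* timer first; the lock was dropped here */
		if (w->canceling)
			goto out;	/* leave the work to the canceling caller */
		ret = cancel_work(w);	/* list is only touched when requeueing   */
		w->queued = true;	/* models __kthread_queue_delayed_work()  */
	out:
		return ret;		/* usable for reference counting */
	}

	int main(void)
	{
		struct kwork w = { .canceling = 1, .queued = true };
		printf("replaced=%d still queued=%d\n", mod_delayed_work(&w), w.queued);
		return 0;
	}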
+
+Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com
+Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work")
+Signed-off-by: Petr Mladek
+Reported-by: Martin Liu
+Cc:
+Cc: Minchan Kim
+Cc: Nathan Chancellor
+Cc: Nick Desaulniers
+Cc: Oleg Nesterov
+Cc: Tejun Heo
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/kthread.c | 35 ++++++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1119,8 +1119,11 @@ static void kthread_cancel_delayed_work_
+ }
+ 
+ /*
+- * This function removes the work from the worker queue. Also it makes sure
+- * that it won't get queued later via the delayed work's timer.
++ * This function removes the work from the worker queue.
++ *
++ * It is called under worker->lock. The caller must make sure that
++ * the timer used by delayed work is not running, e.g. by calling
++ * kthread_cancel_delayed_work_timer().
+  *
+  * The work might still be in use when this function finishes. See the
+  * current_work proceed by the worker.
+@@ -1128,13 +1131,8 @@ static void kthread_cancel_delayed_work_
+  * Return: %true if @work was pending and successfully canceled,
+  *	   %false if @work was not pending
+  */
+-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+-				  unsigned long *flags)
++static bool __kthread_cancel_work(struct kthread_work *work)
+ {
+-	/* Try to cancel the timer if exists. */
+-	if (is_dwork)
+-		kthread_cancel_delayed_work_timer(work, flags);
+-
+ 	/*
+ 	 * Try to remove the work from a worker list. It might either
+ 	 * be from worker->work_list or from worker->delayed_work_list.
+@@ -1187,11 +1185,23 @@ bool kthread_mod_delayed_work(struct kth
+ 	/* Work must not be used with >1 worker, see kthread_queue_work() */
+ 	WARN_ON_ONCE(work->worker != worker);
+ 
+-	/* Do not fight with another command that is canceling this work. */
++	/*
++	 * Temporary cancel the work but do not fight with another command
++	 * that is canceling the work as well.
++	 *
++	 * It is a bit tricky because of possible races with another
++	 * mod_delayed_work() and cancel_delayed_work() callers.
++	 *
++	 * The timer must be canceled first because worker->lock is released
++	 * when doing so. But the work can be removed from the queue (list)
++	 * only when it can be queued again so that the return value can
++	 * be used for reference counting.
++	 */
++	kthread_cancel_delayed_work_timer(work, &flags);
+ 	if (work->canceling)
+ 		goto out;
++	ret = __kthread_cancel_work(work);
+ 
+-	ret = __kthread_cancel_work(work, true, &flags);
+ fast_queue:
+ 	__kthread_queue_delayed_work(worker, dwork, delay);
+ out:
+@@ -1213,7 +1223,10 @@ static bool __kthread_cancel_work_sync(s
+ 	/* Work must not be used with >1 worker, see kthread_queue_work().
*/ + WARN_ON_ONCE(work->worker != worker); + +- ret = __kthread_cancel_work(work, is_dwork, &flags); ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, &flags); ++ ++ ret = __kthread_cancel_work(work); + + if (worker->current_work != work) + goto out_fast; diff --git a/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch b/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch new file mode 100644 index 00000000000..6c741c016a0 --- /dev/null +++ b/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch @@ -0,0 +1,102 @@ +From 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Thu, 24 Jun 2021 18:39:45 -0700 +Subject: kthread_worker: split code for canceling the delayed work timer + +From: Petr Mladek + +commit 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 upstream. + +Patch series "kthread_worker: Fix race between kthread_mod_delayed_work() +and kthread_cancel_delayed_work_sync()". + +This patchset fixes the race between kthread_mod_delayed_work() and +kthread_cancel_delayed_work_sync() including proper return value +handling. + +This patch (of 2): + +Simple code refactoring as a preparation step for fixing a race between +kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync(). + +It does not modify the existing behavior. + +Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com +Signed-off-by: Petr Mladek +Cc: +Cc: Martin Liu +Cc: Minchan Kim +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Oleg Nesterov +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/kthread.c | 46 +++++++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 17 deletions(-) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -1092,6 +1092,33 @@ void kthread_flush_work(struct kthread_w + EXPORT_SYMBOL_GPL(kthread_flush_work); + + /* ++ * Make sure that the timer is neither set nor running and could ++ * not manipulate the work list_head any longer. ++ * ++ * The function is called under worker->lock. The lock is temporary ++ * released but the timer can't be set again in the meantime. ++ */ ++static void kthread_cancel_delayed_work_timer(struct kthread_work *work, ++ unsigned long *flags) ++{ ++ struct kthread_delayed_work *dwork = ++ container_of(work, struct kthread_delayed_work, work); ++ struct kthread_worker *worker = work->worker; ++ ++ /* ++ * del_timer_sync() must be called to make sure that the timer ++ * callback is not running. The lock must be temporary released ++ * to avoid a deadlock with the callback. In the meantime, ++ * any queuing is blocked by setting the canceling counter. ++ */ ++ work->canceling++; ++ raw_spin_unlock_irqrestore(&worker->lock, *flags); ++ del_timer_sync(&dwork->timer); ++ raw_spin_lock_irqsave(&worker->lock, *flags); ++ work->canceling--; ++} ++ ++/* + * This function removes the work from the worker queue. Also it makes sure + * that it won't get queued later via the delayed work's timer. + * +@@ -1105,23 +1132,8 @@ static bool __kthread_cancel_work(struct + unsigned long *flags) + { + /* Try to cancel the timer if exists. */ +- if (is_dwork) { +- struct kthread_delayed_work *dwork = +- container_of(work, struct kthread_delayed_work, work); +- struct kthread_worker *worker = work->worker; +- +- /* +- * del_timer_sync() must be called to make sure that the timer +- * callback is not running. 
The lock must be temporary released +- * to avoid a deadlock with the callback. In the meantime, +- * any queuing is blocked by setting the canceling counter. +- */ +- work->canceling++; +- raw_spin_unlock_irqrestore(&worker->lock, *flags); +- del_timer_sync(&dwork->timer); +- raw_spin_lock_irqsave(&worker->lock, *flags); +- work->canceling--; +- } ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, flags); + + /* + * Try to remove the work from a worker list. It might either diff --git a/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch b/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch new file mode 100644 index 00000000000..78ef42c3e53 --- /dev/null +++ b/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch @@ -0,0 +1,70 @@ +From f8be156be163a052a067306417cd0ff679068c97 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Thu, 24 Jun 2021 08:29:04 -0400 +Subject: KVM: do not allow mapping valid but non-reference-counted pages + +From: Nicholas Piggin + +commit f8be156be163a052a067306417cd0ff679068c97 upstream. + +It's possible to create a region which maps valid but non-refcounted +pages (e.g., tail pages of non-compound higher order allocations). These +host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family +of APIs, which take a reference to the page, which takes it from 0 to 1. +When the reference is dropped, this will free the page incorrectly. + +Fix this by only taking a reference on valid pages if it was non-zero, +which indicates it is participating in normal refcounting (and can be +released with put_page). + +This addresses CVE-2021-22543. + +Signed-off-by: Nicholas Piggin +Tested-by: Paolo Bonzini +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1919,6 +1919,13 @@ static bool vma_is_valid(struct vm_area_ + return true; + } + ++static int kvm_try_get_pfn(kvm_pfn_t pfn) ++{ ++ if (kvm_is_reserved_pfn(pfn)) ++ return 1; ++ return get_page_unless_zero(pfn_to_page(pfn)); ++} ++ + static int hva_to_pfn_remapped(struct vm_area_struct *vma, + unsigned long addr, bool *async, + bool write_fault, bool *writable, +@@ -1968,13 +1975,21 @@ static int hva_to_pfn_remapped(struct vm + * Whoever called remap_pfn_range is also going to call e.g. + * unmap_mapping_range before the underlying pages are freed, + * causing a call to our MMU notifier. ++ * ++ * Certain IO or PFNMAP mappings can be backed with valid ++ * struct pages, but be allocated without refcounting e.g., ++ * tail pages of non-compound higher order allocations, which ++ * would then underflow the refcount when the caller does the ++ * required put_page. Don't allow those pages here. 
+ 	 */
+-	kvm_get_pfn(pfn);
++	if (!kvm_try_get_pfn(pfn))
++		r = -EFAULT;
+ 
+ out:
+ 	pte_unmap_unlock(ptep, ptl);
+ 	*p_pfn = pfn;
+-	return 0;
++
++	return r;
+ }
+ 
+ /*
diff --git a/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch b/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch
new file mode 100644
index 00000000000..90ecf273d0e
--- /dev/null
+++ b/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch
@@ -0,0 +1,37 @@
+From ca1f4d702d534387aa1f16379edb3b03cdb6ceda Mon Sep 17 00:00:00 2001
+From: Sven Schnelle
+Date: Fri, 11 Jun 2021 16:08:18 +0200
+Subject: s390: clear pt_regs::flags on irq entry
+
+From: Sven Schnelle
+
+commit ca1f4d702d534387aa1f16379edb3b03cdb6ceda upstream.
+
+The current irq entry code doesn't initialize pt_regs::flags. On exit to
+user mode arch_do_signal_or_restart() tests whether PIF_SYSCALL is set,
+which might yield wrong results.
+
+Fix this by clearing pt_regs::flags in the entry.S irq handler
+code.
+
+Reported-by: Heiko Carstens
+Signed-off-by: Sven Schnelle
+Reviewed-by: Heiko Carstens
+Fixes: 56e62a737028 ("s390: convert to generic entry")
+Cc: # 5.12
+Signed-off-by: Vasily Gorbik
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/kernel/entry.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -418,6 +418,7 @@ ENTRY(\name)
+ 	xgr	%r6,%r6
+ 	xgr	%r7,%r7
+ 	xgr	%r10,%r10
++	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ 	stmg	%r8,%r9,__PT_PSW(%r11)
+ 	tm	%r8,0x0001		# coming from user space?
diff --git a/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch b/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch
new file mode 100644
index 00000000000..808128806e4
--- /dev/null
+++ b/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch
@@ -0,0 +1,44 @@
+From fc66127dc3396338f287c3b494dfbf102547e770 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle
+Date: Fri, 11 Jun 2021 10:27:51 +0200
+Subject: s390: fix system call restart with multiple signals
+
+From: Sven Schnelle
+
+commit fc66127dc3396338f287c3b494dfbf102547e770 upstream.
+
+glibc complained with "The futex facility returned an unexpected error
+code.". It turned out that the futex syscall returned -ERESTARTSYS because
+a signal is pending. arch_do_signal_or_restart() restored the syscall
+parameters (namely regs->gprs[2]) and set PIF_SYSCALL_RESTART. When
+another signal is made pending later in the exit loop
+arch_do_signal_or_restart() is called again. This function clears
+PIF_SYSCALL_RESTART and checks the return code which is set in
+regs->gprs[2]. However, regs->gprs[2] was restored in the previous run
+and no longer contains -ERESTARTSYS, so PIF_SYSCALL_RESTART isn't set
+again and the syscall is skipped.
+
+Fix this by not clearing PIF_SYSCALL_RESTART - it is already cleared in
+__do_syscall() when the syscall is restarted.
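[ Editor's note: an illustrative userspace model of the double signal
  delivery described above. The struct and the values are invented; it only
  demonstrates why clearing the restart flag on the second pass loses the
  restart, and why the fix leaves the flag alone. ]

	#include <stdbool.h>
	#include <stdio.h>

	#define ERESTARTSYS 512

	struct regs { long gpr2; bool restart; };

	/* models arch_do_signal_or_restart(); 'buggy' selects the old code */
	static void do_signal(struct regs *r, long saved_arg, bool buggy)
	{
		if (buggy)
			r->restart = false;	/* old: clear PIF_SYSCALL_RESTART */
		if (r->gpr2 == -ERESTARTSYS) {
			r->gpr2 = saved_arg;	/* restore the syscall argument */
			r->restart = true;	/* request a syscall restart    */
		}
	}

	int main(void)
	{
		/* two signals become pending before returning to user space */
		struct regs old = { .gpr2 = -ERESTARTSYS }, fix = old;

		do_signal(&old, 42, true);  do_signal(&old, 42, true);
		do_signal(&fix, 42, false); do_signal(&fix, 42, false);

		printf("old code: restart=%d (syscall skipped)\n", old.restart);
		printf("fixed:    restart=%d (restart preserved)\n", fix.restart);
		return 0;
	}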
+ +Reported-by: Bjoern Walk +Signed-off-by: Sven Schnelle +Reviewed-by: Heiko Carstens +Fixes: 56e62a737028 ("s390: convert to generic entry") +Cc: # 5.12 +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/signal.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/s390/kernel/signal.c ++++ b/arch/s390/kernel/signal.c +@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt + + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); +- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; + switch (regs->gprs[2]) { diff --git a/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch b/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch new file mode 100644 index 00000000000..c86febbc3ea --- /dev/null +++ b/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch @@ -0,0 +1,67 @@ +From 67147e96a332b56c7206238162771d82467f86c0 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Fri, 18 Jun 2021 16:58:47 +0200 +Subject: s390/stack: fix possible register corruption with stack switch helper + +From: Heiko Carstens + +commit 67147e96a332b56c7206238162771d82467f86c0 upstream. + +The CALL_ON_STACK macro is used to call a C function from inline +assembly, and therefore must consider the C ABI, which says that only +registers 6-13, and 15 are non-volatile (restored by the called +function). + +The inline assembly incorrectly marks all registers used to pass +parameters to the called function as read-only input operands, instead +of operands that are read and written to. This might result in +register corruption depending on usage, compiler, and compile options. + +Fix this by marking all operands used to pass parameters as read/write +operands. To keep the code simple even register 6, if used, is marked +as read-write operand. + +Fixes: ff340d2472ec ("s390: add stack switch helper") +Cc: # 4.20 +Reviewed-by: Vasily Gorbik +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/stacktrace.h | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/arch/s390/include/asm/stacktrace.h ++++ b/arch/s390/include/asm/stacktrace.h +@@ -91,12 +91,16 @@ struct stack_frame { + CALL_ARGS_4(arg1, arg2, arg3, arg4); \ + register unsigned long r4 asm("6") = (unsigned long)(arg5) + +-#define CALL_FMT_0 "=&d" (r2) : +-#define CALL_FMT_1 "+&d" (r2) : +-#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +-#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +-#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +-#define CALL_FMT_5 CALL_FMT_4 "d" (r6), ++/* ++ * To keep this simple mark register 2-6 as being changed (volatile) ++ * by the called function, even though register 6 is saved/nonvolatile. 
++ */
++#define CALL_FMT_0 "=&d" (r2)
++#define CALL_FMT_1 "+&d" (r2)
++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
+ 
+ #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+ #define CALL_CLOBBER_4 CALL_CLOBBER_5
+@@ -118,7 +122,7 @@ struct stack_frame {
+ 		"	brasl	14,%[_fn]\n"	\
+ 		"	la	15,0(%[_prev])\n"	\
+ 		: [_prev] "=&a" (prev), CALL_FMT_##nr	\
+-		  [_stack] "R" (stack),	\
++		: [_stack] "R" (stack),	\
+ 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+ 		  [_frame] "d" (frame),	\
+ 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);	\
diff --git a/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch b/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch
new file mode 100644
index 00000000000..ece1f385036
--- /dev/null
+++ b/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch
@@ -0,0 +1,78 @@
+From 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle
+Date: Tue, 15 Jun 2021 15:05:22 +0200
+Subject: s390/topology: clear thread/group maps for offline cpus
+
+From: Sven Schnelle
+
+commit 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 upstream.
+
+The current code doesn't clear the thread/group maps for offline
+CPUs. This may cause kernel crashes like the one below in common
+code that assumes if a CPU has siblings it is online.
+
+Unable to handle kernel pointer dereference in virtual kernel address space
+
+Call Trace:
+ [<000000013a4b8c3c>] blk_mq_map_swqueue+0x10c/0x388
+([<000000013a4b8bcc>] blk_mq_map_swqueue+0x9c/0x388)
+ [<000000013a4b9300>] blk_mq_init_allocated_queue+0x448/0x478
+ [<000000013a4b9416>] blk_mq_init_queue+0x4e/0x90
+ [<000003ff8019d3e6>] loop_add+0x106/0x278 [loop]
+ [<000003ff801b8148>] loop_init+0x148/0x1000 [loop]
+ [<0000000139de4924>] do_one_initcall+0x3c/0x1e0
+ [<0000000139ef449a>] do_init_module+0x6a/0x2a0
+ [<0000000139ef61bc>] __do_sys_finit_module+0xa4/0xc0
+ [<0000000139de9e6e>] do_syscall+0x7e/0xd0
+ [<000000013a8e0aec>] __do_syscall+0xbc/0x110
+ [<000000013a8ee2e8>] system_call+0x78/0xa0
+
+Fixes: 52aeda7accb6 ("s390/topology: remove offline CPUs from CPU topology masks")
+Cc: # 5.7+
+Reported-by: Marius Hillenbrand
+Signed-off-by: Sven Schnelle
+Reviewed-by: Heiko Carstens
+Signed-off-by: Vasily Gorbik
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/kernel/topology.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/topology.c
++++ b/arch/s390/kernel/topology.c
+@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst
+ {
+ 	static cpumask_t mask;
+ 
+-	cpumask_copy(&mask, cpumask_of(cpu));
++	cpumask_clear(&mask);
++	if (!cpu_online(cpu))
++		goto out;
++	cpumask_set_cpu(cpu, &mask);
+ 	switch (topology_mode) {
+ 	case TOPOLOGY_MODE_HW:
+ 		while (info) {
+@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst
+ 	default:
+ 		fallthrough;
+ 	case TOPOLOGY_MODE_SINGLE:
+-		cpumask_copy(&mask, cpumask_of(cpu));
+ 		break;
+ 	}
+ 	cpumask_and(&mask, &mask, cpu_online_mask);
++out:
+ 	cpumask_copy(dst, &mask);
+ }
+ 
+@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *ds
+ 	static cpumask_t mask;
+ 	int i;
+ 
+-	cpumask_copy(&mask, cpumask_of(cpu));
++	cpumask_clear(&mask);
++	if (!cpu_online(cpu))
++		goto out;
++	cpumask_set_cpu(cpu, &mask);
+ 	if (topology_mode != TOPOLOGY_MODE_HW)
+ 		goto out;
+ 	cpu -= cpu % (smp_cpu_mtid + 1);
diff --git a/queue-5.12/series b/queue-5.12/series
index 7b9a96441cb..4d7f5daa9a6
100644 --- a/queue-5.12/series +++ b/queue-5.12/series @@ -66,3 +66,15 @@ gpiolib-cdev-zero-padding-during-conversion-to-gpiol.patch scsi-sd-call-sd_revalidate_disk-for-ioctl-blkrrpart.patch software-node-handle-software-node-injection-to-an-e.patch nilfs2-fix-memory-leak-in-nilfs_sysfs_delete_device_.patch +s390-topology-clear-thread-group-maps-for-offline-cpus.patch +s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch +s390-fix-system-call-restart-with-multiple-signals.patch +s390-clear-pt_regs-flags-on-irq-entry.patch +kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch +i2c-robotfuzz-osif-fix-control-request-directions.patch +ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch +xen-events-reset-active-flag-for-lateeoi-events-later.patch +kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch +kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch +x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch +x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch diff --git a/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch b/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch new file mode 100644 index 00000000000..83cc50c873a --- /dev/null +++ b/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch @@ -0,0 +1,165 @@ +From f9dfb5e390fab2df9f7944bb91e7705aba14cd26 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 18 Jun 2021 16:18:25 +0200 +Subject: x86/fpu: Make init_fpstate correct with optimized XSAVE + +From: Thomas Gleixner + +commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream. + +The XSAVE init code initializes all enabled and supported components with +XRSTOR(S) to init state. Then it XSAVEs the state of the components back +into init_fpstate which is used in several places to fill in the init state +of components. + +This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because +those use the init optimization and skip writing state of components which +are in init state. So init_fpstate.xsave still contains all zeroes after +this operation. + +There are two ways to solve that: + + 1) Use XSAVE unconditionally, but that requires to reshuffle the buffer when + XSAVES is enabled because XSAVES uses compacted format. + + 2) Save the components which are known to have a non-zero init state by other + means. + +Looking deeper, #2 is the right thing to do because all components the +kernel supports have all-zeroes init state except the legacy features (FP, +SSE). Those cannot be hard coded because the states are not identical on all +CPUs, but they can be saved with FXSAVE which avoids all conditionals. + +Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with +a BUILD_BUG_ON() which reminds developers to validate that a newly added +component has all zeroes init state. As a bonus remove the now unused +copy_xregs_to_kernel_booting() crutch. + +The XSAVE and reshuffle method can still be implemented in the unlikely +case that components are added which have a non-zero init state and no +other means to save them. For now, FXSAVE is just simple and good enough. + + [ bp: Fix a typo or two in the text. 
] + +Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/internal.h | 30 +++++++------------------- + arch/x86/kernel/fpu/xstate.c | 41 +++++++++++++++++++++++++++++++++--- + 2 files changed, 46 insertions(+), 25 deletions(-) + +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + } + ++static inline void fxsave(struct fxregs_state *fx) ++{ ++ if (IS_ENABLED(CONFIG_X86_32)) ++ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); ++ else ++ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); ++} ++ + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ + #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" + #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +@@ -270,28 +278,6 @@ static inline void copy_fxregs_to_kernel + + /* + * This function is called only during boot time when x86 caps are not set +- * up and alternative can not be used yet. +- */ +-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +-{ +- u64 mask = xfeatures_mask_all; +- u32 lmask = mask; +- u32 hmask = mask >> 32; +- int err; +- +- WARN_ON(system_state != SYSTEM_BOOTING); +- +- if (boot_cpu_has(X86_FEATURE_XSAVES)) +- XSTATE_OP(XSAVES, xstate, lmask, hmask, err); +- else +- XSTATE_OP(XSAVE, xstate, lmask, hmask, err); +- +- /* We should never fault when copying to a kernel buffer: */ +- WARN_ON_FPU(err); +-} +- +-/* +- * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ + static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -441,12 +441,35 @@ static void __init print_xstate_offset_s + } + + /* ++ * All supported features have either init state all zeros or are ++ * handled in setup_init_fpu() individually. This is an explicit ++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch ++ * newly added supported features at build time and make people ++ * actually look at the init state for the new feature. ++ */ ++#define XFEATURES_INIT_FPSTATE_HANDLED \ ++ (XFEATURE_MASK_FP | \ ++ XFEATURE_MASK_SSE | \ ++ XFEATURE_MASK_YMM | \ ++ XFEATURE_MASK_OPMASK | \ ++ XFEATURE_MASK_ZMM_Hi256 | \ ++ XFEATURE_MASK_Hi16_ZMM | \ ++ XFEATURE_MASK_PKRU | \ ++ XFEATURE_MASK_BNDREGS | \ ++ XFEATURE_MASK_BNDCSR | \ ++ XFEATURE_MASK_PASID) ++ ++/* + * setup the xstate image representing the init state + */ + static void __init setup_init_fpu_buf(void) + { + static int on_boot_cpu __initdata = 1; + ++ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | ++ XFEATURE_MASK_SUPERVISOR_SUPPORTED) != ++ XFEATURES_INIT_FPSTATE_HANDLED); ++ + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + +@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(vo + copy_kernel_to_xregs_booting(&init_fpstate.xsave); + + /* +- * Dump the init state again. This is to identify the init state +- * of any feature which is not represented by all zero's. ++ * All components are now in init state. Read the state back so ++ * that init_fpstate contains all non-zero init state. 
This only ++ * works with XSAVE, but not with XSAVEOPT and XSAVES because ++ * those use the init optimization which skips writing data for ++ * components in init state. ++ * ++ * XSAVE could be used, but that would require to reshuffle the ++ * data when XSAVES is available because XSAVES uses xstate ++ * compaction. But doing so is a pointless exercise because most ++ * components have an all zeros init state except for the legacy ++ * ones (FP and SSE). Those can be saved with FXSAVE into the ++ * legacy area. Adding new features requires to ensure that init ++ * state is all zeroes or if not to add the necessary handling ++ * here. + */ +- copy_xregs_to_kernel_booting(&init_fpstate.xsave); ++ fxsave(&init_fpstate.fxsave); + } + + static int xfeature_uncompacted_offset(int xfeature_nr) diff --git a/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch b/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch new file mode 100644 index 00000000000..79ddb0dfab5 --- /dev/null +++ b/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch @@ -0,0 +1,66 @@ +From 9301982c424a003c0095bf157154a85bf5322bd0 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 18 Jun 2021 16:18:24 +0200 +Subject: x86/fpu: Preserve supervisor states in sanitize_restored_user_xstate() + +From: Thomas Gleixner + +commit 9301982c424a003c0095bf157154a85bf5322bd0 upstream. + +sanitize_restored_user_xstate() preserves the supervisor states only +when the fx_only argument is zero, which allows unprivileged user space +to put supervisor states back into init state. + +Preserve them unconditionally. + + [ bp: Fix a typo or two in the text. ] + +Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 26 ++++++++------------------ + 1 file changed, 8 insertions(+), 18 deletions(-) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpre + + if (use_xsave()) { + /* +- * Note: we don't need to zero the reserved bits in the +- * xstate_header here because we either didn't copy them at all, +- * or we checked earlier that they aren't set. ++ * Clear all feature bits which are not set in ++ * user_xfeatures and clear all extended features ++ * for fx_only mode. + */ ++ u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; + + /* +- * 'user_xfeatures' might have bits clear which are +- * set in header->xfeatures. This represents features that +- * were in init state prior to a signal delivery, and need +- * to be reset back to the init state. Clear any user +- * feature bits which are set in the kernel buffer to get +- * them back to the init state. +- * +- * Supervisor state is unchanged by input from userspace. +- * Ensure supervisor state bits stay set and supervisor +- * state is not modified. ++ * Supervisor state has to be preserved. The sigframe ++ * restore can only modify user features, i.e. @mask ++ * cannot contain them. 
+ */ +- if (fx_only) +- header->xfeatures = XFEATURE_MASK_FPSSE; +- else +- header->xfeatures &= user_xfeatures | +- xfeatures_mask_supervisor(); ++ header->xfeatures &= mask | xfeatures_mask_supervisor(); + } + + if (use_fxsr()) { diff --git a/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch b/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch new file mode 100644 index 00000000000..ab19055d14c --- /dev/null +++ b/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch @@ -0,0 +1,63 @@ +From 3de218ff39b9e3f0d453fe3154f12a174de44b25 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Wed, 23 Jun 2021 15:09:13 +0200 +Subject: xen/events: reset active flag for lateeoi events later + +From: Juergen Gross + +commit 3de218ff39b9e3f0d453fe3154f12a174de44b25 upstream. + +In order to avoid a race condition for user events when changing +cpu affinity reset the active flag only when EOI-ing the event. + +This is working fine as all user events are lateeoi events. Note that +lateeoi_ack_mask_dynirq() is not modified as there is no explicit call +to xen_irq_lateeoi() expected later. + +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Fixes: b6622798bc50b62 ("xen/events: avoid handling the same event on two cpus at the same time") +Tested-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Boris Ostrovsky +Link: https://lore.kernel.org/r/20210623130913.9405-1-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/events/events_base.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struc + } + + info->eoi_time = 0; ++ ++ /* is_active hasn't been reset yet, do it now. */ ++ smp_store_release(&info->is_active, 0); + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); + } + +@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port + BUG(); + } + ++/* Not called for lateeoi events. */ + static void event_handler_exit(struct irq_info *info) + { + smp_store_release(&info->is_active, 0); +@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct ir + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EOI_PENDING); +- event_handler_exit(info); ++ /* ++ * Don't call event_handler_exit(). ++ * Need to keep is_active non-zero in order to ignore re-raised ++ * events after cpu affinity changes while a lateeoi is pending. ++ */ ++ clear_evtchn(evtchn); + } + } +
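[ Editor's note: an illustrative userspace model of the lateeoi change that
  closes this series. An event whose is_active flag is still set is ignored
  when re-raised; the fix clears the flag only at EOI time, so an event
  re-raised after a cpu affinity change cannot be handled twice. All names
  are invented; this is not the Xen driver code. ]

	#include <stdbool.h>
	#include <stdio.h>

	struct evt { bool is_active; int handled; };

	static void raise_event(struct evt *e)
	{
		if (!e->is_active) {	/* a second delivery is ignored */
			e->is_active = true;
			e->handled++;
		}
	}

	/* the fix: is_active is cleared here, at EOI time, not in the ack */
	static void lateeoi(struct evt *e)
	{
		e->is_active = false;	/* smp_store_release() in the patch */
	}

	int main(void)
	{
		struct evt e = { false, 0 };

		raise_event(&e);	/* delivered; handler starts               */
		raise_event(&e);	/* re-raised after affinity change: ignored */
		lateeoi(&e);		/* user space finally EOIs the event       */
		raise_event(&e);	/* the next event is delivered normally    */
		printf("handled %d of 3 raised events\n", e.handled);
		return 0;
	}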