--- /dev/null
+From 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 Mon Sep 17 00:00:00 2001
+From: Jeff Layton <jlayton@kernel.org>
+Date: Tue, 1 Jun 2021 09:40:25 -0400
+Subject: ceph: must hold snap_rwsem when filling inode for async create
+
+From: Jeff Layton <jlayton@kernel.org>
+
+commit 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 upstream.
+
+...and add a lockdep assertion for it to ceph_fill_inode().
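+
+A minimal sketch of the resulting call pattern in the async create
+path (mirroring the fs/ceph/file.c hunk below):
+
+	down_read(&mdsc->snap_rwsem);
+	ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
+			      req->r_fmode, NULL);
+	up_read(&mdsc->snap_rwsem);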
+
+Cc: stable@vger.kernel.org # v5.7+
+Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible")
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/file.c | 3 +++
+ fs/ceph/inode.c | 2 ++
+ 2 files changed, 5 insertions(+)
+
+--- a/fs/ceph/file.c
++++ b/fs/ceph/file.c
+@@ -578,6 +578,7 @@ static int ceph_finish_async_create(stru
+ struct ceph_inode_info *ci = ceph_inode(dir);
+ struct inode *inode;
+ struct timespec64 now;
++ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
+ struct ceph_vino vino = { .ino = req->r_deleg_ino,
+ .snap = CEPH_NOSNAP };
+
+@@ -615,8 +616,10 @@ static int ceph_finish_async_create(stru
+
+ ceph_file_layout_to_legacy(lo, &in.layout);
+
++ down_read(&mdsc->snap_rwsem);
+ ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
+ req->r_fmode, NULL);
++ up_read(&mdsc->snap_rwsem);
+ if (ret) {
+ dout("%s failed to fill inode: %d\n", __func__, ret);
+ ceph_dir_clear_complete(dir);
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode,
+ bool new_version = false;
+ bool fill_inline = false;
+
++ lockdep_assert_held(&mdsc->snap_rwsem);
++
+ dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__,
+ inode, ceph_vinop(inode), le64_to_cpu(info->version),
+ ci->i_version);
--- /dev/null
+From 4ca070ef0dd885616ef294d269a9bf8e3b258e1a Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Mon, 24 May 2021 11:09:12 +0200
+Subject: i2c: robotfuzz-osif: fix control-request directions
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 4ca070ef0dd885616ef294d269a9bf8e3b258e1a upstream.
+
+The direction of the pipe argument must match the request-type direction
+bit or control requests may fail depending on the host-controller-driver
+implementation.
+
+Control transfers without a data stage are treated as OUT requests by
+the USB stack and should be using usb_sndctrlpipe(). Failing to do so
+will now trigger a warning.
+
+Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously
+used the osif_usb_read() helper and set the IN direction bit.
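+
+For reference, the direction is encoded both in the pipe and in the
+request-type byte; a hedged sketch of what an OUT request without a
+data stage boils down to (values illustrative, not the driver's
+exact code):
+
+	/* OUT: usb_sndctrlpipe() must pair with USB_DIR_OUT */
+	usb_control_msg(dev, usb_sndctrlpipe(dev, 0), cmd,
+			USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
+			value, index, NULL, 0, USB_CTRL_SET_TIMEOUT);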
+
+Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com
+Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.")
+Cc: stable@vger.kernel.org # 3.14
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/i2c/busses/i2c-robotfuzz-osif.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c
++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c
+@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter
+ }
+ }
+
+- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
++ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
+ if (ret) {
+ dev_err(&adapter->dev, "failure sending STOP\n");
+ return -EREMOTEIO;
+@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interfa
+ * Set bus frequency. The frequency is:
+ * 120,000,000 / ( 16 + 2 * div * 4^prescale).
+ * Using dev = 52, prescale = 0 give 100KHz */
+- ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
++ ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
+ NULL, 0);
+ if (ret) {
+ dev_err(&interface->dev, "failure sending bit rate");
--- /dev/null
+From 5fa54346caf67b4b1b10b1f390316ae466da4d53 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Thu, 24 Jun 2021 18:39:48 -0700
+Subject: kthread: prevent deadlock when kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()
+
+From: Petr Mladek <pmladek@suse.com>
+
+commit 5fa54346caf67b4b1b10b1f390316ae466da4d53 upstream.
+
+The system might hang with the following backtrace:
+
+ schedule+0x80/0x100
+ schedule_timeout+0x48/0x138
+ wait_for_common+0xa4/0x134
+ wait_for_completion+0x1c/0x2c
+ kthread_flush_work+0x114/0x1cc
+ kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
+ kthread_cancel_delayed_work_sync+0x18/0x2c
+ xxxx_pm_notify+0xb0/0xd8
+ blocking_notifier_call_chain_robust+0x80/0x194
+ pm_notifier_call_chain_robust+0x28/0x4c
+ suspend_prepare+0x40/0x260
+ enter_state+0x80/0x3f4
+ pm_suspend+0x60/0xdc
+ state_store+0x108/0x144
+ kobj_attr_store+0x38/0x88
+ sysfs_kf_write+0x64/0xc0
+ kernfs_fop_write_iter+0x108/0x1d0
+ vfs_write+0x2f4/0x368
+ ksys_write+0x7c/0xec
+
+It is caused by the following race between kthread_mod_delayed_work()
+and kthread_cancel_delayed_work_sync():
+
+CPU0                                CPU1
+
+Context: Thread A                   Context: Thread B
+
+kthread_mod_delayed_work()
+  spin_lock()
+  __kthread_cancel_work()
+    spin_unlock()
+    del_timer_sync()
+                                    kthread_cancel_delayed_work_sync()
+                                      spin_lock()
+                                      __kthread_cancel_work()
+                                        spin_unlock()
+                                        del_timer_sync()
+                                        spin_lock()
+
+                                      work->canceling++
+                                      spin_unlock()
+    spin_lock()
+  queue_delayed_work()
+    // dwork is put into the worker->delayed_work_list
+
+  spin_unlock()
+
+                                    kthread_flush_work()
+                                      // flush_work is put at the tail of the dwork
+
+                                    wait_for_completion()
+
+Context: IRQ
+
+  kthread_delayed_work_timer_fn()
+    spin_lock()
+    list_del_init(&work->node);
+    spin_unlock()
+
+BANG: flush_work is no longer linked and will never be processed.
+
+The problem is that kthread_mod_delayed_work() checks the
+work->canceling flag before canceling the timer.
+
+A simple solution is to (re)check work->canceling after
+__kthread_cancel_work(). But then it is not clear what should be
+returned when __kthread_cancel_work() removed the work from the queue
+(list) but it can't be queued again with the new @delay.
+
+The return value might be used for reference counting. The caller has
+to know whether a new work has been queued or an existing one was
+replaced.
+
+The proper solution is that kthread_mod_delayed_work() will remove the
+work from the queue (list) _only_ when work->canceling is not set. The
+flag must be checked after the timer is stopped and the remaining
+operations can be done under worker->lock.
+
+Note that kthread_mod_delayed_work() could remove the timer and then
+bail out. It is fine. The other canceling caller needs to cancel the
+timer as well. The important thing is that the queue (list)
+manipulation is done atomically under worker->lock.
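+
+In sketch form, the fixed kthread_mod_delayed_work() thus does
+(simplified; the fast-queue path and return value handling are
+omitted):
+
+	raw_spin_lock_irqsave(&worker->lock, flags);
+	/* may drop and re-take worker->lock while stopping the timer */
+	kthread_cancel_delayed_work_timer(work, &flags);
+	if (work->canceling)
+		goto out;	/* let the canceling caller win */
+	ret = __kthread_cancel_work(work);
+	__kthread_queue_delayed_work(worker, dwork, delay);
+out:
+	raw_spin_unlock_irqrestore(&worker->lock, flags);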
+
+Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com
+Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work")
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Reported-by: Martin Liu <liumartin@google.com>
+Cc: <jenhaochen@google.com>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kthread.c | 35 ++++++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1119,8 +1119,11 @@ static void kthread_cancel_delayed_work_
+ }
+
+ /*
+- * This function removes the work from the worker queue. Also it makes sure
+- * that it won't get queued later via the delayed work's timer.
++ * This function removes the work from the worker queue.
++ *
++ * It is called under worker->lock. The caller must make sure that
++ * the timer used by delayed work is not running, e.g. by calling
++ * kthread_cancel_delayed_work_timer().
+ *
+ * The work might still be in use when this function finishes. See the
+ * current_work proceed by the worker.
+@@ -1128,13 +1131,8 @@ static void kthread_cancel_delayed_work_
+ * Return: %true if @work was pending and successfully canceled,
+ * %false if @work was not pending
+ */
+-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+- unsigned long *flags)
++static bool __kthread_cancel_work(struct kthread_work *work)
+ {
+- /* Try to cancel the timer if exists. */
+- if (is_dwork)
+- kthread_cancel_delayed_work_timer(work, flags);
+-
+ /*
+ * Try to remove the work from a worker list. It might either
+ * be from worker->work_list or from worker->delayed_work_list.
+@@ -1187,11 +1185,23 @@ bool kthread_mod_delayed_work(struct kth
+ /* Work must not be used with >1 worker, see kthread_queue_work() */
+ WARN_ON_ONCE(work->worker != worker);
+
+- /* Do not fight with another command that is canceling this work. */
++ /*
++ * Temporary cancel the work but do not fight with another command
++ * that is canceling the work as well.
++ *
++ * It is a bit tricky because of possible races with another
++ * mod_delayed_work() and cancel_delayed_work() callers.
++ *
++ * The timer must be canceled first because worker->lock is released
++ * when doing so. But the work can be removed from the queue (list)
++ * only when it can be queued again so that the return value can
++ * be used for reference counting.
++ */
++ kthread_cancel_delayed_work_timer(work, &flags);
+ if (work->canceling)
+ goto out;
++ ret = __kthread_cancel_work(work);
+
+- ret = __kthread_cancel_work(work, true, &flags);
+ fast_queue:
+ __kthread_queue_delayed_work(worker, dwork, delay);
+ out:
+@@ -1213,7 +1223,10 @@ static bool __kthread_cancel_work_sync(s
+ /* Work must not be used with >1 worker, see kthread_queue_work(). */
+ WARN_ON_ONCE(work->worker != worker);
+
+- ret = __kthread_cancel_work(work, is_dwork, &flags);
++ if (is_dwork)
++ kthread_cancel_delayed_work_timer(work, &flags);
++
++ ret = __kthread_cancel_work(work);
+
+ if (worker->current_work != work)
+ goto out_fast;
--- /dev/null
+From 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 Mon Sep 17 00:00:00 2001
+From: Petr Mladek <pmladek@suse.com>
+Date: Thu, 24 Jun 2021 18:39:45 -0700
+Subject: kthread_worker: split code for canceling the delayed work timer
+
+From: Petr Mladek <pmladek@suse.com>
+
+commit 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 upstream.
+
+Patch series "kthread_worker: Fix race between kthread_mod_delayed_work()
+and kthread_cancel_delayed_work_sync()".
+
+This patchset fixes the race between kthread_mod_delayed_work() and
+kthread_cancel_delayed_work_sync() including proper return value
+handling.
+
+This patch (of 2):
+
+Simple code refactoring as a preparation step for fixing a race between
+kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync().
+
+It does not modify the existing behavior.
+
+Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com
+Signed-off-by: Petr Mladek <pmladek@suse.com>
+Cc: <jenhaochen@google.com>
+Cc: Martin Liu <liumartin@google.com>
+Cc: Minchan Kim <minchan@google.com>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Oleg Nesterov <oleg@redhat.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kthread.c | 46 +++++++++++++++++++++++++++++-----------------
+ 1 file changed, 29 insertions(+), 17 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1092,6 +1092,33 @@ void kthread_flush_work(struct kthread_w
+ EXPORT_SYMBOL_GPL(kthread_flush_work);
+
+ /*
++ * Make sure that the timer is neither set nor running and could
++ * not manipulate the work list_head any longer.
++ *
++ * The function is called under worker->lock. The lock is temporary
++ * released but the timer can't be set again in the meantime.
++ */
++static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
++ unsigned long *flags)
++{
++ struct kthread_delayed_work *dwork =
++ container_of(work, struct kthread_delayed_work, work);
++ struct kthread_worker *worker = work->worker;
++
++ /*
++ * del_timer_sync() must be called to make sure that the timer
++ * callback is not running. The lock must be temporary released
++ * to avoid a deadlock with the callback. In the meantime,
++ * any queuing is blocked by setting the canceling counter.
++ */
++ work->canceling++;
++ raw_spin_unlock_irqrestore(&worker->lock, *flags);
++ del_timer_sync(&dwork->timer);
++ raw_spin_lock_irqsave(&worker->lock, *flags);
++ work->canceling--;
++}
++
++/*
+ * This function removes the work from the worker queue. Also it makes sure
+ * that it won't get queued later via the delayed work's timer.
+ *
+@@ -1105,23 +1132,8 @@ static bool __kthread_cancel_work(struct
+ unsigned long *flags)
+ {
+ /* Try to cancel the timer if exists. */
+- if (is_dwork) {
+- struct kthread_delayed_work *dwork =
+- container_of(work, struct kthread_delayed_work, work);
+- struct kthread_worker *worker = work->worker;
+-
+- /*
+- * del_timer_sync() must be called to make sure that the timer
+- * callback is not running. The lock must be temporary released
+- * to avoid a deadlock with the callback. In the meantime,
+- * any queuing is blocked by setting the canceling counter.
+- */
+- work->canceling++;
+- raw_spin_unlock_irqrestore(&worker->lock, *flags);
+- del_timer_sync(&dwork->timer);
+- raw_spin_lock_irqsave(&worker->lock, *flags);
+- work->canceling--;
+- }
++ if (is_dwork)
++ kthread_cancel_delayed_work_timer(work, flags);
+
+ /*
+ * Try to remove the work from a worker list. It might either
--- /dev/null
+From f8be156be163a052a067306417cd0ff679068c97 Mon Sep 17 00:00:00 2001
+From: Nicholas Piggin <npiggin@gmail.com>
+Date: Thu, 24 Jun 2021 08:29:04 -0400
+Subject: KVM: do not allow mapping valid but non-reference-counted pages
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+commit f8be156be163a052a067306417cd0ff679068c97 upstream.
+
+It's possible to create a region which maps valid but non-refcounted
+pages (e.g., tail pages of non-compound higher order allocations). These
+host pages can then be returned by the gfn_to_page, gfn_to_pfn, etc.
+family of APIs, which take a reference to the page, taking its
+refcount from 0 to 1.
+When the reference is dropped, this will free the page incorrectly.
+
+Fix this by only taking a reference on valid pages if it was non-zero,
+which indicates it is participating in normal refcounting (and can be
+released with put_page).
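+
+Condensed, the new check is (it matches the helper added below):
+
+	static int kvm_try_get_pfn(kvm_pfn_t pfn)
+	{
+		if (kvm_is_reserved_pfn(pfn))
+			return 1;
+		/* fails when the refcount is 0, i.e. not refcounted */
+		return get_page_unless_zero(pfn_to_page(pfn));
+	}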
+
+This addresses CVE-2021-22543.
+
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Tested-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c | 19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1919,6 +1919,13 @@ static bool vma_is_valid(struct vm_area_
+ return true;
+ }
+
++static int kvm_try_get_pfn(kvm_pfn_t pfn)
++{
++ if (kvm_is_reserved_pfn(pfn))
++ return 1;
++ return get_page_unless_zero(pfn_to_page(pfn));
++}
++
+ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
+ unsigned long addr, bool *async,
+ bool write_fault, bool *writable,
+@@ -1968,13 +1975,21 @@ static int hva_to_pfn_remapped(struct vm
+ * Whoever called remap_pfn_range is also going to call e.g.
+ * unmap_mapping_range before the underlying pages are freed,
+ * causing a call to our MMU notifier.
++ *
++ * Certain IO or PFNMAP mappings can be backed with valid
++ * struct pages, but be allocated without refcounting e.g.,
++ * tail pages of non-compound higher order allocations, which
++ * would then underflow the refcount when the caller does the
++ * required put_page. Don't allow those pages here.
+ */
+- kvm_get_pfn(pfn);
++ if (!kvm_try_get_pfn(pfn))
++ r = -EFAULT;
+
+ out:
+ pte_unmap_unlock(ptep, ptl);
+ *p_pfn = pfn;
+- return 0;
++
++ return r;
+ }
+
+ /*
--- /dev/null
+From ca1f4d702d534387aa1f16379edb3b03cdb6ceda Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Fri, 11 Jun 2021 16:08:18 +0200
+Subject: s390: clear pt_regs::flags on irq entry
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit ca1f4d702d534387aa1f16379edb3b03cdb6ceda upstream.
+
+The current irq entry code doesn't initialize pt_regs::flags. On exit to
+user mode, arch_do_signal_or_restart() tests whether PIF_SYSCALL is set,
+which might yield wrong results.
+
+Fix this by clearing pt_regs::flags in the entry.S irq handler
+code.
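+
+In sketch form, the exit path contains a test like (simplified):
+
+	/* pt_regs::flags was never initialized on irq entry, so a
+	 * stale PIF_SYSCALL bit could make this true */
+	if (test_pt_regs_flag(regs, PIF_SYSCALL))
+		/* ... system call restart handling ... */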
+
+Reported-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Fixes: 56e62a737028 ("s390: convert to generic entry")
+Cc: <stable@vger.kernel.org> # 5.12
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/entry.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -418,6 +418,7 @@ ENTRY(\name)
+ xgr %r6,%r6
+ xgr %r7,%r7
+ xgr %r10,%r10
++ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ tm %r8,0x0001 # coming from user space?
--- /dev/null
+From fc66127dc3396338f287c3b494dfbf102547e770 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Fri, 11 Jun 2021 10:27:51 +0200
+Subject: s390: fix system call restart with multiple signals
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit fc66127dc3396338f287c3b494dfbf102547e770 upstream.
+
+glibc complained with "The futex facility returned an unexpected error
+code.". It turned out that the futex syscall returned -ERESTARTSYS because
+a signal is pending. arch_do_signal_or_restart() restored the syscall
+parameters (nameley regs->gprs[2]) and set PIF_SYSCALL_RESTART. When
+another signal is made pending later in the exit loop
+arch_do_signal_or_restart() is called again. This function clears
+PIF_SYSCALL_RESTART and checks the return code which is set in
+regs->gprs[2]. However, regs->gprs[2] was restored in the previous run
+and no longer contains -ERESTARTSYS, so PIF_SYSCALL_RESTART isn't set
+again and the syscall is skipped.
+
+Fix this by not clearing PIF_SYSCALL_RESTART - it is already cleared in
+__do_syscall() when the syscall is restarted.
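+
+In sketch form, the restart decision keys off the saved return value
+(simplified from arch_do_signal_or_restart()):
+
+	switch (regs->gprs[2]) {
+	case -ERESTARTNOHAND:
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		/* first run: restore the argument, request a restart */
+		regs->gprs[2] = regs->orig_gpr2;
+		set_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
+		break;
+	}
+
+A second run sees the already restored regs->gprs[2], matches none of
+the cases, and must therefore not clear PIF_SYSCALL_RESTART again.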
+
+Reported-by: Bjoern Walk <bwalk@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Fixes: 56e62a737028 ("s390: convert to generic entry")
+Cc: <stable@vger.kernel.org> # 5.12
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/signal.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/s390/kernel/signal.c
++++ b/arch/s390/kernel/signal.c
+@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt
+
+ /* No handlers present - check for system call restart */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
+ switch (regs->gprs[2]) {
--- /dev/null
+From 67147e96a332b56c7206238162771d82467f86c0 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Fri, 18 Jun 2021 16:58:47 +0200
+Subject: s390/stack: fix possible register corruption with stack switch helper
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit 67147e96a332b56c7206238162771d82467f86c0 upstream.
+
+The CALL_ON_STACK macro is used to call a C function from inline
+assembly, and therefore must consider the C ABI, which says that only
+registers 6-13 and 15 are non-volatile (restored by the called
+function).
+
+The inline assembly incorrectly marks all registers used to pass
+parameters to the called function as read-only input operands, instead
+of operands that are read and written to. This might result in
+register corruption depending on usage, compiler, and compile options.
+
+Fix this by marking all operands used to pass parameters as read-write
+operands. To keep the code simple, even register 6, if used, is marked
+as a read-write operand.
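+
+The difference, in a hedged two-line illustration (not the actual
+macro):
+
+	/* before: "d" (r2) tells the compiler r2 is only read */
+	asm volatile("brasl 14,%[_fn]" : : "d" (r2), [_fn] "X" (fn) : "14", "cc", "memory");
+	/* after: "+&d" (r2) tells it r2 is changed by the call */
+	asm volatile("brasl 14,%[_fn]" : "+&d" (r2) : [_fn] "X" (fn) : "14", "cc", "memory");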
+
+Fixes: ff340d2472ec ("s390: add stack switch helper")
+Cc: <stable@kernel.org> # 4.20
+Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/stacktrace.h | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/arch/s390/include/asm/stacktrace.h
++++ b/arch/s390/include/asm/stacktrace.h
+@@ -91,12 +91,16 @@ struct stack_frame {
+ CALL_ARGS_4(arg1, arg2, arg3, arg4); \
+ register unsigned long r4 asm("6") = (unsigned long)(arg5)
+
+-#define CALL_FMT_0 "=&d" (r2) :
+-#define CALL_FMT_1 "+&d" (r2) :
+-#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
+-#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
+-#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
+-#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
++/*
++ * To keep this simple mark register 2-6 as being changed (volatile)
++ * by the called function, even though register 6 is saved/nonvolatile.
++ */
++#define CALL_FMT_0 "=&d" (r2)
++#define CALL_FMT_1 "+&d" (r2)
++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
+
+ #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+ #define CALL_CLOBBER_4 CALL_CLOBBER_5
+@@ -118,7 +122,7 @@ struct stack_frame {
+ " brasl 14,%[_fn]\n" \
+ " la 15,0(%[_prev])\n" \
+ : [_prev] "=&a" (prev), CALL_FMT_##nr \
+- [_stack] "R" (stack), \
++ : [_stack] "R" (stack), \
+ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
+ [_frame] "d" (frame), \
+ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \
--- /dev/null
+From 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle <svens@linux.ibm.com>
+Date: Tue, 15 Jun 2021 15:05:22 +0200
+Subject: s390/topology: clear thread/group maps for offline cpus
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+commit 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 upstream.
+
+The current code doesn't clear the thread/group maps for offline
+CPUs. This may cause kernel crashes like the one below in common
+code that assumes a CPU with siblings is online (see the sketch
+after the trace).
+
+Unable to handle kernel pointer dereference in virtual kernel address space
+
+Call Trace:
+ [<000000013a4b8c3c>] blk_mq_map_swqueue+0x10c/0x388
+([<000000013a4b8bcc>] blk_mq_map_swqueue+0x9c/0x388)
+ [<000000013a4b9300>] blk_mq_init_allocated_queue+0x448/0x478
+ [<000000013a4b9416>] blk_mq_init_queue+0x4e/0x90
+ [<000003ff8019d3e6>] loop_add+0x106/0x278 [loop]
+ [<000003ff801b8148>] loop_init+0x148/0x1000 [loop]
+ [<0000000139de4924>] do_one_initcall+0x3c/0x1e0
+ [<0000000139ef449a>] do_init_module+0x6a/0x2a0
+ [<0000000139ef61bc>] __do_sys_finit_module+0xa4/0xc0
+ [<0000000139de9e6e>] do_syscall+0x7e/0xd0
+ [<000000013a8e0aec>] __do_syscall+0xbc/0x110
+ [<000000013a8ee2e8>] system_call+0x78/0xa0
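+
+A hedged sketch of the common-code pattern that trips over this
+(illustrative only, not the blk-mq source):
+
+	for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
+		/* assumes every CPU in the sibling mask is online */
+		qmap->mq_map[sibling] = queue;
+	}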
+
+Fixes: 52aeda7accb6 ("s390/topology: remove offline CPUs from CPU topology masks")
+Cc: <stable@kernel.org> # 5.7+
+Reported-by: Marius Hillenbrand <mhillen@linux.ibm.com>
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/kernel/topology.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/topology.c
++++ b/arch/s390/kernel/topology.c
+@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst
+ {
+ static cpumask_t mask;
+
+- cpumask_copy(&mask, cpumask_of(cpu));
++ cpumask_clear(&mask);
++ if (!cpu_online(cpu))
++ goto out;
++ cpumask_set_cpu(cpu, &mask);
+ switch (topology_mode) {
+ case TOPOLOGY_MODE_HW:
+ while (info) {
+@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst
+ default:
+ fallthrough;
+ case TOPOLOGY_MODE_SINGLE:
+- cpumask_copy(&mask, cpumask_of(cpu));
+ break;
+ }
+ cpumask_and(&mask, &mask, cpu_online_mask);
++out:
+ cpumask_copy(dst, &mask);
+ }
+
+@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *ds
+ static cpumask_t mask;
+ int i;
+
+- cpumask_copy(&mask, cpumask_of(cpu));
++ cpumask_clear(&mask);
++ if (!cpu_online(cpu))
++ goto out;
++ cpumask_set_cpu(cpu, &mask);
+ if (topology_mode != TOPOLOGY_MODE_HW)
+ goto out;
+ cpu -= cpu % (smp_cpu_mtid + 1);
scsi-sd-call-sd_revalidate_disk-for-ioctl-blkrrpart.patch
software-node-handle-software-node-injection-to-an-e.patch
nilfs2-fix-memory-leak-in-nilfs_sysfs_delete_device_.patch
+s390-topology-clear-thread-group-maps-for-offline-cpus.patch
+s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch
+s390-fix-system-call-restart-with-multiple-signals.patch
+s390-clear-pt_regs-flags-on-irq-entry.patch
+kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch
+i2c-robotfuzz-osif-fix-control-request-directions.patch
+ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch
+xen-events-reset-active-flag-for-lateeoi-events-later.patch
+kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch
+kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
+x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch
+x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch
--- /dev/null
+From f9dfb5e390fab2df9f7944bb91e7705aba14cd26 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Jun 2021 16:18:25 +0200
+Subject: x86/fpu: Make init_fpstate correct with optimized XSAVE
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream.
+
+The XSAVE init code initializes all enabled and supported components with
+XRSTOR(S) to init state. Then it XSAVEs the state of the components back
+into init_fpstate which is used in several places to fill in the init state
+of components.
+
+This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because
+those use the init optimization and skip writing state of components which
+are in init state. So init_fpstate.xsave still contains all zeroes after
+this operation.
+
+There are two ways to solve that:
+
+ 1) Use XSAVE unconditionally, but that requires reshuffling the buffer when
+    XSAVES is enabled because XSAVES uses the compacted format.
+
+ 2) Save the components which are known to have a non-zero init state by other
+ means.
+
+Looking deeper, #2 is the right thing to do because all components the
+kernel supports have all-zeroes init state except the legacy features (FP,
+SSE). Those cannot be hard coded because the states are not identical on all
+CPUs, but they can be saved with FXSAVE which avoids all conditionals.
+
+Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with
+a BUILD_BUG_ON() which reminds developers to validate that a newly added
+component has all zeroes init state. As a bonus remove the now unused
+copy_xregs_to_kernel_booting() crutch.
+
+The XSAVE and reshuffle method can still be implemented in the unlikely
+case that components are added which have a non-zero init state and no
+other means to save them. For now, FXSAVE is just simple and good enough.
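+
+In sketch form, the fixed boot sequence is (condensed from
+setup_init_fpu_buf() in the hunk below):
+
+	/* load init state into all enabled components */
+	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
+	/*
+	 * Read back only FP/SSE; every other supported component has
+	 * an all-zeroes init state anyway.
+	 */
+	fxsave(&init_fpstate.fxsave);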
+
+ [ bp: Fix a typo or two in the text. ]
+
+Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/fpu/internal.h | 30 +++++++-------------------
+ arch/x86/kernel/fpu/xstate.c | 41 +++++++++++++++++++++++++++++++++---
+ 2 files changed, 46 insertions(+), 25 deletions(-)
+
+--- a/arch/x86/include/asm/fpu/internal.h
++++ b/arch/x86/include/asm/fpu/internal.h
+@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel
+ asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
+ }
+
++static inline void fxsave(struct fxregs_state *fx)
++{
++ if (IS_ENABLED(CONFIG_X86_32))
++ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
++ else
++ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
++}
++
+ /* These macros all use (%edi)/(%rdi) as the single memory argument. */
+ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
+ #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
+@@ -270,28 +278,6 @@ static inline void copy_fxregs_to_kernel
+
+ /*
+ * This function is called only during boot time when x86 caps are not set
+- * up and alternative can not be used yet.
+- */
+-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
+-{
+- u64 mask = xfeatures_mask_all;
+- u32 lmask = mask;
+- u32 hmask = mask >> 32;
+- int err;
+-
+- WARN_ON(system_state != SYSTEM_BOOTING);
+-
+- if (boot_cpu_has(X86_FEATURE_XSAVES))
+- XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
+- else
+- XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
+-
+- /* We should never fault when copying to a kernel buffer: */
+- WARN_ON_FPU(err);
+-}
+-
+-/*
+- * This function is called only during boot time when x86 caps are not set
+ * up and alternative can not be used yet.
+ */
+ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
+--- a/arch/x86/kernel/fpu/xstate.c
++++ b/arch/x86/kernel/fpu/xstate.c
+@@ -441,12 +441,35 @@ static void __init print_xstate_offset_s
+ }
+
+ /*
++ * All supported features have either init state all zeros or are
++ * handled in setup_init_fpu() individually. This is an explicit
++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
++ * newly added supported features at build time and make people
++ * actually look at the init state for the new feature.
++ */
++#define XFEATURES_INIT_FPSTATE_HANDLED \
++ (XFEATURE_MASK_FP | \
++ XFEATURE_MASK_SSE | \
++ XFEATURE_MASK_YMM | \
++ XFEATURE_MASK_OPMASK | \
++ XFEATURE_MASK_ZMM_Hi256 | \
++ XFEATURE_MASK_Hi16_ZMM | \
++ XFEATURE_MASK_PKRU | \
++ XFEATURE_MASK_BNDREGS | \
++ XFEATURE_MASK_BNDCSR | \
++ XFEATURE_MASK_PASID)
++
++/*
+ * setup the xstate image representing the init state
+ */
+ static void __init setup_init_fpu_buf(void)
+ {
+ static int on_boot_cpu __initdata = 1;
+
++ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
++ XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
++ XFEATURES_INIT_FPSTATE_HANDLED);
++
+ WARN_ON_FPU(!on_boot_cpu);
+ on_boot_cpu = 0;
+
+@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(vo
+ copy_kernel_to_xregs_booting(&init_fpstate.xsave);
+
+ /*
+- * Dump the init state again. This is to identify the init state
+- * of any feature which is not represented by all zero's.
++ * All components are now in init state. Read the state back so
++ * that init_fpstate contains all non-zero init state. This only
++ * works with XSAVE, but not with XSAVEOPT and XSAVES because
++ * those use the init optimization which skips writing data for
++ * components in init state.
++ *
++ * XSAVE could be used, but that would require to reshuffle the
++ * data when XSAVES is available because XSAVES uses xstate
++ * compaction. But doing so is a pointless exercise because most
++ * components have an all zeros init state except for the legacy
++ * ones (FP and SSE). Those can be saved with FXSAVE into the
++ * legacy area. Adding new features requires to ensure that init
++ * state is all zeroes or if not to add the necessary handling
++ * here.
+ */
+- copy_xregs_to_kernel_booting(&init_fpstate.xsave);
++ fxsave(&init_fpstate.fxsave);
+ }
+
+ static int xfeature_uncompacted_offset(int xfeature_nr)
--- /dev/null
+From 9301982c424a003c0095bf157154a85bf5322bd0 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 18 Jun 2021 16:18:24 +0200
+Subject: x86/fpu: Preserve supervisor states in sanitize_restored_user_xstate()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 9301982c424a003c0095bf157154a85bf5322bd0 upstream.
+
+sanitize_restored_user_xstate() preserves the supervisor states only
+when the fx_only argument is zero, which allows unprivileged user space
+to put supervisor states back into init state.
+
+Preserve them unconditionally.
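+
+The resulting computation, shown out of context (it matches the hunk
+below):
+
+	u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
+
+	/* @mask can only ever contain user features */
+	header->xfeatures &= mask | xfeatures_mask_supervisor();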
+
+ [ bp: Fix a typo or two in the text. ]
+
+Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates")
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/fpu/signal.c | 26 ++++++++------------------
+ 1 file changed, 8 insertions(+), 18 deletions(-)
+
+--- a/arch/x86/kernel/fpu/signal.c
++++ b/arch/x86/kernel/fpu/signal.c
+@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpre
+
+ if (use_xsave()) {
+ /*
+- * Note: we don't need to zero the reserved bits in the
+- * xstate_header here because we either didn't copy them at all,
+- * or we checked earlier that they aren't set.
++ * Clear all feature bits which are not set in
++ * user_xfeatures and clear all extended features
++ * for fx_only mode.
+ */
++ u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
+
+ /*
+- * 'user_xfeatures' might have bits clear which are
+- * set in header->xfeatures. This represents features that
+- * were in init state prior to a signal delivery, and need
+- * to be reset back to the init state. Clear any user
+- * feature bits which are set in the kernel buffer to get
+- * them back to the init state.
+- *
+- * Supervisor state is unchanged by input from userspace.
+- * Ensure supervisor state bits stay set and supervisor
+- * state is not modified.
++ * Supervisor state has to be preserved. The sigframe
++ * restore can only modify user features, i.e. @mask
++ * cannot contain them.
+ */
+- if (fx_only)
+- header->xfeatures = XFEATURE_MASK_FPSSE;
+- else
+- header->xfeatures &= user_xfeatures |
+- xfeatures_mask_supervisor();
++ header->xfeatures &= mask | xfeatures_mask_supervisor();
+ }
+
+ if (use_fxsr()) {
--- /dev/null
+From 3de218ff39b9e3f0d453fe3154f12a174de44b25 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Wed, 23 Jun 2021 15:09:13 +0200
+Subject: xen/events: reset active flag for lateeoi events later
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 3de218ff39b9e3f0d453fe3154f12a174de44b25 upstream.
+
+In order to avoid a race condition for user events when changing
+cpu affinity, reset the active flag only when EOI-ing the event.
+
+This is working fine as all user events are lateeoi events. Note that
+lateeoi_ack_mask_dynirq() is not modified as there is no explicit call
+to xen_irq_lateeoi() expected later.
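+
+In sketch form, the lateeoi flow becomes (condensed from the hunks
+below):
+
+	lateeoi_ack_dynirq():
+		do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+		clear_evtchn(evtchn);		/* is_active stays set */
+
+	xen_irq_lateeoi_locked():
+		smp_store_release(&info->is_active, 0);	/* reset at EOI */
+		do_unmask(info, EVT_MASK_REASON_EOI_PENDING);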
+
+Cc: stable@vger.kernel.org
+Reported-by: Julien Grall <julien@xen.org>
+Fixes: b6622798bc50b62 ("xen/events: avoid handling the same event on two cpus at the same time")
+Tested-by: Julien Grall <julien@xen.org>
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Link: https://lore.kernel.org/r/20210623130913.9405-1-jgross@suse.com
+Signed-off-by: Juergen Gross <jgross@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/xen/events/events_base.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struc
+ }
+
+ info->eoi_time = 0;
++
++ /* is_active hasn't been reset yet, do it now. */
++ smp_store_release(&info->is_active, 0);
+ do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
+ }
+
+@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port
+ BUG();
+ }
+
++/* Not called for lateeoi events. */
+ static void event_handler_exit(struct irq_info *info)
+ {
+ smp_store_release(&info->is_active, 0);
+@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct ir
+
+ if (VALID_EVTCHN(evtchn)) {
+ do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+- event_handler_exit(info);
++ /*
++ * Don't call event_handler_exit().
++ * Need to keep is_active non-zero in order to ignore re-raised
++ * events after cpu affinity changes while a lateeoi is pending.
++ */
++ clear_evtchn(evtchn);
+ }
+ }
+