From: Greg Kroah-Hartman Date: Sun, 27 Jun 2021 14:27:15 +0000 (+0200) Subject: 5.12-stable patches X-Git-Tag: v5.12.14~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b1cd1baa1984d1ee486b73f2927ad7f22ec49646;p=thirdparty%2Fkernel%2Fstable-queue.git 5.12-stable patches added patches: ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch i2c-robotfuzz-osif-fix-control-request-directions.patch kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch s390-clear-pt_regs-flags-on-irq-entry.patch s390-fix-system-call-restart-with-multiple-signals.patch s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch s390-topology-clear-thread-group-maps-for-offline-cpus.patch x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch xen-events-reset-active-flag-for-lateeoi-events-later.patch --- diff --git a/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch b/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch new file mode 100644 index 00000000000..efa5c7420f6 --- /dev/null +++ b/queue-5.12/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch @@ -0,0 +1,54 @@ +From 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Tue, 1 Jun 2021 09:40:25 -0400 +Subject: ceph: must hold snap_rwsem when filling inode for async create + +From: Jeff Layton + +commit 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 upstream. + +...and add a lockdep assertion for it to ceph_fill_inode(). 
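[ Editor's note: an illustrative userspace C sketch of the locking rule this
  patch enforces -- the inode filler asserts that the snapshot rwsem is
  read-held by its caller, mirroring the new lockdep_assert_held() in
  ceph_fill_inode(). The reader counter and all names are invented for the
  sketch; this is not kernel code. Build with: cc -pthread sketch.c ]

	#include <assert.h>
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static pthread_rwlock_t snap_rwsem = PTHREAD_RWLOCK_INITIALIZER;
	static atomic_int snap_readers;	/* stand-in for lockdep's held-lock state */

	static void fill_inode(const char *name)
	{
		/* the analogue of lockdep_assert_held(&mdsc->snap_rwsem) */
		assert(atomic_load(&snap_readers) > 0);
		printf("filling inode %s with snap_rwsem read-held\n", name);
	}

	static void finish_async_create(const char *name)
	{
		pthread_rwlock_rdlock(&snap_rwsem);	/* down_read() in the patch */
		atomic_fetch_add(&snap_readers, 1);
		fill_inode(name);
		atomic_fetch_sub(&snap_readers, 1);
		pthread_rwlock_unlock(&snap_rwsem);	/* up_read() in the patch */
	}

	int main(void)
	{
		finish_async_create("demo");
		return 0;
	}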
+ +Cc: stable@vger.kernel.org # v5.7+ +Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible") +Signed-off-by: Jeff Layton +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/file.c | 3 +++ + fs/ceph/inode.c | 2 ++ + 2 files changed, 5 insertions(+) + +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -578,6 +578,7 @@ static int ceph_finish_async_create(stru + struct ceph_inode_info *ci = ceph_inode(dir); + struct inode *inode; + struct timespec64 now; ++ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_vino vino = { .ino = req->r_deleg_ino, + .snap = CEPH_NOSNAP }; + +@@ -615,8 +616,10 @@ static int ceph_finish_async_create(stru + + ceph_file_layout_to_legacy(lo, &in.layout); + ++ down_read(&mdsc->snap_rwsem); + ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, + req->r_fmode, NULL); ++ up_read(&mdsc->snap_rwsem); + if (ret) { + dout("%s failed to fill inode: %d\n", __func__, ret); + ceph_dir_clear_complete(dir); +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, + bool new_version = false; + bool fill_inline = false; + ++ lockdep_assert_held(&mdsc->snap_rwsem); ++ + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, + inode, ceph_vinop(inode), le64_to_cpu(info->version), + ci->i_version); diff --git a/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch b/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch new file mode 100644 index 00000000000..dfaa7b63ffc --- /dev/null +++ b/queue-5.12/i2c-robotfuzz-osif-fix-control-request-directions.patch @@ -0,0 +1,50 @@ +From 4ca070ef0dd885616ef294d269a9bf8e3b258e1a Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Mon, 24 May 2021 11:09:12 +0200 +Subject: i2c: robotfuzz-osif: fix control-request directions + +From: Johan Hovold + +commit 4ca070ef0dd885616ef294d269a9bf8e3b258e1a upstream. + +The direction of the pipe argument must match the request-type direction +bit or control requests may fail depending on the host-controller-driver +implementation. + +Control transfers without a data stage are treated as OUT requests by +the USB stack and should be using usb_sndctrlpipe(). Failing to do so +will now trigger a warning. + +Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously +used the osif_usb_read() helper and set the IN direction bit. + +Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com +Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.") +Cc: stable@vger.kernel.org # 3.14 +Signed-off-by: Johan Hovold +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/i2c/busses/i2c-robotfuzz-osif.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c ++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c +@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter + } + } + +- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); ++ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + if (ret) { + dev_err(&adapter->dev, "failure sending STOP\n"); + return -EREMOTEIO; +@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interfa + * Set bus frequency. The frequency is: + * 120,000,000 / ( 16 + 2 * div * 4^prescale). 
+	 * Using dev = 52, prescale = 0 give 100KHz */
+-	ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
++	ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
+ 			NULL, 0);
+ 	if (ret) {
+ 		dev_err(&interface->dev, "failure sending bit rate");
diff --git a/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch b/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
new file mode 100644
index 00000000000..c167187471b
--- /dev/null
+++ b/queue-5.12/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch
@@ -0,0 +1,181 @@
+From 5fa54346caf67b4b1b10b1f390316ae466da4d53 Mon Sep 17 00:00:00 2001
+From: Petr Mladek
+Date: Thu, 24 Jun 2021 18:39:48 -0700
+Subject: kthread: prevent deadlock when kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()
+
+From: Petr Mladek
+
+commit 5fa54346caf67b4b1b10b1f390316ae466da4d53 upstream.
+
+The system might hang with the following backtrace:
+
+ schedule+0x80/0x100
+ schedule_timeout+0x48/0x138
+ wait_for_common+0xa4/0x134
+ wait_for_completion+0x1c/0x2c
+ kthread_flush_work+0x114/0x1cc
+ kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
+ kthread_cancel_delayed_work_sync+0x18/0x2c
+ xxxx_pm_notify+0xb0/0xd8
+ blocking_notifier_call_chain_robust+0x80/0x194
+ pm_notifier_call_chain_robust+0x28/0x4c
+ suspend_prepare+0x40/0x260
+ enter_state+0x80/0x3f4
+ pm_suspend+0x60/0xdc
+ state_store+0x108/0x144
+ kobj_attr_store+0x38/0x88
+ sysfs_kf_write+0x64/0xc0
+ kernfs_fop_write_iter+0x108/0x1d0
+ vfs_write+0x2f4/0x368
+ ksys_write+0x7c/0xec
+
+It is caused by the following race between kthread_mod_delayed_work()
+and kthread_cancel_delayed_work_sync():
+
+CPU0                                    CPU1
+
+Context: Thread A                       Context: Thread B
+
+kthread_mod_delayed_work()
+  spin_lock()
+    __kthread_cancel_work()
+      spin_unlock()
+      del_timer_sync()
+                                        kthread_cancel_delayed_work_sync()
+                                          spin_lock()
+                                          __kthread_cancel_work()
+                                            spin_unlock()
+                                            del_timer_sync()
+                                            spin_lock()
+
+                                          work->canceling++
+                                          spin_unlock
+      spin_lock()
+  queue_delayed_work()
+    // dwork is put into the worker->delayed_work_list
+
+  spin_unlock()
+
+kthread_flush_work()
+  // flush_work is put at the tail of the dwork
+
+  wait_for_completion()
+
+Context: IRQ
+
+  kthread_delayed_work_timer_fn()
+    spin_lock()
+      list_del_init(&work->node);
+    spin_unlock()
+
+BANG: flush_work is no longer linked and will never get processed.
+
+The problem is that kthread_mod_delayed_work() checks the
+work->canceling flag before canceling the timer.
+
+A simple solution is to (re)check work->canceling after
+__kthread_cancel_work(). But then it is not clear what should be
+returned when __kthread_cancel_work() removed the work from the queue
+(list) and it can't queue it again with the new @delay.
+
+The return value might be used for reference counting. The caller has
+to know whether a new work has been queued or an existing one was
+replaced.
+
+The proper solution is that kthread_mod_delayed_work() will remove the
+work from the queue (list) _only_ when work->canceling is not set. The
+flag must be checked after the timer is stopped and the remaining
+operations can be done under worker->lock.
+
+Note that kthread_mod_delayed_work() could remove the timer and then
+bail out. It is fine. The other canceling caller needs to cancel the
+timer as well. The important thing is that the queue (list)
+manipulation is done atomically under worker->lock.
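[ Editor's note: the fixed ordering described above can be reduced to a
  short illustrative C model -- cancel the timer first (the step that
  drops worker->lock), re-check work->canceling under the lock, and only
  then touch the queue. Every type and helper below is invented for the
  sketch; this is not the kernel implementation. ]

	#include <stdbool.h>
	#include <stdio.h>

	struct kwork {
		int canceling;	/* > 0 while another caller is canceling us */
		bool queued;	/* models membership in the worker's list   */
	};

	/* models kthread_cancel_delayed_work_timer(): drops and re-takes
	 * worker->lock around del_timer_sync(), so 'canceling' may change */
	static void cancel_work_timer(struct kwork *w) { }

	/* models __kthread_cancel_work(): only the list manipulation */
	static bool cancel_work(struct kwork *w)
	{
		bool was_queued = w->queued;
		w->queued = false;
		return was_queued;
	}

	static bool mod_delayed_work(struct kwork *w)
	{
		bool ret = false;

		cancel_work_timer(w);	/* timer first; the lock was dropped here */
		if (w->canceling)
			goto out;	/* leave the work to the canceling caller */
		ret = cancel_work(w);	/* list is only touched when requeueing   */
		w->queued = true;	/* models __kthread_queue_delayed_work()  */
	out:
		return ret;		/* usable for reference counting */
	}

	int main(void)
	{
		struct kwork w = { .canceling = 1, .queued = true };
		printf("replaced=%d still queued=%d\n", mod_delayed_work(&w), w.queued);
		return 0;
	}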
+
+Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com
+Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work")
+Signed-off-by: Petr Mladek
+Reported-by: Martin Liu
+Cc:
+Cc: Minchan Kim
+Cc: Nathan Chancellor
+Cc: Nick Desaulniers
+Cc: Oleg Nesterov
+Cc: Tejun Heo
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/kthread.c | 35 ++++++++++++++++++++++++-----------
+ 1 file changed, 24 insertions(+), 11 deletions(-)
+
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1119,8 +1119,11 @@ static void kthread_cancel_delayed_work_
+ }
+ 
+ /*
+- * This function removes the work from the worker queue. Also it makes sure
+- * that it won't get queued later via the delayed work's timer.
++ * This function removes the work from the worker queue.
++ *
++ * It is called under worker->lock. The caller must make sure that
++ * the timer used by delayed work is not running, e.g. by calling
++ * kthread_cancel_delayed_work_timer().
+  *
+  * The work might still be in use when this function finishes. See the
+  * current_work proceed by the worker.
+@@ -1128,13 +1131,8 @@ static void kthread_cancel_delayed_work_
+  * Return: %true if @work was pending and successfully canceled,
+  *	   %false if @work was not pending
+  */
+-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+-				  unsigned long *flags)
++static bool __kthread_cancel_work(struct kthread_work *work)
+ {
+-	/* Try to cancel the timer if exists. */
+-	if (is_dwork)
+-		kthread_cancel_delayed_work_timer(work, flags);
+-
+ 	/*
+ 	 * Try to remove the work from a worker list. It might either
+ 	 * be from worker->work_list or from worker->delayed_work_list.
+@@ -1187,11 +1185,23 @@ bool kthread_mod_delayed_work(struct kth
+ 	/* Work must not be used with >1 worker, see kthread_queue_work() */
+ 	WARN_ON_ONCE(work->worker != worker);
+ 
+-	/* Do not fight with another command that is canceling this work. */
++	/*
++	 * Temporary cancel the work but do not fight with another command
++	 * that is canceling the work as well.
++	 *
++	 * It is a bit tricky because of possible races with another
++	 * mod_delayed_work() and cancel_delayed_work() callers.
++	 *
++	 * The timer must be canceled first because worker->lock is released
++	 * when doing so. But the work can be removed from the queue (list)
++	 * only when it can be queued again so that the return value can
++	 * be used for reference counting.
++	 */
++	kthread_cancel_delayed_work_timer(work, &flags);
+ 	if (work->canceling)
+ 		goto out;
++	ret = __kthread_cancel_work(work);
+ 
+-	ret = __kthread_cancel_work(work, true, &flags);
+ fast_queue:
+ 	__kthread_queue_delayed_work(worker, dwork, delay);
+ out:
+@@ -1213,7 +1223,10 @@ static bool __kthread_cancel_work_sync(s
+ 	/* Work must not be used with >1 worker, see kthread_queue_work().
*/ + WARN_ON_ONCE(work->worker != worker); + +- ret = __kthread_cancel_work(work, is_dwork, &flags); ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, &flags); ++ ++ ret = __kthread_cancel_work(work); + + if (worker->current_work != work) + goto out_fast; diff --git a/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch b/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch new file mode 100644 index 00000000000..6c741c016a0 --- /dev/null +++ b/queue-5.12/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch @@ -0,0 +1,102 @@ +From 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Thu, 24 Jun 2021 18:39:45 -0700 +Subject: kthread_worker: split code for canceling the delayed work timer + +From: Petr Mladek + +commit 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 upstream. + +Patch series "kthread_worker: Fix race between kthread_mod_delayed_work() +and kthread_cancel_delayed_work_sync()". + +This patchset fixes the race between kthread_mod_delayed_work() and +kthread_cancel_delayed_work_sync() including proper return value +handling. + +This patch (of 2): + +Simple code refactoring as a preparation step for fixing a race between +kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync(). + +It does not modify the existing behavior. + +Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com +Signed-off-by: Petr Mladek +Cc: +Cc: Martin Liu +Cc: Minchan Kim +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Oleg Nesterov +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/kthread.c | 46 +++++++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 17 deletions(-) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -1092,6 +1092,33 @@ void kthread_flush_work(struct kthread_w + EXPORT_SYMBOL_GPL(kthread_flush_work); + + /* ++ * Make sure that the timer is neither set nor running and could ++ * not manipulate the work list_head any longer. ++ * ++ * The function is called under worker->lock. The lock is temporary ++ * released but the timer can't be set again in the meantime. ++ */ ++static void kthread_cancel_delayed_work_timer(struct kthread_work *work, ++ unsigned long *flags) ++{ ++ struct kthread_delayed_work *dwork = ++ container_of(work, struct kthread_delayed_work, work); ++ struct kthread_worker *worker = work->worker; ++ ++ /* ++ * del_timer_sync() must be called to make sure that the timer ++ * callback is not running. The lock must be temporary released ++ * to avoid a deadlock with the callback. In the meantime, ++ * any queuing is blocked by setting the canceling counter. ++ */ ++ work->canceling++; ++ raw_spin_unlock_irqrestore(&worker->lock, *flags); ++ del_timer_sync(&dwork->timer); ++ raw_spin_lock_irqsave(&worker->lock, *flags); ++ work->canceling--; ++} ++ ++/* + * This function removes the work from the worker queue. Also it makes sure + * that it won't get queued later via the delayed work's timer. + * +@@ -1105,23 +1132,8 @@ static bool __kthread_cancel_work(struct + unsigned long *flags) + { + /* Try to cancel the timer if exists. */ +- if (is_dwork) { +- struct kthread_delayed_work *dwork = +- container_of(work, struct kthread_delayed_work, work); +- struct kthread_worker *worker = work->worker; +- +- /* +- * del_timer_sync() must be called to make sure that the timer +- * callback is not running. 
The lock must be temporary released +- * to avoid a deadlock with the callback. In the meantime, +- * any queuing is blocked by setting the canceling counter. +- */ +- work->canceling++; +- raw_spin_unlock_irqrestore(&worker->lock, *flags); +- del_timer_sync(&dwork->timer); +- raw_spin_lock_irqsave(&worker->lock, *flags); +- work->canceling--; +- } ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, flags); + + /* + * Try to remove the work from a worker list. It might either diff --git a/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch b/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch new file mode 100644 index 00000000000..78ef42c3e53 --- /dev/null +++ b/queue-5.12/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch @@ -0,0 +1,70 @@ +From f8be156be163a052a067306417cd0ff679068c97 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Thu, 24 Jun 2021 08:29:04 -0400 +Subject: KVM: do not allow mapping valid but non-reference-counted pages + +From: Nicholas Piggin + +commit f8be156be163a052a067306417cd0ff679068c97 upstream. + +It's possible to create a region which maps valid but non-refcounted +pages (e.g., tail pages of non-compound higher order allocations). These +host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family +of APIs, which take a reference to the page, which takes it from 0 to 1. +When the reference is dropped, this will free the page incorrectly. + +Fix this by only taking a reference on valid pages if it was non-zero, +which indicates it is participating in normal refcounting (and can be +released with put_page). + +This addresses CVE-2021-22543. + +Signed-off-by: Nicholas Piggin +Tested-by: Paolo Bonzini +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1919,6 +1919,13 @@ static bool vma_is_valid(struct vm_area_ + return true; + } + ++static int kvm_try_get_pfn(kvm_pfn_t pfn) ++{ ++ if (kvm_is_reserved_pfn(pfn)) ++ return 1; ++ return get_page_unless_zero(pfn_to_page(pfn)); ++} ++ + static int hva_to_pfn_remapped(struct vm_area_struct *vma, + unsigned long addr, bool *async, + bool write_fault, bool *writable, +@@ -1968,13 +1975,21 @@ static int hva_to_pfn_remapped(struct vm + * Whoever called remap_pfn_range is also going to call e.g. + * unmap_mapping_range before the underlying pages are freed, + * causing a call to our MMU notifier. ++ * ++ * Certain IO or PFNMAP mappings can be backed with valid ++ * struct pages, but be allocated without refcounting e.g., ++ * tail pages of non-compound higher order allocations, which ++ * would then underflow the refcount when the caller does the ++ * required put_page. Don't allow those pages here. 
+ 	 */
+-	kvm_get_pfn(pfn);
++	if (!kvm_try_get_pfn(pfn))
++		r = -EFAULT;
+ 
+ out:
+ 	pte_unmap_unlock(ptep, ptl);
+ 	*p_pfn = pfn;
+-	return 0;
++
++	return r;
+ }
+ 
+ /*
diff --git a/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch b/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch
new file mode 100644
index 00000000000..90ecf273d0e
--- /dev/null
+++ b/queue-5.12/s390-clear-pt_regs-flags-on-irq-entry.patch
@@ -0,0 +1,37 @@
+From ca1f4d702d534387aa1f16379edb3b03cdb6ceda Mon Sep 17 00:00:00 2001
+From: Sven Schnelle
+Date: Fri, 11 Jun 2021 16:08:18 +0200
+Subject: s390: clear pt_regs::flags on irq entry
+
+From: Sven Schnelle
+
+commit ca1f4d702d534387aa1f16379edb3b03cdb6ceda upstream.
+
+The current irq entry code doesn't initialize pt_regs::flags. On exit to
+user mode arch_do_signal_or_restart() tests whether PIF_SYSCALL is set,
+which might yield wrong results.
+
+Fix this by clearing pt_regs::flags in the entry.S irq handler
+code.
+
+Reported-by: Heiko Carstens
+Signed-off-by: Sven Schnelle
+Reviewed-by: Heiko Carstens
+Fixes: 56e62a737028 ("s390: convert to generic entry")
+Cc: # 5.12
+Signed-off-by: Vasily Gorbik
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/kernel/entry.S | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -418,6 +418,7 @@ ENTRY(\name)
+ 	xgr	%r6,%r6
+ 	xgr	%r7,%r7
+ 	xgr	%r10,%r10
++	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ 	stmg	%r8,%r9,__PT_PSW(%r11)
+ 	tm	%r8,0x0001		# coming from user space?
diff --git a/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch b/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch
new file mode 100644
index 00000000000..808128806e4
--- /dev/null
+++ b/queue-5.12/s390-fix-system-call-restart-with-multiple-signals.patch
@@ -0,0 +1,44 @@
+From fc66127dc3396338f287c3b494dfbf102547e770 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle
+Date: Fri, 11 Jun 2021 10:27:51 +0200
+Subject: s390: fix system call restart with multiple signals
+
+From: Sven Schnelle
+
+commit fc66127dc3396338f287c3b494dfbf102547e770 upstream.
+
+glibc complained with "The futex facility returned an unexpected error
+code.". It turned out that the futex syscall returned -ERESTARTSYS because
+a signal is pending. arch_do_signal_or_restart() restored the syscall
+parameters (namely regs->gprs[2]) and set PIF_SYSCALL_RESTART. When
+another signal is made pending later in the exit loop
+arch_do_signal_or_restart() is called again. This function clears
+PIF_SYSCALL_RESTART and checks the return code which is set in
+regs->gprs[2]. However, regs->gprs[2] was restored in the previous run
+and no longer contains -ERESTARTSYS, so PIF_SYSCALL_RESTART isn't set
+again and the syscall is skipped.
+
+Fix this by not clearing PIF_SYSCALL_RESTART - it is already cleared in
+__do_syscall() when the syscall is restarted.
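[ Editor's note: an illustrative userspace model of the double signal
  delivery described above. The struct and the values are invented; it only
  demonstrates why clearing the restart flag on the second pass loses the
  restart, and why the fix leaves the flag alone. ]

	#include <stdbool.h>
	#include <stdio.h>

	#define ERESTARTSYS 512

	struct regs { long gpr2; bool restart; };

	/* models arch_do_signal_or_restart(); 'buggy' selects the old code */
	static void do_signal(struct regs *r, long saved_arg, bool buggy)
	{
		if (buggy)
			r->restart = false;	/* old: clear PIF_SYSCALL_RESTART */
		if (r->gpr2 == -ERESTARTSYS) {
			r->gpr2 = saved_arg;	/* restore the syscall argument */
			r->restart = true;	/* request a syscall restart    */
		}
	}

	int main(void)
	{
		/* two signals become pending before returning to user space */
		struct regs old = { .gpr2 = -ERESTARTSYS }, fix = old;

		do_signal(&old, 42, true);  do_signal(&old, 42, true);
		do_signal(&fix, 42, false); do_signal(&fix, 42, false);

		printf("old code: restart=%d (syscall skipped)\n", old.restart);
		printf("fixed:    restart=%d (restart preserved)\n", fix.restart);
		return 0;
	}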
+ +Reported-by: Bjoern Walk +Signed-off-by: Sven Schnelle +Reviewed-by: Heiko Carstens +Fixes: 56e62a737028 ("s390: convert to generic entry") +Cc: # 5.12 +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/signal.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/s390/kernel/signal.c ++++ b/arch/s390/kernel/signal.c +@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt + + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); +- clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART); + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; + switch (regs->gprs[2]) { diff --git a/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch b/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch new file mode 100644 index 00000000000..c86febbc3ea --- /dev/null +++ b/queue-5.12/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch @@ -0,0 +1,67 @@ +From 67147e96a332b56c7206238162771d82467f86c0 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Fri, 18 Jun 2021 16:58:47 +0200 +Subject: s390/stack: fix possible register corruption with stack switch helper + +From: Heiko Carstens + +commit 67147e96a332b56c7206238162771d82467f86c0 upstream. + +The CALL_ON_STACK macro is used to call a C function from inline +assembly, and therefore must consider the C ABI, which says that only +registers 6-13, and 15 are non-volatile (restored by the called +function). + +The inline assembly incorrectly marks all registers used to pass +parameters to the called function as read-only input operands, instead +of operands that are read and written to. This might result in +register corruption depending on usage, compiler, and compile options. + +Fix this by marking all operands used to pass parameters as read/write +operands. To keep the code simple even register 6, if used, is marked +as read-write operand. + +Fixes: ff340d2472ec ("s390: add stack switch helper") +Cc: # 4.20 +Reviewed-by: Vasily Gorbik +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/stacktrace.h | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/arch/s390/include/asm/stacktrace.h ++++ b/arch/s390/include/asm/stacktrace.h +@@ -91,12 +91,16 @@ struct stack_frame { + CALL_ARGS_4(arg1, arg2, arg3, arg4); \ + register unsigned long r4 asm("6") = (unsigned long)(arg5) + +-#define CALL_FMT_0 "=&d" (r2) : +-#define CALL_FMT_1 "+&d" (r2) : +-#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +-#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +-#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +-#define CALL_FMT_5 CALL_FMT_4 "d" (r6), ++/* ++ * To keep this simple mark register 2-6 as being changed (volatile) ++ * by the called function, even though register 6 is saved/nonvolatile. 
++ */
++#define CALL_FMT_0 "=&d" (r2)
++#define CALL_FMT_1 "+&d" (r2)
++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
+ 
+ #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+ #define CALL_CLOBBER_4 CALL_CLOBBER_5
+@@ -118,7 +122,7 @@ struct stack_frame {
+ 		"	brasl	14,%[_fn]\n"	\
+ 		"	la	15,0(%[_prev])\n"	\
+ 		: [_prev] "=&a" (prev), CALL_FMT_##nr	\
+-		  [_stack] "R" (stack),	\
++		: [_stack] "R" (stack),	\
+ 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+ 		  [_frame] "d" (frame),	\
+ 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);	\
diff --git a/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch b/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch
new file mode 100644
index 00000000000..ece1f385036
--- /dev/null
+++ b/queue-5.12/s390-topology-clear-thread-group-maps-for-offline-cpus.patch
@@ -0,0 +1,78 @@
+From 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 Mon Sep 17 00:00:00 2001
+From: Sven Schnelle
+Date: Tue, 15 Jun 2021 15:05:22 +0200
+Subject: s390/topology: clear thread/group maps for offline cpus
+
+From: Sven Schnelle
+
+commit 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 upstream.
+
+The current code doesn't clear the thread/group maps for offline
+CPUs. This may cause kernel crashes like the one below in common
+code that assumes if a CPU has siblings it is online.
+
+Unable to handle kernel pointer dereference in virtual kernel address space
+
+Call Trace:
+ [<000000013a4b8c3c>] blk_mq_map_swqueue+0x10c/0x388
+([<000000013a4b8bcc>] blk_mq_map_swqueue+0x9c/0x388)
+ [<000000013a4b9300>] blk_mq_init_allocated_queue+0x448/0x478
+ [<000000013a4b9416>] blk_mq_init_queue+0x4e/0x90
+ [<000003ff8019d3e6>] loop_add+0x106/0x278 [loop]
+ [<000003ff801b8148>] loop_init+0x148/0x1000 [loop]
+ [<0000000139de4924>] do_one_initcall+0x3c/0x1e0
+ [<0000000139ef449a>] do_init_module+0x6a/0x2a0
+ [<0000000139ef61bc>] __do_sys_finit_module+0xa4/0xc0
+ [<0000000139de9e6e>] do_syscall+0x7e/0xd0
+ [<000000013a8e0aec>] __do_syscall+0xbc/0x110
+ [<000000013a8ee2e8>] system_call+0x78/0xa0
+
+Fixes: 52aeda7accb6 ("s390/topology: remove offline CPUs from CPU topology masks")
+Cc: # 5.7+
+Reported-by: Marius Hillenbrand
+Signed-off-by: Sven Schnelle
+Reviewed-by: Heiko Carstens
+Signed-off-by: Vasily Gorbik
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/kernel/topology.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/topology.c
++++ b/arch/s390/kernel/topology.c
+@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst
+ {
+ 	static cpumask_t mask;
+ 
+-	cpumask_copy(&mask, cpumask_of(cpu));
++	cpumask_clear(&mask);
++	if (!cpu_online(cpu))
++		goto out;
++	cpumask_set_cpu(cpu, &mask);
+ 	switch (topology_mode) {
+ 	case TOPOLOGY_MODE_HW:
+ 		while (info) {
+@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst
+ 	default:
+ 		fallthrough;
+ 	case TOPOLOGY_MODE_SINGLE:
+-		cpumask_copy(&mask, cpumask_of(cpu));
+ 		break;
+ 	}
+ 	cpumask_and(&mask, &mask, cpu_online_mask);
++out:
+ 	cpumask_copy(dst, &mask);
+ }
+ 
+@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *ds
+ 	static cpumask_t mask;
+ 	int i;
+ 
+-	cpumask_copy(&mask, cpumask_of(cpu));
++	cpumask_clear(&mask);
++	if (!cpu_online(cpu))
++		goto out;
++	cpumask_set_cpu(cpu, &mask);
+ 	if (topology_mode != TOPOLOGY_MODE_HW)
+ 		goto out;
+ 	cpu -= cpu % (smp_cpu_mtid + 1);
diff --git a/queue-5.12/series b/queue-5.12/series
index 7b9a96441cb..4d7f5daa9a6
100644 --- a/queue-5.12/series +++ b/queue-5.12/series @@ -66,3 +66,15 @@ gpiolib-cdev-zero-padding-during-conversion-to-gpiol.patch scsi-sd-call-sd_revalidate_disk-for-ioctl-blkrrpart.patch software-node-handle-software-node-injection-to-an-e.patch nilfs2-fix-memory-leak-in-nilfs_sysfs_delete_device_.patch +s390-topology-clear-thread-group-maps-for-offline-cpus.patch +s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch +s390-fix-system-call-restart-with-multiple-signals.patch +s390-clear-pt_regs-flags-on-irq-entry.patch +kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch +i2c-robotfuzz-osif-fix-control-request-directions.patch +ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch +xen-events-reset-active-flag-for-lateeoi-events-later.patch +kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch +kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch +x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch +x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch diff --git a/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch b/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch new file mode 100644 index 00000000000..83cc50c873a --- /dev/null +++ b/queue-5.12/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch @@ -0,0 +1,165 @@ +From f9dfb5e390fab2df9f7944bb91e7705aba14cd26 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 18 Jun 2021 16:18:25 +0200 +Subject: x86/fpu: Make init_fpstate correct with optimized XSAVE + +From: Thomas Gleixner + +commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream. + +The XSAVE init code initializes all enabled and supported components with +XRSTOR(S) to init state. Then it XSAVEs the state of the components back +into init_fpstate which is used in several places to fill in the init state +of components. + +This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because +those use the init optimization and skip writing state of components which +are in init state. So init_fpstate.xsave still contains all zeroes after +this operation. + +There are two ways to solve that: + + 1) Use XSAVE unconditionally, but that requires to reshuffle the buffer when + XSAVES is enabled because XSAVES uses compacted format. + + 2) Save the components which are known to have a non-zero init state by other + means. + +Looking deeper, #2 is the right thing to do because all components the +kernel supports have all-zeroes init state except the legacy features (FP, +SSE). Those cannot be hard coded because the states are not identical on all +CPUs, but they can be saved with FXSAVE which avoids all conditionals. + +Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with +a BUILD_BUG_ON() which reminds developers to validate that a newly added +component has all zeroes init state. As a bonus remove the now unused +copy_xregs_to_kernel_booting() crutch. + +The XSAVE and reshuffle method can still be implemented in the unlikely +case that components are added which have a non-zero init state and no +other means to save them. For now, FXSAVE is just simple and good enough. + + [ bp: Fix a typo or two in the text. 
] + +Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/internal.h | 30 +++++++------------------- + arch/x86/kernel/fpu/xstate.c | 41 +++++++++++++++++++++++++++++++++--- + 2 files changed, 46 insertions(+), 25 deletions(-) + +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + } + ++static inline void fxsave(struct fxregs_state *fx) ++{ ++ if (IS_ENABLED(CONFIG_X86_32)) ++ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); ++ else ++ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); ++} ++ + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ + #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" + #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +@@ -270,28 +278,6 @@ static inline void copy_fxregs_to_kernel + + /* + * This function is called only during boot time when x86 caps are not set +- * up and alternative can not be used yet. +- */ +-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +-{ +- u64 mask = xfeatures_mask_all; +- u32 lmask = mask; +- u32 hmask = mask >> 32; +- int err; +- +- WARN_ON(system_state != SYSTEM_BOOTING); +- +- if (boot_cpu_has(X86_FEATURE_XSAVES)) +- XSTATE_OP(XSAVES, xstate, lmask, hmask, err); +- else +- XSTATE_OP(XSAVE, xstate, lmask, hmask, err); +- +- /* We should never fault when copying to a kernel buffer: */ +- WARN_ON_FPU(err); +-} +- +-/* +- * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ + static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -441,12 +441,35 @@ static void __init print_xstate_offset_s + } + + /* ++ * All supported features have either init state all zeros or are ++ * handled in setup_init_fpu() individually. This is an explicit ++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch ++ * newly added supported features at build time and make people ++ * actually look at the init state for the new feature. ++ */ ++#define XFEATURES_INIT_FPSTATE_HANDLED \ ++ (XFEATURE_MASK_FP | \ ++ XFEATURE_MASK_SSE | \ ++ XFEATURE_MASK_YMM | \ ++ XFEATURE_MASK_OPMASK | \ ++ XFEATURE_MASK_ZMM_Hi256 | \ ++ XFEATURE_MASK_Hi16_ZMM | \ ++ XFEATURE_MASK_PKRU | \ ++ XFEATURE_MASK_BNDREGS | \ ++ XFEATURE_MASK_BNDCSR | \ ++ XFEATURE_MASK_PASID) ++ ++/* + * setup the xstate image representing the init state + */ + static void __init setup_init_fpu_buf(void) + { + static int on_boot_cpu __initdata = 1; + ++ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | ++ XFEATURE_MASK_SUPERVISOR_SUPPORTED) != ++ XFEATURES_INIT_FPSTATE_HANDLED); ++ + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + +@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(vo + copy_kernel_to_xregs_booting(&init_fpstate.xsave); + + /* +- * Dump the init state again. This is to identify the init state +- * of any feature which is not represented by all zero's. ++ * All components are now in init state. Read the state back so ++ * that init_fpstate contains all non-zero init state. 
This only ++ * works with XSAVE, but not with XSAVEOPT and XSAVES because ++ * those use the init optimization which skips writing data for ++ * components in init state. ++ * ++ * XSAVE could be used, but that would require to reshuffle the ++ * data when XSAVES is available because XSAVES uses xstate ++ * compaction. But doing so is a pointless exercise because most ++ * components have an all zeros init state except for the legacy ++ * ones (FP and SSE). Those can be saved with FXSAVE into the ++ * legacy area. Adding new features requires to ensure that init ++ * state is all zeroes or if not to add the necessary handling ++ * here. + */ +- copy_xregs_to_kernel_booting(&init_fpstate.xsave); ++ fxsave(&init_fpstate.fxsave); + } + + static int xfeature_uncompacted_offset(int xfeature_nr) diff --git a/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch b/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch new file mode 100644 index 00000000000..79ddb0dfab5 --- /dev/null +++ b/queue-5.12/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch @@ -0,0 +1,66 @@ +From 9301982c424a003c0095bf157154a85bf5322bd0 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 18 Jun 2021 16:18:24 +0200 +Subject: x86/fpu: Preserve supervisor states in sanitize_restored_user_xstate() + +From: Thomas Gleixner + +commit 9301982c424a003c0095bf157154a85bf5322bd0 upstream. + +sanitize_restored_user_xstate() preserves the supervisor states only +when the fx_only argument is zero, which allows unprivileged user space +to put supervisor states back into init state. + +Preserve them unconditionally. + + [ bp: Fix a typo or two in the text. ] + +Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 26 ++++++++------------------ + 1 file changed, 8 insertions(+), 18 deletions(-) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpre + + if (use_xsave()) { + /* +- * Note: we don't need to zero the reserved bits in the +- * xstate_header here because we either didn't copy them at all, +- * or we checked earlier that they aren't set. ++ * Clear all feature bits which are not set in ++ * user_xfeatures and clear all extended features ++ * for fx_only mode. + */ ++ u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures; + + /* +- * 'user_xfeatures' might have bits clear which are +- * set in header->xfeatures. This represents features that +- * were in init state prior to a signal delivery, and need +- * to be reset back to the init state. Clear any user +- * feature bits which are set in the kernel buffer to get +- * them back to the init state. +- * +- * Supervisor state is unchanged by input from userspace. +- * Ensure supervisor state bits stay set and supervisor +- * state is not modified. ++ * Supervisor state has to be preserved. The sigframe ++ * restore can only modify user features, i.e. @mask ++ * cannot contain them. 
+ */ +- if (fx_only) +- header->xfeatures = XFEATURE_MASK_FPSSE; +- else +- header->xfeatures &= user_xfeatures | +- xfeatures_mask_supervisor(); ++ header->xfeatures &= mask | xfeatures_mask_supervisor(); + } + + if (use_fxsr()) { diff --git a/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch b/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch new file mode 100644 index 00000000000..ab19055d14c --- /dev/null +++ b/queue-5.12/xen-events-reset-active-flag-for-lateeoi-events-later.patch @@ -0,0 +1,63 @@ +From 3de218ff39b9e3f0d453fe3154f12a174de44b25 Mon Sep 17 00:00:00 2001 +From: Juergen Gross +Date: Wed, 23 Jun 2021 15:09:13 +0200 +Subject: xen/events: reset active flag for lateeoi events later + +From: Juergen Gross + +commit 3de218ff39b9e3f0d453fe3154f12a174de44b25 upstream. + +In order to avoid a race condition for user events when changing +cpu affinity reset the active flag only when EOI-ing the event. + +This is working fine as all user events are lateeoi events. Note that +lateeoi_ack_mask_dynirq() is not modified as there is no explicit call +to xen_irq_lateeoi() expected later. + +Cc: stable@vger.kernel.org +Reported-by: Julien Grall +Fixes: b6622798bc50b62 ("xen/events: avoid handling the same event on two cpus at the same time") +Tested-by: Julien Grall +Signed-off-by: Juergen Gross +Reviewed-by: Boris Ostrovsky +Link: https://lore.kernel.org/r/20210623130913.9405-1-jgross@suse.com +Signed-off-by: Juergen Gross +Signed-off-by: Greg Kroah-Hartman +--- + drivers/xen/events/events_base.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/xen/events/events_base.c ++++ b/drivers/xen/events/events_base.c +@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struc + } + + info->eoi_time = 0; ++ ++ /* is_active hasn't been reset yet, do it now. */ ++ smp_store_release(&info->is_active, 0); + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); + } + +@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port + BUG(); + } + ++/* Not called for lateeoi events. */ + static void event_handler_exit(struct irq_info *info) + { + smp_store_release(&info->is_active, 0); +@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct ir + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EOI_PENDING); +- event_handler_exit(info); ++ /* ++ * Don't call event_handler_exit(). ++ * Need to keep is_active non-zero in order to ignore re-raised ++ * events after cpu affinity changes while a lateeoi is pending. ++ */ ++ clear_evtchn(evtchn); + } + } +
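[ Editor's note: an illustrative userspace model of the lateeoi change that
  closes this series. An event whose is_active flag is still set is ignored
  when re-raised; the fix clears the flag only at EOI time, so an event
  re-raised after a cpu affinity change cannot be handled twice. All names
  are invented; this is not the Xen driver code. ]

	#include <stdbool.h>
	#include <stdio.h>

	struct evt { bool is_active; int handled; };

	static void raise_event(struct evt *e)
	{
		if (!e->is_active) {	/* a second delivery is ignored */
			e->is_active = true;
			e->handled++;
		}
	}

	/* the fix: is_active is cleared here, at EOI time, not in the ack */
	static void lateeoi(struct evt *e)
	{
		e->is_active = false;	/* smp_store_release() in the patch */
	}

	int main(void)
	{
		struct evt e = { false, 0 };

		raise_event(&e);	/* delivered; handler starts               */
		raise_event(&e);	/* re-raised after affinity change: ignored */
		lateeoi(&e);		/* user space finally EOIs the event       */
		raise_event(&e);	/* the next event is delivered normally    */
		printf("handled %d of 3 raised events\n", e.handled);
		return 0;
	}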