From: Greg Kroah-Hartman Date: Sun, 27 Jun 2021 14:27:01 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v5.12.14~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=011d878da7b0b9132283e51bf98501de3aaf647a;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch i2c-robotfuzz-osif-fix-control-request-directions.patch kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch --- diff --git a/queue-5.10/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch b/queue-5.10/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch new file mode 100644 index 00000000000..efa5c7420f6 --- /dev/null +++ b/queue-5.10/ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch @@ -0,0 +1,54 @@ +From 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Tue, 1 Jun 2021 09:40:25 -0400 +Subject: ceph: must hold snap_rwsem when filling inode for async create + +From: Jeff Layton + +commit 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 upstream. + +...and add a lockdep assertion for it to ceph_fill_inode(). 
+ +Cc: stable@vger.kernel.org # v5.7+ +Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible") +Signed-off-by: Jeff Layton +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Greg Kroah-Hartman +--- + fs/ceph/file.c | 3 +++ + fs/ceph/inode.c | 2 ++ + 2 files changed, 5 insertions(+) + +--- a/fs/ceph/file.c ++++ b/fs/ceph/file.c +@@ -578,6 +578,7 @@ static int ceph_finish_async_create(stru + struct ceph_inode_info *ci = ceph_inode(dir); + struct inode *inode; + struct timespec64 now; ++ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); + struct ceph_vino vino = { .ino = req->r_deleg_ino, + .snap = CEPH_NOSNAP }; + +@@ -615,8 +616,10 @@ static int ceph_finish_async_create(stru + + ceph_file_layout_to_legacy(lo, &in.layout); + ++ down_read(&mdsc->snap_rwsem); + ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, + req->r_fmode, NULL); ++ up_read(&mdsc->snap_rwsem); + if (ret) { + dout("%s failed to fill inode: %d\n", __func__, ret); + ceph_dir_clear_complete(dir); +--- a/fs/ceph/inode.c ++++ b/fs/ceph/inode.c +@@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, + bool new_version = false; + bool fill_inline = false; + ++ lockdep_assert_held(&mdsc->snap_rwsem); ++ + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, + inode, ceph_vinop(inode), le64_to_cpu(info->version), + ci->i_version); diff --git a/queue-5.10/i2c-robotfuzz-osif-fix-control-request-directions.patch b/queue-5.10/i2c-robotfuzz-osif-fix-control-request-directions.patch new file mode 100644 index 00000000000..dfaa7b63ffc --- /dev/null +++ b/queue-5.10/i2c-robotfuzz-osif-fix-control-request-directions.patch @@ -0,0 +1,50 @@ +From 4ca070ef0dd885616ef294d269a9bf8e3b258e1a Mon Sep 17 00:00:00 2001 +From: Johan Hovold +Date: Mon, 24 May 2021 11:09:12 +0200 +Subject: i2c: robotfuzz-osif: fix control-request directions + +From: Johan Hovold + +commit 4ca070ef0dd885616ef294d269a9bf8e3b258e1a upstream. 
+ +The direction of the pipe argument must match the request-type direction +bit or control requests may fail depending on the host-controller-driver +implementation. + +Control transfers without a data stage are treated as OUT requests by +the USB stack and should be using usb_sndctrlpipe(). Failing to do so +will now trigger a warning. + +Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously +used the osif_usb_read() helper and set the IN direction bit. + +Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com +Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.") +Cc: stable@vger.kernel.org # 3.14 +Signed-off-by: Johan Hovold +Signed-off-by: Wolfram Sang +Signed-off-by: Greg Kroah-Hartman +--- + drivers/i2c/busses/i2c-robotfuzz-osif.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c ++++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c +@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter + } + } + +- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); ++ ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + if (ret) { + dev_err(&adapter->dev, "failure sending STOP\n"); + return -EREMOTEIO; +@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interfa + * Set bus frequency. The frequency is: + * 120,000,000 / ( 16 + 2 * div * 4^prescale). 
+ * Using dev = 52, prescale = 0 give 100KHz */ +- ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, ++ ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, + NULL, 0); + if (ret) { + dev_err(&interface->dev, "failure sending bit rate"); diff --git a/queue-5.10/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch b/queue-5.10/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch new file mode 100644 index 00000000000..52e8d1db33c --- /dev/null +++ b/queue-5.10/kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch @@ -0,0 +1,181 @@ +From 5fa54346caf67b4b1b10b1f390316ae466da4d53 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Thu, 24 Jun 2021 18:39:48 -0700 +Subject: kthread: prevent deadlock when kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync() + +From: Petr Mladek + +commit 5fa54346caf67b4b1b10b1f390316ae466da4d53 upstream. 
+ +The system might hang with the following backtrace: + + schedule+0x80/0x100 + schedule_timeout+0x48/0x138 + wait_for_common+0xa4/0x134 + wait_for_completion+0x1c/0x2c + kthread_flush_work+0x114/0x1cc + kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144 + kthread_cancel_delayed_work_sync+0x18/0x2c + xxxx_pm_notify+0xb0/0xd8 + blocking_notifier_call_chain_robust+0x80/0x194 + pm_notifier_call_chain_robust+0x28/0x4c + suspend_prepare+0x40/0x260 + enter_state+0x80/0x3f4 + pm_suspend+0x60/0xdc + state_store+0x108/0x144 + kobj_attr_store+0x38/0x88 + sysfs_kf_write+0x64/0xc0 + kernfs_fop_write_iter+0x108/0x1d0 + vfs_write+0x2f4/0x368 + ksys_write+0x7c/0xec + +It is caused by the following race between kthread_mod_delayed_work() +and kthread_cancel_delayed_work_sync(): + +CPU0 CPU1 + +Context: Thread A Context: Thread B + +kthread_mod_delayed_work() + spin_lock() + __kthread_cancel_work() + spin_unlock() + del_timer_sync() + kthread_cancel_delayed_work_sync() + spin_lock() + __kthread_cancel_work() + spin_unlock() + del_timer_sync() + spin_lock() + + work->canceling++ + spin_unlock + spin_lock() + queue_delayed_work() + // dwork is put into the worker->delayed_work_list + + spin_unlock() + + kthread_flush_work() + // flush_work is put at the tail of the dwork + + wait_for_completion() + +Context: IRQ + + kthread_delayed_work_timer_fn() + spin_lock() + list_del_init(&work->node); + spin_unlock() + +BANG: flush_work is no longer linked and will never get processed. + +The problem is that kthread_mod_delayed_work() checks work->canceling +flag before canceling the timer. + +A simple solution is to (re)check work->canceling after +__kthread_cancel_work(). But then it is not clear what should be +returned when __kthread_cancel_work() removed the work from the queue +(list) and it can't queue it again with the new @delay. + +The return value might be used for reference counting. 
The caller has +to know whether a new work has been queued or an existing one was +replaced. + +The proper solution is that kthread_mod_delayed_work() will remove the +work from the queue (list) _only_ when work->canceling is not set. The +flag must be checked after the timer is stopped and the remaining +operations can be done under worker->lock. + +Note that kthread_mod_delayed_work() could remove the timer and then +bail out. It is fine. The other canceling caller needs to cancel the +timer as well. The important thing is that the queue (list) +manipulation is done atomically under worker->lock. + +Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com +Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work") +Signed-off-by: Petr Mladek +Reported-by: Martin Liu +Cc: +Cc: Minchan Kim +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Oleg Nesterov +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/kthread.c | 35 ++++++++++++++++++++++++----------- + 1 file changed, 24 insertions(+), 11 deletions(-) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -1071,8 +1071,11 @@ static void kthread_cancel_delayed_work_ + } + + /* +- * This function removes the work from the worker queue. Also it makes sure +- * that it won't get queued later via the delayed work's timer. ++ * This function removes the work from the worker queue. ++ * ++ * It is called under worker->lock. The caller must make sure that ++ * the timer used by delayed work is not running, e.g. by calling ++ * kthread_cancel_delayed_work_timer(). + * + * The work might still be in use when this function finishes. See the + * current_work proceed by the worker. 
+@@ -1080,13 +1083,8 @@ static void kthread_cancel_delayed_work_ + * Return: %true if @work was pending and successfully canceled, + * %false if @work was not pending + */ +-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, +- unsigned long *flags) ++static bool __kthread_cancel_work(struct kthread_work *work) + { +- /* Try to cancel the timer if exists. */ +- if (is_dwork) +- kthread_cancel_delayed_work_timer(work, flags); +- + /* + * Try to remove the work from a worker list. It might either + * be from worker->work_list or from worker->delayed_work_list. +@@ -1139,11 +1137,23 @@ bool kthread_mod_delayed_work(struct kth + /* Work must not be used with >1 worker, see kthread_queue_work() */ + WARN_ON_ONCE(work->worker != worker); + +- /* Do not fight with another command that is canceling this work. */ ++ /* ++ * Temporary cancel the work but do not fight with another command ++ * that is canceling the work as well. ++ * ++ * It is a bit tricky because of possible races with another ++ * mod_delayed_work() and cancel_delayed_work() callers. ++ * ++ * The timer must be canceled first because worker->lock is released ++ * when doing so. But the work can be removed from the queue (list) ++ * only when it can be queued again so that the return value can ++ * be used for reference counting. ++ */ ++ kthread_cancel_delayed_work_timer(work, &flags); + if (work->canceling) + goto out; ++ ret = __kthread_cancel_work(work); + +- ret = __kthread_cancel_work(work, true, &flags); + fast_queue: + __kthread_queue_delayed_work(worker, dwork, delay); + out: +@@ -1165,7 +1175,10 @@ static bool __kthread_cancel_work_sync(s + /* Work must not be used with >1 worker, see kthread_queue_work(). 
*/ + WARN_ON_ONCE(work->worker != worker); + +- ret = __kthread_cancel_work(work, is_dwork, &flags); ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, &flags); ++ ++ ret = __kthread_cancel_work(work); + + if (worker->current_work != work) + goto out_fast; diff --git a/queue-5.10/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch b/queue-5.10/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch new file mode 100644 index 00000000000..6e8fecb4a50 --- /dev/null +++ b/queue-5.10/kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch @@ -0,0 +1,102 @@ +From 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 Mon Sep 17 00:00:00 2001 +From: Petr Mladek +Date: Thu, 24 Jun 2021 18:39:45 -0700 +Subject: kthread_worker: split code for canceling the delayed work timer + +From: Petr Mladek + +commit 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 upstream. + +Patch series "kthread_worker: Fix race between kthread_mod_delayed_work() +and kthread_cancel_delayed_work_sync()". + +This patchset fixes the race between kthread_mod_delayed_work() and +kthread_cancel_delayed_work_sync() including proper return value +handling. + +This patch (of 2): + +Simple code refactoring as a preparation step for fixing a race between +kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync(). + +It does not modify the existing behavior. 
+ +Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com +Signed-off-by: Petr Mladek +Cc: +Cc: Martin Liu +Cc: Minchan Kim +Cc: Nathan Chancellor +Cc: Nick Desaulniers +Cc: Oleg Nesterov +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/kthread.c | 46 +++++++++++++++++++++++++++++----------------- + 1 file changed, 29 insertions(+), 17 deletions(-) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -1044,6 +1044,33 @@ void kthread_flush_work(struct kthread_w + EXPORT_SYMBOL_GPL(kthread_flush_work); + + /* ++ * Make sure that the timer is neither set nor running and could ++ * not manipulate the work list_head any longer. ++ * ++ * The function is called under worker->lock. The lock is temporary ++ * released but the timer can't be set again in the meantime. ++ */ ++static void kthread_cancel_delayed_work_timer(struct kthread_work *work, ++ unsigned long *flags) ++{ ++ struct kthread_delayed_work *dwork = ++ container_of(work, struct kthread_delayed_work, work); ++ struct kthread_worker *worker = work->worker; ++ ++ /* ++ * del_timer_sync() must be called to make sure that the timer ++ * callback is not running. The lock must be temporary released ++ * to avoid a deadlock with the callback. In the meantime, ++ * any queuing is blocked by setting the canceling counter. ++ */ ++ work->canceling++; ++ raw_spin_unlock_irqrestore(&worker->lock, *flags); ++ del_timer_sync(&dwork->timer); ++ raw_spin_lock_irqsave(&worker->lock, *flags); ++ work->canceling--; ++} ++ ++/* + * This function removes the work from the worker queue. Also it makes sure + * that it won't get queued later via the delayed work's timer. + * +@@ -1057,23 +1084,8 @@ static bool __kthread_cancel_work(struct + unsigned long *flags) + { + /* Try to cancel the timer if exists. 
*/ +- if (is_dwork) { +- struct kthread_delayed_work *dwork = +- container_of(work, struct kthread_delayed_work, work); +- struct kthread_worker *worker = work->worker; +- +- /* +- * del_timer_sync() must be called to make sure that the timer +- * callback is not running. The lock must be temporary released +- * to avoid a deadlock with the callback. In the meantime, +- * any queuing is blocked by setting the canceling counter. +- */ +- work->canceling++; +- raw_spin_unlock_irqrestore(&worker->lock, *flags); +- del_timer_sync(&dwork->timer); +- raw_spin_lock_irqsave(&worker->lock, *flags); +- work->canceling--; +- } ++ if (is_dwork) ++ kthread_cancel_delayed_work_timer(work, flags); + + /* + * Try to remove the work from a worker list. It might either diff --git a/queue-5.10/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch b/queue-5.10/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch new file mode 100644 index 00000000000..b8b0dd934e8 --- /dev/null +++ b/queue-5.10/kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch @@ -0,0 +1,70 @@ +From f8be156be163a052a067306417cd0ff679068c97 Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Thu, 24 Jun 2021 08:29:04 -0400 +Subject: KVM: do not allow mapping valid but non-reference-counted pages + +From: Nicholas Piggin + +commit f8be156be163a052a067306417cd0ff679068c97 upstream. + +It's possible to create a region which maps valid but non-refcounted +pages (e.g., tail pages of non-compound higher order allocations). These +host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family +of APIs, which take a reference to the page, which takes it from 0 to 1. +When the reference is dropped, this will free the page incorrectly. + +Fix this by only taking a reference on valid pages if it was non-zero, +which indicates it is participating in normal refcounting (and can be +released with put_page). + +This addresses CVE-2021-22543. 
+ +Signed-off-by: Nicholas Piggin +Tested-by: Paolo Bonzini +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1883,6 +1883,13 @@ static bool vma_is_valid(struct vm_area_ + return true; + } + ++static int kvm_try_get_pfn(kvm_pfn_t pfn) ++{ ++ if (kvm_is_reserved_pfn(pfn)) ++ return 1; ++ return get_page_unless_zero(pfn_to_page(pfn)); ++} ++ + static int hva_to_pfn_remapped(struct vm_area_struct *vma, + unsigned long addr, bool *async, + bool write_fault, bool *writable, +@@ -1932,13 +1939,21 @@ static int hva_to_pfn_remapped(struct vm + * Whoever called remap_pfn_range is also going to call e.g. + * unmap_mapping_range before the underlying pages are freed, + * causing a call to our MMU notifier. ++ * ++ * Certain IO or PFNMAP mappings can be backed with valid ++ * struct pages, but be allocated without refcounting e.g., ++ * tail pages of non-compound higher order allocations, which ++ * would then underflow the refcount when the caller does the ++ * required put_page. Don't allow those pages here. 
+ */ +- kvm_get_pfn(pfn); ++ if (!kvm_try_get_pfn(pfn)) ++ r = -EFAULT; + + out: + pte_unmap_unlock(ptep, ptl); + *p_pfn = pfn; +- return 0; ++ ++ return r; + } + + /* diff --git a/queue-5.10/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch b/queue-5.10/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch new file mode 100644 index 00000000000..fafe137eb52 --- /dev/null +++ b/queue-5.10/s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch @@ -0,0 +1,67 @@ +From 67147e96a332b56c7206238162771d82467f86c0 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Fri, 18 Jun 2021 16:58:47 +0200 +Subject: s390/stack: fix possible register corruption with stack switch helper + +From: Heiko Carstens + +commit 67147e96a332b56c7206238162771d82467f86c0 upstream. + +The CALL_ON_STACK macro is used to call a C function from inline +assembly, and therefore must consider the C ABI, which says that only +registers 6-13, and 15 are non-volatile (restored by the called +function). + +The inline assembly incorrectly marks all registers used to pass +parameters to the called function as read-only input operands, instead +of operands that are read and written to. This might result in +register corruption depending on usage, compiler, and compile options. + +Fix this by marking all operands used to pass parameters as read/write +operands. To keep the code simple even register 6, if used, is marked +as read-write operand. 
+ +Fixes: ff340d2472ec ("s390: add stack switch helper") +Cc: # 4.20 +Reviewed-by: Vasily Gorbik +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/stacktrace.h | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/arch/s390/include/asm/stacktrace.h ++++ b/arch/s390/include/asm/stacktrace.h +@@ -90,12 +90,16 @@ struct stack_frame { + CALL_ARGS_4(arg1, arg2, arg3, arg4); \ + register unsigned long r4 asm("6") = (unsigned long)(arg5) + +-#define CALL_FMT_0 "=&d" (r2) : +-#define CALL_FMT_1 "+&d" (r2) : +-#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +-#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +-#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +-#define CALL_FMT_5 CALL_FMT_4 "d" (r6), ++/* ++ * To keep this simple mark register 2-6 as being changed (volatile) ++ * by the called function, even though register 6 is saved/nonvolatile. ++ */ ++#define CALL_FMT_0 "=&d" (r2) ++#define CALL_FMT_1 "+&d" (r2) ++#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) ++#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) ++#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) ++#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) + + #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" + #define CALL_CLOBBER_4 CALL_CLOBBER_5 +@@ -117,7 +121,7 @@ struct stack_frame { + " brasl 14,%[_fn]\n" \ + " la 15,0(%[_prev])\n" \ + : [_prev] "=&a" (prev), CALL_FMT_##nr \ +- [_stack] "R" (stack), \ ++ : [_stack] "R" (stack), \ + [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_frame] "d" (frame), \ + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ diff --git a/queue-5.10/series b/queue-5.10/series index 74eeb883710..d42c45e46df 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -60,3 +60,11 @@ i2c-i801-ensure-that-smbhststs_inuse_sts-is-cleared-.patch gpiolib-cdev-zero-padding-during-conversion-to-gpiol.patch scsi-sd-call-sd_revalidate_disk-for-ioctl-blkrrpart.patch nilfs2-fix-memory-leak-in-nilfs_sysfs_delete_device_.patch 
+s390-stack-fix-possible-register-corruption-with-stack-switch-helper.patch +kvm-do-not-allow-mapping-valid-but-non-reference-counted-pages.patch +i2c-robotfuzz-osif-fix-control-request-directions.patch +ceph-must-hold-snap_rwsem-when-filling-inode-for-async-create.patch +kthread_worker-split-code-for-canceling-the-delayed-work-timer.patch +kthread-prevent-deadlock-when-kthread_mod_delayed_work-races-with-kthread_cancel_delayed_work_sync.patch +x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch +x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch diff --git a/queue-5.10/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch b/queue-5.10/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch new file mode 100644 index 00000000000..83cc50c873a --- /dev/null +++ b/queue-5.10/x86-fpu-make-init_fpstate-correct-with-optimized-xsave.patch @@ -0,0 +1,165 @@ +From f9dfb5e390fab2df9f7944bb91e7705aba14cd26 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 18 Jun 2021 16:18:25 +0200 +Subject: x86/fpu: Make init_fpstate correct with optimized XSAVE + +From: Thomas Gleixner + +commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream. + +The XSAVE init code initializes all enabled and supported components with +XRSTOR(S) to init state. Then it XSAVEs the state of the components back +into init_fpstate which is used in several places to fill in the init state +of components. + +This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because +those use the init optimization and skip writing state of components which +are in init state. So init_fpstate.xsave still contains all zeroes after +this operation. + +There are two ways to solve that: + + 1) Use XSAVE unconditionally, but that requires to reshuffle the buffer when + XSAVES is enabled because XSAVES uses compacted format. + + 2) Save the components which are known to have a non-zero init state by other + means. 
+ +Looking deeper, #2 is the right thing to do because all components the +kernel supports have all-zeroes init state except the legacy features (FP, +SSE). Those cannot be hard coded because the states are not identical on all +CPUs, but they can be saved with FXSAVE which avoids all conditionals. + +Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with +a BUILD_BUG_ON() which reminds developers to validate that a newly added +component has all zeroes init state. As a bonus remove the now unused +copy_xregs_to_kernel_booting() crutch. + +The XSAVE and reshuffle method can still be implemented in the unlikely +case that components are added which have a non-zero init state and no +other means to save them. For now, FXSAVE is just simple and good enough. + + [ bp: Fix a typo or two in the text. ] + +Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Reviewed-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/fpu/internal.h | 30 +++++++------------------- + arch/x86/kernel/fpu/xstate.c | 41 +++++++++++++++++++++++++++++++++--- + 2 files changed, 46 insertions(+), 25 deletions(-) + +--- a/arch/x86/include/asm/fpu/internal.h ++++ b/arch/x86/include/asm/fpu/internal.h +@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + } + ++static inline void fxsave(struct fxregs_state *fx) ++{ ++ if (IS_ENABLED(CONFIG_X86_32)) ++ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); ++ else ++ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); ++} ++ + /* These macros all use (%edi)/(%rdi) as the single memory argument. 
*/ + #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" + #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +@@ -270,28 +278,6 @@ static inline void copy_fxregs_to_kernel + + /* + * This function is called only during boot time when x86 caps are not set +- * up and alternative can not be used yet. +- */ +-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +-{ +- u64 mask = xfeatures_mask_all; +- u32 lmask = mask; +- u32 hmask = mask >> 32; +- int err; +- +- WARN_ON(system_state != SYSTEM_BOOTING); +- +- if (boot_cpu_has(X86_FEATURE_XSAVES)) +- XSTATE_OP(XSAVES, xstate, lmask, hmask, err); +- else +- XSTATE_OP(XSAVE, xstate, lmask, hmask, err); +- +- /* We should never fault when copying to a kernel buffer: */ +- WARN_ON_FPU(err); +-} +- +-/* +- * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ + static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +--- a/arch/x86/kernel/fpu/xstate.c ++++ b/arch/x86/kernel/fpu/xstate.c +@@ -441,12 +441,35 @@ static void __init print_xstate_offset_s + } + + /* ++ * All supported features have either init state all zeros or are ++ * handled in setup_init_fpu() individually. This is an explicit ++ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch ++ * newly added supported features at build time and make people ++ * actually look at the init state for the new feature. 
++ */ ++#define XFEATURES_INIT_FPSTATE_HANDLED \ ++ (XFEATURE_MASK_FP | \ ++ XFEATURE_MASK_SSE | \ ++ XFEATURE_MASK_YMM | \ ++ XFEATURE_MASK_OPMASK | \ ++ XFEATURE_MASK_ZMM_Hi256 | \ ++ XFEATURE_MASK_Hi16_ZMM | \ ++ XFEATURE_MASK_PKRU | \ ++ XFEATURE_MASK_BNDREGS | \ ++ XFEATURE_MASK_BNDCSR | \ ++ XFEATURE_MASK_PASID) ++ ++/* + * setup the xstate image representing the init state + */ + static void __init setup_init_fpu_buf(void) + { + static int on_boot_cpu __initdata = 1; + ++ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | ++ XFEATURE_MASK_SUPERVISOR_SUPPORTED) != ++ XFEATURES_INIT_FPSTATE_HANDLED); ++ + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + +@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(vo + copy_kernel_to_xregs_booting(&init_fpstate.xsave); + + /* +- * Dump the init state again. This is to identify the init state +- * of any feature which is not represented by all zero's. ++ * All components are now in init state. Read the state back so ++ * that init_fpstate contains all non-zero init state. This only ++ * works with XSAVE, but not with XSAVEOPT and XSAVES because ++ * those use the init optimization which skips writing data for ++ * components in init state. ++ * ++ * XSAVE could be used, but that would require to reshuffle the ++ * data when XSAVES is available because XSAVES uses xstate ++ * compaction. But doing so is a pointless exercise because most ++ * components have an all zeros init state except for the legacy ++ * ones (FP and SSE). Those can be saved with FXSAVE into the ++ * legacy area. Adding new features requires to ensure that init ++ * state is all zeroes or if not to add the necessary handling ++ * here. 
+ */ +- copy_xregs_to_kernel_booting(&init_fpstate.xsave); ++ fxsave(&init_fpstate.fxsave); + } + + static int xfeature_uncompacted_offset(int xfeature_nr) diff --git a/queue-5.10/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch b/queue-5.10/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch new file mode 100644 index 00000000000..79ddb0dfab5 --- /dev/null +++ b/queue-5.10/x86-fpu-preserve-supervisor-states-in-sanitize_restored_user_xstate.patch @@ -0,0 +1,66 @@ +From 9301982c424a003c0095bf157154a85bf5322bd0 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 18 Jun 2021 16:18:24 +0200 +Subject: x86/fpu: Preserve supervisor states in sanitize_restored_user_xstate() + +From: Thomas Gleixner + +commit 9301982c424a003c0095bf157154a85bf5322bd0 upstream. + +sanitize_restored_user_xstate() preserves the supervisor states only +when the fx_only argument is zero, which allows unprivileged user space +to put supervisor states back into init state. + +Preserve them unconditionally. + + [ bp: Fix a typo or two in the text. ] + +Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates") +Signed-off-by: Thomas Gleixner +Signed-off-by: Borislav Petkov +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/signal.c | 26 ++++++++------------------ + 1 file changed, 8 insertions(+), 18 deletions(-) + +--- a/arch/x86/kernel/fpu/signal.c ++++ b/arch/x86/kernel/fpu/signal.c +@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpre + + if (use_xsave()) { + /* +- * Note: we don't need to zero the reserved bits in the +- * xstate_header here because we either didn't copy them at all, +- * or we checked earlier that they aren't set. ++ * Clear all feature bits which are not set in ++ * user_xfeatures and clear all extended features ++ * for fx_only mode. + */ ++ u64 mask = fx_only ? 
XFEATURE_MASK_FPSSE : user_xfeatures; + + /* +- * 'user_xfeatures' might have bits clear which are +- * set in header->xfeatures. This represents features that +- * were in init state prior to a signal delivery, and need +- * to be reset back to the init state. Clear any user +- * feature bits which are set in the kernel buffer to get +- * them back to the init state. +- * +- * Supervisor state is unchanged by input from userspace. +- * Ensure supervisor state bits stay set and supervisor +- * state is not modified. ++ * Supervisor state has to be preserved. The sigframe ++ * restore can only modify user features, i.e. @mask ++ * cannot contain them. + */ +- if (fx_only) +- header->xfeatures = XFEATURE_MASK_FPSSE; +- else +- header->xfeatures &= user_xfeatures | +- xfeatures_mask_supervisor(); ++ header->xfeatures &= mask | xfeatures_mask_supervisor(); + } + + if (use_fxsr()) {